Repository: juicedata/juicefs
Branch: main
Commit: acb45e1224c5
Files: 755
Total size: 5.9 MB

Directory structure:
gitextract_vo3z05bo/

├── .autocorrectrc
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   ├── enhancement.md
│   │   └── support.md
│   ├── actions/
│   │   ├── build/
│   │   │   └── action.yml
│   │   ├── cancel-outdate-runs/
│   │   │   └── action.yml
│   │   ├── mount-coverage-dir/
│   │   │   └── action.yml
│   │   ├── upload-coverage/
│   │   │   └── action.yml
│   │   └── upload-total-coverage/
│   │       └── action.yml
│   ├── scripts/
│   │   ├── apt_install.sh
│   │   ├── cache.sh
│   │   ├── chaos/
│   │   │   ├── dynamic.yaml
│   │   │   ├── juicefs-csi-driver.Dockerfile
│   │   │   ├── juicefs.Dockerfile
│   │   │   ├── minio.yaml
│   │   │   ├── pvc.yaml
│   │   │   ├── redis.yaml
│   │   │   ├── sc.yaml
│   │   │   └── workflow.yaml
│   │   ├── check_juicefs_log.sh
│   │   ├── cmptree.py
│   │   ├── command/
│   │   │   ├── acl.sh
│   │   │   ├── clone.sh
│   │   │   ├── config.sh
│   │   │   ├── debug.sh
│   │   │   ├── dump_load.sh
│   │   │   ├── dump_load_bench.sh
│   │   │   ├── dump_load_cross_meta.sh
│   │   │   ├── format.sh
│   │   │   ├── fsck.sh
│   │   │   ├── gateway-random.sh
│   │   │   ├── gateway.sh
│   │   │   ├── gc.sh
│   │   │   ├── graceful_upgrade.sh
│   │   │   ├── info.sh
│   │   │   ├── interface.sh
│   │   │   ├── mount.sh
│   │   │   ├── quota.sh
│   │   │   └── random.sh
│   │   ├── command-win/
│   │   │   ├── acl.sh
│   │   │   ├── clone.sh
│   │   │   ├── debug.sh
│   │   │   ├── dump_load.sh
│   │   │   ├── fsck.sh
│   │   │   ├── gateway.sh
│   │   │   ├── gc.sh
│   │   │   ├── profile.sh
│   │   │   └── quota.sh
│   │   ├── common/
│   │   │   ├── common.sh
│   │   │   ├── common_win.sh
│   │   │   └── run_test.sh
│   │   ├── compare_results.sh
│   │   ├── copyFile.js
│   │   ├── fio.sh
│   │   ├── flush_meta.py
│   │   ├── fsrand.py
│   │   ├── hypo/
│   │   │   ├── command.py
│   │   │   ├── command_op.py
│   │   │   ├── command_test.py
│   │   │   ├── common.py
│   │   │   ├── context.py
│   │   │   ├── file.py
│   │   │   ├── file_op.py
│   │   │   ├── file_test.py
│   │   │   ├── fs.py
│   │   │   ├── fs_acl_test.py
│   │   │   ├── fs_op.py
│   │   │   ├── fs_sdk_test.py
│   │   │   ├── fs_test.py
│   │   │   ├── readme.md
│   │   │   ├── s3.py
│   │   │   ├── s3_contant.py
│   │   │   ├── s3_op.py
│   │   │   ├── s3_strategy.py
│   │   │   ├── s3_test.py
│   │   │   ├── stats.py
│   │   │   ├── strategy.py
│   │   │   ├── sync.py
│   │   │   └── sync_test.py
│   │   ├── mutate/
│   │   │   ├── check_coverage.py
│   │   │   ├── check_skip_by_comment.py
│   │   │   ├── how_to_use_mutate_test.md
│   │   │   ├── modify_sdk_pom.py
│   │   │   ├── mutest.sh
│   │   │   ├── mutesting.py
│   │   │   ├── parse_black_list.py
│   │   │   ├── parse_job_total.py
│   │   │   ├── parse_mutate_log.py
│   │   │   ├── parse_test_cases.py
│   │   │   ├── query_report.py
│   │   │   └── save_report.py
│   │   ├── perf/
│   │   │   ├── ai.sh
│   │   │   ├── ai_format_benchmark.py
│   │   │   ├── compare_ai.sh
│   │   │   ├── compare_mdtest_fio.sh
│   │   │   └── mdtest_fio.sh
│   │   ├── prepare_db.sh
│   │   ├── pysdk/
│   │   │   ├── bench.py
│   │   │   └── pysdk_test.py
│   │   ├── random_read_write.py
│   │   ├── save_benchmark.sh
│   │   ├── setup-hdfs.sh
│   │   ├── ssh/
│   │   │   ├── Dockerfile
│   │   │   └── docker-compose.yml
│   │   ├── start_meta_engine.sh
│   │   ├── sync/
│   │   │   ├── sync.sh
│   │   │   ├── sync_cluster.sh
│   │   │   ├── sync_fsrand.sh
│   │   │   └── sync_minio.sh
│   │   ├── test-mac/
│   │   │   ├── mac_commands.sh
│   │   │   └── start_meta_engine.sh
│   │   ├── testVersionCompatible.py
│   │   ├── upload_coverage_report.sh
│   │   ├── utils.py
│   │   └── wins_fs_test.py
│   └── workflows/
│       ├── bash/
│       │   ├── rm_fs
│       │   ├── rm_list.sh
│       │   └── rm_syscalls
│       ├── cache.yml
│       ├── cancel_outdate_runs.yml
│       ├── chaos.yml
│       ├── check-doc.yaml
│       ├── codeql-analysis.yml
│       ├── command-win.yml
│       ├── command.yml
│       ├── command2.yml
│       ├── compile.yml
│       ├── coverage-report.yml
│       ├── dependency-review.yml
│       ├── dockerfile-sftp
│       ├── dump_load.yml
│       ├── dump_load_bench.yml
│       ├── dump_load_cross_meta.yml
│       ├── fsrand.yml
│       ├── fsspec.yml
│       ├── gateway-random.yml
│       ├── gateway.yml
│       ├── integrationtests.yml
│       ├── ltpfs.yml
│       ├── ltpsyscalls.yml
│       ├── mutate-test-sdk.yml
│       ├── mutate-test.yml
│       ├── perf-test.yml
│       ├── permission-check.yaml
│       ├── pjdfstest.yml
│       ├── pysdk.yml
│       ├── random-test.yml
│       ├── release.yml
│       ├── resources/
│       │   ├── core-site.xml
│       │   ├── load-balancer.conf
│       │   ├── sync-options.txt
│       │   ├── tpcds_datagen.scala
│       │   ├── tpcds_run.scala
│       │   ├── vdbench_big_file.conf
│       │   ├── vdbench_long_run.conf
│       │   └── vdbench_small_file.conf
│       ├── rmfiles.yml
│       ├── sdktest.yml
│       ├── sync.yml
│       ├── unit-random-tests.yml
│       ├── unittests.yml
│       ├── vdbench.yml
│       ├── verify.yml
│       ├── version_compatible_hypo.yml
│       ├── wintest.yml
│       └── xattr.yml
├── .gitignore
├── .golangci.yml
├── .goreleaser.yml
├── .markdownlint-cli2.jsonc
├── .pre-commit-config.yaml
├── ADOPTERS.md
├── ADOPTERS_CN.md
├── CODEOWNERS
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── README_CN.md
├── check-changed.sh
├── cmd/
│   ├── bench.go
│   ├── bench_test.go
│   ├── clone.go
│   ├── compact.go
│   ├── compact_test.go
│   ├── config.go
│   ├── config_test.go
│   ├── debug.go
│   ├── debug_test.go
│   ├── debug_unix.go
│   ├── debug_windows.go
│   ├── destroy.go
│   ├── dump.go
│   ├── dump_test.go
│   ├── flags.go
│   ├── flags_test.go
│   ├── format.go
│   ├── format_test.go
│   ├── fsck.go
│   ├── fsck_test.go
│   ├── gateway.go
│   ├── gateway_noop.go
│   ├── gc.go
│   ├── gc_test.go
│   ├── info.go
│   ├── info_test.go
│   ├── integration_test.go
│   ├── load.go
│   ├── main.go
│   ├── main_test.go
│   ├── mdtest.go
│   ├── mount.go
│   ├── mount_test.go
│   ├── mount_unix.go
│   ├── mount_windows.go
│   ├── objbench.go
│   ├── object.go
│   ├── object_test.go
│   ├── passfd.go
│   ├── printsid.go
│   ├── profile.go
│   ├── quota.go
│   ├── restore.go
│   ├── restore_test.go
│   ├── rmr.go
│   ├── rmr_test.go
│   ├── stats.go
│   ├── status.go
│   ├── status_test.go
│   ├── summary.go
│   ├── sync.go
│   ├── sync_test.go
│   ├── umount.go
│   ├── version.go
│   ├── warmup.go
│   ├── warmup_test.go
│   ├── webdav.go
│   └── webdav_noop.go
├── codecov.yml
├── deploy/
│   └── juicefs-s3-gateway.yaml
├── docs/
│   ├── README.md
│   ├── en/
│   │   ├── administration/
│   │   │   ├── destroy.md
│   │   │   ├── fault_diagnosis_and_analysis.md
│   │   │   ├── metadata/
│   │   │   │   ├── _category_.yml
│   │   │   │   ├── etcd_best_practices.md
│   │   │   │   ├── fdb_best_practices.md
│   │   │   │   ├── mysql_best_practices.md
│   │   │   │   ├── postgresql_best_practices.md
│   │   │   │   ├── redis_best_practices.md
│   │   │   │   └── tikv_best_practices.md
│   │   │   ├── metadata_dump_load.md
│   │   │   ├── monitoring.md
│   │   │   ├── mount_at_boot.md
│   │   │   ├── status_check_and_maintenance.md
│   │   │   ├── sync_accounts_between_multiple_hosts.md
│   │   │   ├── troubleshooting.md
│   │   │   └── upgrade.md
│   │   ├── benchmark/
│   │   │   ├── benchmark.md
│   │   │   ├── fio.md
│   │   │   ├── mdtest.md
│   │   │   ├── metadata_engines_benchmark.md
│   │   │   └── performance_evaluation_guide.md
│   │   ├── community/
│   │   │   ├── _roadmap.md
│   │   │   ├── adopters.md
│   │   │   ├── articles.md
│   │   │   ├── integrations.md
│   │   │   └── usage_tracking.md
│   │   ├── deployment/
│   │   │   ├── _share_via_nfs.md
│   │   │   ├── _share_via_smb.md
│   │   │   ├── automation.md
│   │   │   ├── hadoop_java_sdk.md
│   │   │   ├── how_to_use_on_kubernetes.md
│   │   │   ├── juicefs_on_docker.md
│   │   │   ├── nfs.md
│   │   │   ├── production_deployment_recommendations.md
│   │   │   ├── python_sdk.md
│   │   │   ├── samba.md
│   │   │   └── webdav.md
│   │   ├── development/
│   │   │   ├── contributing_guide.md
│   │   │   └── internals.md
│   │   ├── faq.md
│   │   ├── getting-started/
│   │   │   ├── for_distributed.md
│   │   │   ├── installation.md
│   │   │   └── standalone.md
│   │   ├── grafana_template.json
│   │   ├── guide/
│   │   │   ├── cache.md
│   │   │   ├── clone.md
│   │   │   ├── dir-stats.md
│   │   │   ├── gateway.md
│   │   │   ├── quota.md
│   │   │   └── sync.md
│   │   ├── introduction/
│   │   │   ├── README.md
│   │   │   ├── architecture.md
│   │   │   ├── comparison/
│   │   │   │   ├── _category_.yml
│   │   │   │   ├── juicefs_vs_3fs.md
│   │   │   │   ├── juicefs_vs_alluxio.md
│   │   │   │   ├── juicefs_vs_cephfs.md
│   │   │   │   ├── juicefs_vs_glusterfs.md
│   │   │   │   ├── juicefs_vs_lustre.md
│   │   │   │   ├── juicefs_vs_s3fs.md
│   │   │   │   ├── juicefs_vs_s3ql.md
│   │   │   │   └── juicefs_vs_seaweedfs.md
│   │   │   └── io_processing.md
│   │   ├── reference/
│   │   │   ├── _common_options.mdx
│   │   │   ├── command_reference.mdx
│   │   │   ├── fuse_mount_options.md
│   │   │   ├── how_to_set_up_metadata_engine.md
│   │   │   ├── how_to_set_up_object_storage.md
│   │   │   ├── p8s_metrics.md
│   │   │   ├── posix_compatibility.md
│   │   │   ├── redis-csc.md
│   │   │   └── spec-limits.md
│   │   ├── release_notes.md
│   │   ├── security/
│   │   │   ├── encryption.md
│   │   │   ├── posix_acl.md
│   │   │   └── trash.md
│   │   └── tutorials/
│   │       ├── aliyun.md
│   │       ├── aws.md
│   │       ├── digitalocean.md
│   │       ├── juicefs_on_colab.md
│   │       ├── juicefs_on_k3s.md
│   │       ├── juicefs_on_kubesphere.md
│   │       ├── juicefs_on_rancher.md
│   │       ├── juicefs_on_wsl.md
│   │       ├── qcloud.md
│   │       └── windows.md
│   └── zh_cn/
│       ├── administration/
│       │   ├── destroy.md
│       │   ├── fault_diagnosis_and_analysis.md
│       │   ├── metadata/
│       │   │   ├── _category_.yml
│       │   │   ├── etcd_best_practices.md
│       │   │   ├── fdb_best_practices.md
│       │   │   ├── mysql_best_practices.md
│       │   │   ├── postgresql_best_practices.md
│       │   │   ├── redis_best_practices.md
│       │   │   └── tikv_best_practices.md
│       │   ├── metadata_dump_load.md
│       │   ├── monitoring.md
│       │   ├── mount_at_boot.md
│       │   ├── status_check_and_maintenance.md
│       │   ├── sync_accounts_between_multiple_hosts.md
│       │   ├── troubleshooting.md
│       │   └── upgrade.md
│       ├── benchmark/
│       │   ├── benchmark.md
│       │   ├── fio.md
│       │   ├── mdtest.md
│       │   ├── metadata_engines_benchmark.md
│       │   └── performance_evaluation_guide.md
│       ├── community/
│       │   ├── _roadmap.md
│       │   ├── adopters.md
│       │   ├── articles.md
│       │   ├── integrations.md
│       │   └── usage_tracking.md
│       ├── deployment/
│       │   ├── _share_via_nfs.md
│       │   ├── _share_via_smb.md
│       │   ├── automation.md
│       │   ├── hadoop_java_sdk.md
│       │   ├── how_to_use_on_kubernetes.md
│       │   ├── juicefs_on_docker.md
│       │   ├── nfs.md
│       │   ├── production_deployment_recommendations.md
│       │   ├── python_sdk.md
│       │   ├── samba.md
│       │   └── webdav.md
│       ├── development/
│       │   ├── contributing_guide.md
│       │   └── internals.md
│       ├── faq.md
│       ├── getting-started/
│       │   ├── for_distributed.md
│       │   ├── installation.md
│       │   └── standalone.md
│       ├── guide/
│       │   ├── cache.md
│       │   ├── clone.md
│       │   ├── dir-stats.md
│       │   ├── gateway.md
│       │   ├── quota.md
│       │   └── sync.md
│       ├── introduction/
│       │   ├── README.md
│       │   ├── architecture.md
│       │   ├── comparison/
│       │   │   ├── _category_.yml
│       │   │   ├── juicefs_vs_3fs.md
│       │   │   ├── juicefs_vs_alluxio.md
│       │   │   ├── juicefs_vs_cephfs.md
│       │   │   ├── juicefs_vs_glusterfs.md
│       │   │   ├── juicefs_vs_lustre.md
│       │   │   ├── juicefs_vs_s3fs.md
│       │   │   ├── juicefs_vs_s3ql.md
│       │   │   └── juicefs_vs_seaweedfs.md
│       │   └── io_processing.md
│       ├── reference/
│       │   ├── _common_options.mdx
│       │   ├── command_reference.mdx
│       │   ├── fuse_mount_options.md
│       │   ├── how_to_set_up_metadata_engine.md
│       │   ├── how_to_set_up_object_storage.md
│       │   ├── p8s_metrics.md
│       │   ├── posix_compatibility.md
│       │   └── spec-limits.md
│       ├── release_notes.md
│       ├── security/
│       │   ├── encryption.md
│       │   ├── posix_acl.md
│       │   └── trash.md
│       └── tutorials/
│           ├── aliyun.md
│           ├── aws.md
│           ├── digitalocean.md
│           ├── juicefs_on_colab.md
│           ├── juicefs_on_k3s.md
│           ├── juicefs_on_kubesphere.md
│           ├── juicefs_on_rancher.md
│           ├── juicefs_on_wsl.md
│           ├── qcloud.md
│           └── windows.md
├── go.mod
├── go.sum
├── hack/
│   ├── autocomplete/
│   │   ├── bash_autocomplete
│   │   └── zsh_autocomplete
│   ├── builder/
│   │   ├── Dockerfile
│   │   └── sdk.Dockerfile
│   └── winfsp_headers/
│       ├── fuse.h
│       ├── fuse_common.h
│       ├── fuse_opt.h
│       └── winfsp_fuse.h
├── integration/
│   ├── Makefile
│   ├── ioctl_test.sh
│   └── s3gateway_test.sh
├── main.go
├── package.json
├── pkg/
│   ├── acl/
│   │   ├── acl.go
│   │   ├── cache.go
│   │   └── cache_test.go
│   ├── chunk/
│   │   ├── cache_eviction.go
│   │   ├── cached_store.go
│   │   ├── cached_store_test.go
│   │   ├── chunk.go
│   │   ├── disk_cache.go
│   │   ├── disk_cache_state.go
│   │   ├── disk_cache_state_test.go
│   │   ├── disk_cache_test.go
│   │   ├── mem_cache.go
│   │   ├── metrics.go
│   │   ├── page.go
│   │   ├── page_test.go
│   │   ├── prefetch.go
│   │   ├── prefetch_test.go
│   │   ├── singleflight.go
│   │   ├── singleflight_test.go
│   │   ├── utils_darwin.go
│   │   ├── utils_linux.go
│   │   ├── utils_unix.go
│   │   ├── utils_unix_test.go
│   │   └── utils_windows.go
│   ├── compress/
│   │   ├── compress.go
│   │   └── compress_test.go
│   ├── fs/
│   │   ├── fs.go
│   │   ├── fs_test.go
│   │   ├── http.go
│   │   ├── http_test.go
│   │   └── metrics.go
│   ├── fuse/
│   │   ├── context.go
│   │   ├── device_darwin.go
│   │   ├── device_linux.go
│   │   ├── fuse.go
│   │   ├── fuse_darwin.go
│   │   ├── fuse_linux.go
│   │   ├── fuse_test.go
│   │   ├── gidcache.go
│   │   └── utils.go
│   ├── gateway/
│   │   ├── gateway.go
│   │   └── gateway_test.go
│   ├── meta/
│   │   ├── backup.go
│   │   ├── base.go
│   │   ├── base_test.go
│   │   ├── benchmarks_test.go
│   │   ├── config.go
│   │   ├── config_test.go
│   │   ├── context.go
│   │   ├── dump.go
│   │   ├── info.go
│   │   ├── info_test.go
│   │   ├── interface.go
│   │   ├── interface_test.go
│   │   ├── load_dump_test.go
│   │   ├── lua_scripts.go
│   │   ├── metadata-sub.sample
│   │   ├── metadata.sample
│   │   ├── openfile.go
│   │   ├── pb/
│   │   │   ├── backup.pb.go
│   │   │   └── backup.proto
│   │   ├── quota.go
│   │   ├── random_test.go
│   │   ├── redis.go
│   │   ├── redis_bak.go
│   │   ├── redis_csc.go
│   │   ├── redis_csc_test.go
│   │   ├── redis_lock.go
│   │   ├── slice.go
│   │   ├── sql.go
│   │   ├── sql_bak.go
│   │   ├── sql_lock.go
│   │   ├── sql_mysql.go
│   │   ├── sql_pg.go
│   │   ├── sql_sqlite.go
│   │   ├── sql_test.go
│   │   ├── status.go
│   │   ├── tkv.go
│   │   ├── tkv_badger.go
│   │   ├── tkv_bak.go
│   │   ├── tkv_etcd.go
│   │   ├── tkv_fdb.go
│   │   ├── tkv_fdb_test.go
│   │   ├── tkv_lock.go
│   │   ├── tkv_mem.go
│   │   ├── tkv_prefix.go
│   │   ├── tkv_test.go
│   │   ├── tkv_tikv.go
│   │   ├── utils.go
│   │   ├── utils_darwin.go
│   │   ├── utils_linux.go
│   │   ├── utils_test.go
│   │   └── utils_windows.go
│   ├── metric/
│   │   └── metrics.go
│   ├── object/
│   │   ├── azure.go
│   │   ├── b2.go
│   │   ├── bos.go
│   │   ├── bunny.go
│   │   ├── ceph.go
│   │   ├── checksum.go
│   │   ├── checksum_test.go
│   │   ├── cifs.go
│   │   ├── cos.go
│   │   ├── dragonfly.go
│   │   ├── encrypt.go
│   │   ├── encrypt_test.go
│   │   ├── eos.go
│   │   ├── etcd.go
│   │   ├── file.go
│   │   ├── file_darwin.go
│   │   ├── file_linux.go
│   │   ├── file_unix.go
│   │   ├── file_unix_test.go
│   │   ├── file_windows.go
│   │   ├── filesystem_test.go
│   │   ├── gluster.go
│   │   ├── gluster_test.go
│   │   ├── gs.go
│   │   ├── hdfs.go
│   │   ├── hdfs_kerberos.go
│   │   ├── ibmcos.go
│   │   ├── interface.go
│   │   ├── ks3.go
│   │   ├── mem.go
│   │   ├── minio.go
│   │   ├── nfs.go
│   │   ├── object_storage.go
│   │   ├── object_storage_test.go
│   │   ├── obs.go
│   │   ├── oos.go
│   │   ├── oss.go
│   │   ├── prefix.go
│   │   ├── qingstor.go
│   │   ├── qiniu.go
│   │   ├── redis.go
│   │   ├── response_attrs.go
│   │   ├── response_attrs_test.go
│   │   ├── restful.go
│   │   ├── restful_test.go
│   │   ├── s3.go
│   │   ├── s3_test.go
│   │   ├── scw.go
│   │   ├── sftp.go
│   │   ├── sharding.go
│   │   ├── space.go
│   │   ├── sql.go
│   │   ├── sql_mysql.go
│   │   ├── sql_pg.go
│   │   ├── sql_sqlite.go
│   │   ├── swift.go
│   │   ├── tikv.go
│   │   ├── tos.go
│   │   ├── ufile.go
│   │   ├── wasabi.go
│   │   └── webdav.go
│   ├── sync/
│   │   ├── cluster.go
│   │   ├── cluster_test.go
│   │   ├── config.go
│   │   ├── download.go
│   │   ├── download_test.go
│   │   ├── sync.go
│   │   └── sync_test.go
│   ├── usage/
│   │   ├── usage.go
│   │   └── usage_test.go
│   ├── utils/
│   │   ├── alloc.go
│   │   ├── alloc_test.go
│   │   ├── buffer.go
│   │   ├── buffer_test.go
│   │   ├── clock_test.go
│   │   ├── clock_unix.go
│   │   ├── clock_windows.go
│   │   ├── cond.go
│   │   ├── cond_test.go
│   │   ├── errors.go
│   │   ├── general.go
│   │   ├── humanize.go
│   │   ├── logger.go
│   │   ├── logger_syslog.go
│   │   ├── logger_test.go
│   │   ├── logger_windows.go
│   │   ├── memusage.go
│   │   ├── memusage_test.go
│   │   ├── memusage_windows.go
│   │   ├── proc_title.go
│   │   ├── proc_title_noop.go
│   │   ├── progress.go
│   │   ├── progress_test.go
│   │   ├── rusage.go
│   │   ├── rusage_test.go
│   │   ├── rusage_windows.go
│   │   ├── utils.go
│   │   ├── utils_darwin.go
│   │   ├── utils_linux.go
│   │   ├── utils_test.go
│   │   ├── utils_unix.go
│   │   └── utils_windows.go
│   ├── version/
│   │   ├── .gitattributes
│   │   ├── version.go
│   │   └── version_test.go
│   ├── vfs/
│   │   ├── accesslog.go
│   │   ├── accesslog_test.go
│   │   ├── backup.go
│   │   ├── backup_test.go
│   │   ├── compact.go
│   │   ├── compact_test.go
│   │   ├── fill.go
│   │   ├── fill_test.go
│   │   ├── handle.go
│   │   ├── helpers.go
│   │   ├── helpers_test.go
│   │   ├── internal.go
│   │   ├── reader.go
│   │   ├── vfs.go
│   │   ├── vfs_test.go
│   │   ├── vfs_unix.go
│   │   ├── vfs_windows.go
│   │   └── writer.go
│   ├── win/
│   │   ├── ldap.go
│   │   └── sid.go
│   └── winfsp/
│       ├── log.go
│       └── winfs.go
├── rfcs/
│   └── 1-dir-used-statistics.md
└── sdk/
    ├── java/
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── conf/
    │   │   ├── contract/
    │   │   │   └── juicefs.xml
    │   │   ├── core-site.xml
    │   │   └── log4j.properties
    │   ├── kerberos.sh
    │   ├── libjfs/
    │   │   ├── Makefile
    │   │   ├── bridge.go
    │   │   ├── bridge_test.go
    │   │   ├── callback.c
    │   │   ├── guid.go
    │   │   ├── guid_unix.go
    │   │   ├── guid_windows.go
    │   │   ├── kerberos.go
    │   │   ├── main.go
    │   │   ├── remote_write.go
    │   │   └── remote_write_test.go
    │   ├── pom.xml
    │   └── src/
    │       ├── main/
    │       │   ├── java/
    │       │   │   └── io/
    │       │   │       └── juicefs/
    │       │   │           ├── FlinkFileSystemFactory.java
    │       │   │           ├── JuiceFS.java
    │       │   │           ├── JuiceFileSystem.java
    │       │   │           ├── JuiceFileSystemImpl.java
    │       │   │           ├── KiteDataLoader.java
    │       │   │           ├── Main.java
    │       │   │           ├── bench/
    │       │   │           │   ├── AccumulatingReducer.java
    │       │   │           │   ├── IOMapperBase.java
    │       │   │           │   ├── NNBench.java
    │       │   │           │   └── TestDFSIO.java
    │       │   │           ├── exception/
    │       │   │           │   └── QuotaExceededException.java
    │       │   │           ├── kerberos/
    │       │   │           │   ├── AuthCredential.java
    │       │   │           │   ├── JuiceFSDelegationTokenIdentifier.java
    │       │   │           │   ├── JuiceFSTokenRenewer.java
    │       │   │           │   └── KerberosUtil.java
    │       │   │           ├── metrics/
    │       │   │           │   └── JuiceFSInstrumentation.java
    │       │   │           ├── permission/
    │       │   │           │   ├── RangerAdminRefresher.java
    │       │   │           │   ├── RangerConfig.java
    │       │   │           │   ├── RangerJfsAccessRequest.java
    │       │   │           │   ├── RangerJfsPlugin.java
    │       │   │           │   ├── RangerJfsResource.java
    │       │   │           │   ├── RangerPermissionChecker.java
    │       │   │           │   ├── RangerPermissionContext.java
    │       │   │           │   ├── RangerPluginCfg.java
    │       │   │           │   └── RangerRules.java
    │       │   │           ├── tools/
    │       │   │           │   └── RangerDownloader.java
    │       │   │           └── utils/
    │       │   │               ├── AclTransformation.java
    │       │   │               ├── BgTaskUtil.java
    │       │   │               ├── BufferPool.java
    │       │   │               ├── CallerContextUtil.java
    │       │   │               ├── ConsistentHash.java
    │       │   │               ├── FsNodesFetcher.java
    │       │   │               ├── FsPermissionExtension.java
    │       │   │               ├── NodesFetcher.java
    │       │   │               ├── NodesFetcherBuilder.java
    │       │   │               ├── PatchUtil.java
    │       │   │               ├── PrestoNodesFetcher.java
    │       │   │               ├── RedefineClassAgent.java
    │       │   │               ├── ReflectionUtil.java
    │       │   │               ├── SparkNodesFetcher.java
    │       │   │               ├── SparkThriftNodesFetcher.java
    │       │   │               └── YarnNodesFetcher.java
    │       │   └── resources/
    │       │       └── META-INF/
    │       │           └── services/
    │       │               ├── org.apache.flink.core.fs.FileSystemFactory
    │       │               ├── org.apache.hadoop.security.token.TokenIdentifier
    │       │               ├── org.apache.hadoop.security.token.TokenRenewer
    │       │               └── org.kitesdk.data.spi.Loadable
    │       └── test/
    │           ├── java/
    │           │   └── io/
    │           │       └── juicefs/
    │           │           ├── JuiceFileSystemBgTaskTest.java
    │           │           ├── JuiceFileSystemTest.java
    │           │           ├── acl/
    │           │           │   └── TestAclCLI.java
    │           │           ├── contract/
    │           │           │   ├── JuiceFSContract.java
    │           │           │   ├── TestAppend.java
    │           │           │   ├── TestConcat.java
    │           │           │   ├── TestCreate.java
    │           │           │   ├── TestDelete.java
    │           │           │   ├── TestGetFileStatus.java
    │           │           │   ├── TestJuiceFileSystemContract.java
    │           │           │   ├── TestMkdir.java
    │           │           │   ├── TestOpen.java
    │           │           │   ├── TestRename.java
    │           │           │   ├── TestSeek.java
    │           │           │   └── TestSetTimes.java
    │           │           ├── kerberos/
    │           │           │   └── KerberosTest.java
    │           │           ├── permission/
    │           │           │   ├── RangerAdminClientImpl.java
    │           │           │   └── RangerPermissionCheckerTest.java
    │           │           └── utils/
    │           │               ├── BgTaskUtilTest.java
    │           │               └── HashTest.java
    │           ├── resources/
    │           │   ├── hdfs-policies-tag.json
    │           │   ├── hdfs-policies.json
    │           │   ├── kerberos.cfg
    │           │   ├── log4j.properties
    │           │   └── testAclCLI.xml
    │           └── test-spark.sh
    └── python/
        ├── .gitignore
        ├── Dockerfile.builder
        ├── Dockerfile.builder.arm
        ├── Makefile
        ├── examples/
        │   ├── ffrecord/
        │   │   ├── dataloader.py
        │   │   ├── dataset.py
        │   │   ├── filereader.py
        │   │   ├── filereader_dio.py
        │   │   ├── main.py
        │   │   └── readme.md
        │   └── fsspec/
        │       ├── main.py
        │       └── readme.md
        └── juicefs/
            ├── juicefs/
            │   ├── __init__.py
            │   ├── juicefs.py
            │   └── spec.py
            ├── setup.py
            └── tests/
                ├── __init__.py
                └── test.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .autocorrectrc
================================================
rules:
  # Default rules: https://github.com/huacnlee/autocorrect/raw/main/autocorrect/.autocorrectrc.default
  spellcheck: 1
textRules:
  # Config some special rule for some texts
  # For example, if we wants to let "Hello你好" just warning, and "Hi你好" to ignore
  # "Hello你好": 2
  # "Hi你好": 0
fileTypes:
  # Config the files associations, you config is higher priority than default.
  # "rb": ruby
  # "Rakefile": ruby
  # "*.js": javascript
  # ".mdx": markdown
spellcheck:
  words:
    # Please do not add a general English word (eg. apple, python) here.
    # Users can add their special words to their .autocorrectrc file by their need.
    - Digital Ocean = DigitalOcean
    - JucieFS = JuiceFS
    - JueicFS = JuiceFS
    - JuiecFS = JuiceFS
    - filesystem = file system
    - mountpoint = mount point


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: Bug Report
about: Report a bug encountered while operating JuiceFS
labels: kind/bug
---

<!--
Please use this template while reporting a bug and provide as much info as possible. Not doing so may result in your bug not being addressed in a timely manner. Thanks!
-->

**What happened**:

**What you expected to happen**:

**How to reproduce it (as minimally and precisely as possible)**:

**Anything else we need to know?**

**Environment**:
- JuiceFS version (use `juicefs --version`) or Hadoop Java SDK version:
- Cloud provider or hardware configuration running JuiceFS:
- OS (e.g `cat /etc/os-release`):
- Kernel (e.g. `uname -a`):
- Object storage (cloud provider and region, or self maintained):
- Metadata engine info (version, cloud provider managed or self maintained):
- Network connectivity (JuiceFS to metadata engine, JuiceFS to object storage):
- Others:


================================================
FILE: .github/ISSUE_TEMPLATE/enhancement.md
================================================
---
name: Enhancement Request
about: Suggest an enhancement to the JuiceFS project
labels: kind/feature
---

<!-- Please only use this template for submitting enhancement requests -->

**What would you like to be added**:

**Why is this needed**:


================================================
FILE: .github/ISSUE_TEMPLATE/support.md
================================================
---
name: Support Request
about: Support request or question relating to JuiceFS
labels: kind/question
---

<!--
STOP -- PLEASE READ!

GitHub issue is not the right place for support requests.

If you're looking for help, check the Discussions (https://github.com/juicedata/juicefs/discussions).

You can also post your question on the Discussions or the JuiceFS Slack channel (https://juicefs.slack.com).
-->


================================================
FILE: .github/actions/build/action.yml
================================================
name: 'Build Action'
description: 'Build action'
inputs:
  target:
    description: 'build target: juicefs, juicefs.fdb etc'
    required: true
    default: 'juicefs'
  beta:
    description: 'beta version for the following test'
    required: false
runs:
  using: "composite"
  steps:
    - uses: actions/setup-go@v3
      with:
        go-version: 'oldstable'
        cache: true

    - name: Change go version for root user
      shell: bash
      run: |
        go_path=`which go`
        echo $go_path
        root_go_path=`sudo which go`
        echo $root_go_path
        sudo rm -f $root_go_path
        sudo ln -s $go_path $root_go_path
        go version
        sudo go version

    - name: Install tools
      shell: bash
      run: |
        if [ "${{inputs.target}}" == "juicefs.fdb" ]; then
          wget -q https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-clients_6.3.23-1_amd64.deb
          sudo dpkg -i foundationdb-clients_6.3.23-1_amd64.deb
        elif [ "${{inputs.target}}" == "juicefs.gluster" ]; then
          sudo .github/scripts/apt_install.sh uuid-dev libglusterfs-dev
        fi

    - name: Build linux target
      shell: bash
      run: |
        if [[ -n "${{ inputs.beta }}" ]]; then
          echo "use beta version of juicefs: ${{inputs.beta}}"
          wget -q https://juicefs-com-static.oss-cn-shanghai.aliyuncs.com/juicefs_beta/${{inputs.beta}} -O juicefs
          chmod +x juicefs
          ./juicefs version
        else
          echo "start to build ${{inputs.target}}"
          make ${{inputs.target}}.cover
          [ "${{inputs.target}}" != "juicefs" ] &&  mv ${{inputs.target}} juicefs
          ./juicefs version
          echo "build ${{inputs.target}} succeed"
        fi

================================================
FILE: .github/actions/cancel-outdate-runs/action.yml
================================================
name: 'Cancel Outdate Runs'
description: 'Cancel Outdate Runs'
inputs:
  head_sha:
    description: 'head_sha triggers the workflow runs'
    required: true
    type: string
  per_page:
    description: 'Page size of runs to cancel'
    required: true
    type: number
    default: 5
  page:
    description: 'Page number of runs to cancel'
    required: true
    type: number
    default: 1
  github_token:
    description: 'GITHUB_TOKEN'
    required: true
    type: string

runs:
  using: "composite"
  steps:
    - name: display parameters
      shell: bash
      run: |
        echo "head_sha is ${{inputs.head_sha}}"
        echo "per_page is ${{inputs.per_page}}"
        echo "page is ${{inputs.page}}"
        
    - uses: octokit/request-action@v2.x
      id: get_active_workflows
      with:
        route: GET /repos/${{github.repository}}/actions/runs?status=in_progress&event=pull_request&per_page=${{inputs.per_page}}&page=${{inputs.page}}&head_sha=${{inputs.head_sha}}
      env:
        GITHUB_TOKEN: ${{inputs.github_token}}

    - name: display active workflows
      shell: bash
      env:
        data: ${{ steps.get_active_workflows.outputs.data }}
      run: |
        echo "$data" | jq '.workflow_runs | map({id, head_sha, pull_request_number:.pull_requests[0].number})'
    
    - name: Extract workflow ids
      shell: bash
      id: extract_workflow_ids
      env:
        data: ${{ steps.get_active_workflows.outputs.data }}
      run: |
        echo pull_request_number is ${{ github.event.pull_request.number }}
        echo head_sha is ${{ github.event.pull_request.head.sha }}
        workflow_ids=$(echo "$data" | \
          jq '.workflow_runs | map({id, head_sha, pull_request_number:.pull_requests[0].number})' | \
          jq 'map(select( .pull_request_number == ${{ github.event.pull_request.number }} and .head_sha != "${{ github.event.pull_request.head.sha }}")) | map(.id)' | \
          jq 'join(",")')
        echo workflow_ids is $workflow_ids
        echo 'WORKFLOW_IDS='$(echo $workflow_ids | tr -d '"') >> $GITHUB_ENV
        
    - name: Cancel active workflows
      shell: bash
      run: |
        for i in ${WORKFLOW_IDS//,/ }
        do
          echo "Cancelling workflow with id: $i"
          # use curl here as I have no idea how to use a github action in a loop
          curl \
            -X POST \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${{inputs.github_token}}" \
            https://api.github.com/repos/${{ github.repository }}/actions/runs/$i/cancel
        done


================================================
FILE: .github/actions/mount-coverage-dir/action.yml
================================================
name: 'mount_coverage_dir'
description: 'mount coverage directory'
inputs:
  mount_point:
    description: 'mount point'
    required: true
    type: string
  subdir:
    description: 'subdir'
    required: false
    type: string
  token:
    description: 'token of jfs'
    required: true
    type: string
  access_key:
    description: 'access key of object storage service'
    required: true
    type: string
  secret_key:
    description: 'secret key of object storage service'
    required: true
    type: string

runs:
  using: "composite"
  steps:
    - name: set subdir
      shell: bash
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        jobs=$(gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id}}/attempts/${{ github.run_attempt }}/jobs)
        job_id=$(echo $jobs | jq -r '.jobs[] | select(.runner_name=="${{ runner.name }}") | select(.status=="in_progress") | .id')
        echo Job ID is: ${job_id}

        if [ "${{ github.event_name }}" == "pull_request" ]; then
          branch=${GITHUB_BASE_REF} # 目标分支
        elif [ "${{ github.event_name }}" == "push" ]; then
          branch=${GITHUB_REF#refs/heads/} # 当前分支
        else
          branch=${GITHUB_REF#refs/heads/} # 对于 schedule 和 workflow_dispatch
        fi
        echo input.subdir is ${{inputs.subdir}}
        if [ -n "${{inputs.subdir}}" ]; then
          subdir=${{inputs.subdir}}
        elif [[ "${{github.event_name}}" == "schedule" ]]; then
          subdir=juicefs/schedule/$(date +"%Y%m%d")/${{github.workflow}}
        elif [[ "${{github.job}}" == "success-all-test" ]]; then 
          subdir=juicefs/pr/$branch/${{github.workflow}}/${{github.run_id}}
        else
          subdir=juicefs/pr/$branch/${{github.workflow}}/${{github.run_id}}/${job_id}        
        fi
        echo "subdir=$subdir"
        echo "subdir=$subdir" >> $GITHUB_ENV

    - name: mount coverage dir
      shell: bash
      run: |
        sudo mkdir -p /root/.juicefs
        if ! sudo test -f /root/.juicefs/jfsmount; then
          sudo wget -q s.juicefs.com/static/Linux/mount -O /root/.juicefs/jfsmount 
          sudo chmod +x /root/.juicefs/jfsmount
        fi
        sudo curl -s -L https://juicefs.com/static/juicefs -o /usr/local/bin/juicefs && sudo chmod +x /usr/local/bin/juicefs
        if [[ -n "${{inputs.access_key}}" && -n "${{inputs.secret_key}}" && -n "${{inputs.token}}" ]]; then
          sudo juicefs auth ci-coverage --access-key ${{ inputs.access_key }} --secret-key ${{ inputs.secret_key }} --token ${{inputs.token}} --encrypt-keys
          sudo juicefs mount ci-coverage --subdir ${subdir} ${{inputs.mount_point}} --allow-other
        else
          echo "no jfs secrets provided, use local dir instead of jfs"
          mkdir -p ${{inputs.mount_point}}
        fi
        

================================================
FILE: .github/actions/upload-coverage/action.yml
================================================
name: 'upload_coverage_report'
description: 'upload coverage report of one job'
inputs:
  UPLOAD_TOKEN:
    description: 'upload token'
    required: true
    type: string

runs:
  using: "composite"
  steps:
    - name: umount juicefs
      shell: bash
      run: |
        sudo umount /tmp/jfs || true
        sleep 3s

    - name: get job id
      shell: bash
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        jobs=$(gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id}}/attempts/${{ github.run_attempt }}/jobs)
        job_id=$(echo $jobs | jq -r '.jobs[] | select(.runner_name=="${{ runner.name }}") | select(.status=="in_progress") | .id')
        echo Job ID is: ${job_id}
        echo "job_id=$job_id" >> $GITHUB_ENV

    - name: generate mount coverage report
      shell: bash
      run: |
        echo "generate coverage percentage report"
        sudo go tool covdata percent -i=cover/ | sudo tee cover/percent.txt
        echo "generate coverage text report"
        sudo go tool covdata textfmt -i=cover/ -o cover/cover.txt
        echo "generate coverage html report"
        sudo go tool cover -html=cover/cover.txt -o cover/cover.html
        [[ -z "${{inputs.UPLOAD_TOKEN}}" ]] && echo "no upload token, skip upload" && exit 0 || true
        .github/scripts/upload_coverage_report.sh cover/cover.html juicefs_${{github.workflow}}_${{github.run_id}}_${job_id}.html ${{inputs.UPLOAD_TOKEN}}
        

================================================
FILE: .github/actions/upload-total-coverage/action.yml
================================================
name: 'upload_total_coverage_report'
description: 'upload total coverage report of all jobs in workflow'
inputs:
  UPLOAD_TOKEN:
    description: 'upload token'
    required: true
    type: string

runs:
  using: "composite"
  steps:
    - name: generate total coverage report
      shell: bash
      run: |
        echo "current dir is $(pwd)"
        if [[ "${{github.event_name}}" == "schedule" ]]; then
          coverdirs="cover,"
        else
          for dir in $(find cover -mindepth 1 -maxdepth 1 -type d -exec basename {} \;); do
            coverdirs+="cover/$dir/,"
          done
        fi
        coverdirs=${coverdirs%,}
        echo coverdirs is $coverdirs
        [[ -z "$coverdirs" ]] && echo -e "\e[31m no coverage dir found\e[0m" && exit 0
        sudo go tool covdata percent -i=$coverdirs | sudo tee cover/cover.percent
        echo "generated coverage percent report:" $(realpath cover/cover.percent)
        sudo go tool covdata textfmt -o cover/cover.txt -i=$coverdirs 
        echo "generated coverage report in text format:" $(realpath cover/cover.txt)
        sudo go tool cover -html=cover/cover.txt -o cover/cover.html
        echo "generated coverage report in html format:" $(realpath cover/cover.html)
        ls -l cover/cover*

    - name: upload coverage report
      shell: bash
      run: |
        [[ -z "${{inputs.UPLOAD_TOKEN}}" ]] && echo -e "\e[31m no upload token, skip upload \e[0m" && exit 0 || true
        if [[ -f cover/cover.html ]]; then
          .github/scripts/upload_coverage_report.sh cover/cover.html ${{github.workflow}}_${{github.run_id}}.html ${{inputs.UPLOAD_TOKEN}}
        else
          echo -e "\e[31m no coverage report found\e[0m" && exit 0
        fi

================================================
FILE: .github/scripts/apt_install.sh
================================================
#!/bin/bash

set -e

# Set the maximum number of retries
MAX_RETRIES=3

# Define a function to run a command and check the return code
# The function takes two arguments: the command to run and a description of the command
function run_command() {
  local cmd=$1
  local retries=0
  local retry_cmd="$cmd"
  while true; do
    # Run the command and capture the return code
    $retry_cmd 2>&1 | tee /tmp/install.log || true
    local ret=$?
    # If the command succeeded, break out of the loop
    if [[ $ret -eq 0 ]]; then
      break
    fi
    # If the command failed and we have retries left, print a warning and retry
    if [[ $retries -lt $MAX_RETRIES ]]; then
      retries=$((retries + 1))
      echo "WARNING: $cmd failed with return code $ret. Retrying ($retries/$MAX_RETRIES)..."
      # If the error message indicates missing packages, retry with --fix-missing
      if [[ $cmd == "apt-get update"* ]] && grep -q 'Failed to fetch' /tmp/install.log; then
        retry_cmd="apt-get update -y --fix-missing"
      elif [[ $cmd == "apt-get install"* ]] &&  grep -q 'Unable to fetch some archives' /tmp/install.log; then
        retry_cmd="apt-get install -y --fix-missing $package_name"
      fi
    else
      # If we've exhausted all retries, exit with an error
      echo "ERROR: $cmd failed with return code $ret after $MAX_RETRIES retries."
      exit 1
    fi
  done
}

# Run apt-get update and check the return code
run_command "apt-get update -y" 
package_name=$@
# Run apt-get install and check the return code
run_command "apt-get install -y $package_name"


================================================
FILE: .github/scripts/cache.sh
================================================
#!/bin/bash -e
dpkg -s redis-server || .github/scripts/apt_install.sh  redis-tools redis-server
dpkg -s fio || .github/scripts/apt_install.sh fio
source .github/scripts/common/common.sh
source .github/scripts/start_meta_engine.sh
[[ -z "$META" ]] && META=sqlite3
start_meta_engine $META minio
META_URL=$(get_meta_url $META)
if [[ "$META" == "sqlite3" ]]; then
    META_URL="sqlite3:///tmp/test.db"
fi

test_warmup_in_background(){
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount $META_URL /tmp/jfs -d
    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=1024
    ./juicefs warmup /tmp/jfs/test --evict
    ./juicefs warmup /tmp/jfs/test --background
    wait_warmup_finish /tmp/jfs/test 100
    ./juicefs warmup /tmp/jfs/test --background --evict 
    wait_warmup_finish /tmp/jfs/test 0
}

test_batch_warmup(){
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount $META_URL /tmp/jfs -d
    rm -f file.list
    file_count=11000
    time seq 1 $file_count | xargs -P 8 -I {} sh -c 'echo {} > /tmp/jfs/test_{}; echo /tmp/jfs/test_{} >> file.list'
    # time for i in $(seq 1 $file_count); do echo $i > /tmp/jfs/test_$i; echo /tmp/jfs/test_$i >> file.list; done
    ./juicefs warmup -f file.list 2>&1 | tee warmup.log
    files=$(get_cache_file_count)
    [[ $files -ne $file_count ]] && echo "warmup failed, expect $file_count files, actual $files" && exit 1 || true
    ./juicefs warmup -f file.list --check 2>&1 | tee warmup.log
    files=$(get_cache_file_count)
    [[ $files -ne $file_count ]] && echo "warmup failed, expect $file_count files, actual $files" && exit 1 || true
    grep "(100.0%)" warmup.log || (echo "warmup failed, expect 100.0% warmup" && exit 1)
    ./juicefs warmup -f file.list --evict 2>&1 | tee warmup.log 
    files=$(get_cache_file_count)
    [[ $files -ne $file_count ]] && echo "warmup evict failed, expect $file_count files, actual $files" && exit 1 || true
    ./juicefs warmup -f file.list --check 2>&1 | tee warmup.log
    files=$(get_cache_file_count)
    [[ $files -ne $file_count ]] && echo "warmup evict failed, expect $file_count files, actual $files" && exit 1 || true
    grep "(0.0%)" warmup.log || (echo "warmup failed, expect 0.0% warmup" && exit 1)

    ./juicefs warmup /tmp/jfs/test* 2>&1 | tee warmup.log
    files=$(get_cache_file_count)
    [[ $files -ne $file_count ]] && echo "warmup failed, expect $file_count files, actual $files" && exit 1 || true
}

test_kernel_writeback_cache(){
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount $META_URL /tmp/jfs -d -o writeback_cache
    mkdir /tmp/jfs/fio
    runtime=15
    cat /tmp/jfs/.stats | grep fuse | grep 'juicefs_fuse_written_size_bytes_sum\|juicefs_fuse_ops_total_write'
    fio --name=seq_write_test --rw=write --bs=10 --size=4M --numjobs=8 --nrfiles=1 --runtime=$runtime --time_based --group_reporting --directory=/tmp/jfs/fio | tee fio.log
    cat /tmp/jfs/.stats | grep fuse | grep 'juicefs_fuse_written_size_bytes_sum\|juicefs_fuse_ops_total_write'
    bytes=$(cat /tmp/jfs/.stats | grep juicefs_fuse_written_size_bytes_sum | awk '{print $2}')
    ops=$(cat /tmp/jfs/.stats | grep juicefs_fuse_ops_total_write | awk '{print $2}')
    [[ $((bytes/ops)) -lt 10240 ]] && echo "writeback_cache may not enabled" && exit 1 || true
}

test_o_tmpfile(){
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount $META_URL /tmp/jfs -d -o writeback_cache
    TEST_DIR="/tmp/jfs/tmp"
    mkdir -p "$TEST_DIR"

    cat > /tmp/test_otmp.c << 'EOF'
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
int main() {
    int fd = openat(AT_FDCWD, "/tmp/jfs/tmp", O_RDWR|O_EXCL|O_CLOEXEC|O_TMPFILE, 0600);
    if (fd < 0) {
        perror("openat");
        return 1;
    }
    puts("openat ok");
    if (write(fd, "x", 1) < 0) perror("write");
    if (close(fd) < 0) {
        printf("close: %s\n", strerror(errno));
        return 1;
    }
    puts("close ok");
    return 0;
}
EOF
    gcc -o /tmp/test_otmp /tmp/test_otmp.c
    /tmp/test_otmp
    result=$?
    if [ $result -ne 0 ]; then
        echo "TEST FAILED: close fail"
        exit 1
    else
        echo "TEST PASSED"
    fi
}

test_cache_items(){
    do_test_cache_items 2-random
}

test_cache_items_lru(){
    do_test_cache_items lru
}

do_test_cache_items(){
    cache_eviction=$1
    prepare_test
    ./juicefs format $META_URL myjfs
    cache_items=500
    ./juicefs mount $META_URL /tmp/jfs -d --cache-items $cache_items --cache-eviction $cache_eviction
    seq 1 $((cache_items*2)) | xargs -P 8 -I {} sh -c 'echo {} > /tmp/jfs/test_{};'
    ./juicefs warmup /tmp/jfs/
    ./juicefs warmup /tmp/jfs/ --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    [[ $ratio -lt 55 ]] || (echo "ratio should less than 55%" && exit 1)
}

test_evict_on_writeback(){
    prepare_test
    ./juicefs format $META_URL myjfs --compress zstd
    ./juicefs mount $META_URL /tmp/jfs -d --writeback --upload-delay 3s
    dd if=/dev/urandom of=/tmp/test bs=1M count=200
    cp /tmp/test /tmp/jfs/test
    sleep 3
    stageBlocks=$(grep "juicefs_staging_blocks" /tmp/jfs/.stats | awk '{print $2}')
    [[ $stageBlocks -eq 0 ]] && echo "stage blocks should not be 0" && exit 1 || true
    ./juicefs warmup /tmp/jfs/test --evict
    wait_stage_uploaded
    compare_md5sum /tmp/test /tmp/jfs/test
}

test_remount_on_writeback(){
    prepare_test
    ./juicefs format $META_URL myjfs --compress lz4
    ./juicefs mount $META_URL /tmp/jfs -d --writeback --upload-delay 3s
    dd if=/dev/urandom of=/tmp/test bs=1M count=200
    cp /tmp/test /tmp/jfs/test
    umount_jfs /tmp/jfs $META_URL
    ./juicefs mount $META_URL /tmp/jfs -d --writeback
    sleep 3
    stage_size=$(du -shm $(get_rawstaging_dir) | awk '{print $1}')
    [[ $stage_size -gt 2 ]] && echo "stage size should not great than 2M" && exit 1 || true
    ./juicefs warmup /tmp/jfs/test --evict
    compare_md5sum /tmp/test /tmp/jfs/test
}
test_memory_cache_none(){
    do_test_memory_cache none
}

test_memory_cache_2_random(){
    do_test_memory_cache 2-random
}

test_memory_cache_lru_fallback(){
    prepare_test
    ./juicefs format $META_URL myjfs --compress lz4
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir memory --cache-size 100M --cache-eviction lru
    eviction=$(get_cache_eviction)
    [[ "$eviction" == "2-random" ]] || (echo "memory cache should fallback to 2-random, actual is $eviction" && exit 1)

    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=200
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    [[ "$ratio" -gt 40 && "$ratio" -lt 60 ]] || (echo "ratio($ratio) should between 40% and 60% after lru fallback" && exit 1)
}

test_cache_eviction_invalid_fallback(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d --cache-size 100M --cache-eviction invalid-policy
    eviction=$(get_cache_eviction)
    [[ "$eviction" == "2-random" ]] || (echo "invalid cache-eviction should fallback to 2-random, actual is $eviction" && exit 1)
}

do_test_memory_cache(){
    cache_eviction=$1
    prepare_test
    ./juicefs format $META_URL myjfs --compress lz4
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir memory --cache-size 100M --cache-eviction $cache_eviction
    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=200
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    if [[ $cache_eviction == "2-random" ]]; then
        [[ "$ratio" -gt 40 && "$ratio" -lt 60   ]] || (echo "ratio($ratio) should between 40% and 60%" && exit 1)
    elif [[ $cache_eviction == "none" ]]; then
        [[ "$ratio" -gt 40 && "$ratio" -lt 60   ]] || (echo "ratio($ratio) should between 40% and 60%" && exit 1)
    fi
    ./juicefs warmup /tmp/jfs/test --evict
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    [[ "$ratio" = 0 ]] || (echo "ratio($ratio) should less than 0" && exit 1)
}

test_cache_expired(){
    do_test_cache_expired /var/jfsCache/myjfs 2-random
}

test_cache_expired_memory(){
    do_test_cache_expired memory 2-random
}

test_cache_expired_lru(){
    do_test_cache_expired /var/jfsCache/myjfs lru
}

do_test_cache_expired(){
    cache_dir=$1
    cache_eviction=$2
    [[ -z $cache_eviction ]] && cache_eviction=2-random
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir $cache_dir --cache-expire 3s --cache-eviction $cache_eviction
    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=200
    for i in $(seq 1 1100); do
        dd if=/dev/zero of=/tmp/jfs/test$i bs=32k count=1 status=none
    done
    ./juicefs warmup /tmp/jfs/ 2>&1 | tee warmup.log
    sleep 15
    ./juicefs warmup /tmp/jfs/ --check 2>&1 | tee warmup.log
    grep "(0.0%)" warmup.log || (echo "cache should expired" && exit 1)
}

test_cache_large_write(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d -v
    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=200
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    [[ "$ratio" = 0 ]] || (echo "ratio($ratio) should less than 0" && exit 1)
    ./juicefs mount $META_URL /tmp/jfs -d --cache-large-write 
    dd if=/dev/zero of=/tmp/jfs/test1 bs=1M count=200
    ./juicefs warmup /tmp/jfs/test1 --check 2>&1 | tee warmup.log
    # TODO: should check the ratio
    check_warmup_log 90
}

test_cache_mode(){
    prepare_test
    ./juicefs format $META_URL myjfs
    cache_mode=$(printf "%03o" $((RANDOM % 512)))
    echo "cache mode is $cache_mode"
    ./juicefs mount $META_URL /tmp/jfs -d --cache-mode $cache_mode --writeback --upload-delay 3s
    dd if=/dev/zero of=/tmp/jfs/test bs=1M count=32
    ./juicefs warmup /tmp/jfs/test
    find $(get_raw_dir) -type f ! -perm $cache_mode -exec echo "perm of {} is incorrect" \; -exec false {} +
    find $(get_rawstaging_dir) -type f ! -perm $cache_mode -exec echo "perm of {} is incorrect" \; -exec false {} +
    sleep 5s 
    find $(get_raw_dir) -type f ! -perm $cache_mode -exec echo "perm of {} is incorrect" \; -exec false {} +
    find $(get_rawstaging_dir) -type f ! -perm $cache_mode -exec echo "perm of {} is incorrect" \; -exec false {} +
}

test_cache_compressed(){
    prepare_test
    ./juicefs format $META_URL myjfs --storage minio --bucket http://localhost:9000/test \
        --access-key minioadmin --secret-key minioadmin --compress lz4 --hash-prefix
    ./juicefs mount $META_URL /tmp/jfs -d 
    dd if=/dev/urandom of=/tmp/test bs=1M count=200
    cp /tmp/test /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --evict
    ./juicefs warmup /tmp/jfs/test
    docker stop minio
    compare_md5sum /tmp/test /tmp/jfs/test
    docker start minio
}

test_cache_checksum_none(){
    do_test_cache_checksum none
}

test_cache_checksum_full(){
    do_test_cache_checksum full
}

test_cache_checksum_shrink(){
    do_test_cache_checksum shrink
}

test_cache_checksum_extend(){
    do_test_cache_checksum extend
}

do_test_cache_checksum(){
    checksum_level=$1
    prepare_test
    ./juicefs format $META_URL myjfs --compress lz4
    ./juicefs mount $META_URL /tmp/jfs -d --verify-cache-checksum $checksum_level
    mkdir -p /tmp/jfs/rand-rw
    fio --name=seq_rw --rw=readwrite --bsrange=1k-4k --size=80M --numjobs=4 --runtime=5 --time_based --group_reporting --filename=/tmp/jfs/req-rw
    fio --name=rand_rw   --rw=randrw --bsrange=1k-4k --size=80M --numjobs=4 --runtime=5 --time_based --group_reporting --directory=/tmp/jfs/rand-rw --nrfiles=1000 --filesize=4k
}

test_disk_full_2_random(){
    do_test_disk_full 2-random
}

test_disk_full_lru(){
    do_test_disk_full lru
}

test_disk_full_none(){
    do_test_disk_full none
}

do_test_disk_full(){
    cache_eviction=$1
    prepare_test
    mount_jfsCache1 1G
    ./juicefs format $META_URL myjfs 
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir /var/jfsCache1 --cache-eviction $cache_eviction --free-space-ratio 0.2
    dd if=/dev/zero of=/tmp/test bs=1M count=1200
    cp /tmp/test /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test
    sleep 3 # wait to free space
    df -h /var/jfsCache1
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    used_percent=$(df /var/jfsCache1 | tail -1  | awk '{print $5}' | tr -d %)
    echo "used percent is $used_percent"
    if [[ $cache_eviction == "2-random" || $cache_eviction == "lru" ]]; then 
        [[ $used_percent -gt 80 ]] && echo "used percent($used_percent) should not more than 80%" && exit 1 || true
    elif [[ $cache_eviction == "none" ]]; then
        # cache will not evict even reach the free-space-ratio.
        [[ $used_percent -lt 80 ]] && echo "used percent($used_percent) should not less than 80%" && exit 1 || true
    fi
}

test_lru_hotset_prefer_recent(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d --cache-size 128M --cache-items 40 --cache-eviction lru

    mkdir -p /tmp/jfs/lru
    for i in $(seq 1 80); do
        dd if=/dev/zero of=/tmp/jfs/lru/f_$i bs=64k count=1 status=none
    done

    for i in $(seq 1 40); do
        ./juicefs warmup /tmp/jfs/lru/f_$i > /dev/null
    done

    sleep 2
    for i in $(seq 1 10); do
        cat /tmp/jfs/lru/f_$i > /dev/null
    done

    sleep 2
    for i in $(seq 41 70); do
        ./juicefs warmup /tmp/jfs/lru/f_$i > /dev/null
    done

    rm -f hot.list cold.list
    for i in $(seq 1 10); do
        echo /tmp/jfs/lru/f_$i >> hot.list
    done
    for i in $(seq 11 40); do
        echo /tmp/jfs/lru/f_$i >> cold.list
    done

    ./juicefs warmup -f hot.list --check 2>&1 | tee warmup.log
    hot_ratio=$(get_warmup_ratio)
    [[ "$hot_ratio" -eq 100 ]] || (echo "hot set ratio($hot_ratio) should be 100% for lru" && exit 1)

    ./juicefs warmup -f cold.list --check 2>&1 | tee warmup.log
    cold_ratio=$(get_warmup_ratio)
    [[ "$cold_ratio" -lt 20 ]] || (echo "cold set ratio($cold_ratio) should be less than 20% for lru" && exit 1)
}

test_inode_full(){
    prepare_test
    mount_jfsCache1 100G 1000
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir /var/jfsCache1 --free-space-ratio 0.2
    seq 1 1000 | xargs -P 8 -I {} sh -c 'echo {} > /tmp/jfs/test_{};'
    ./juicefs warmup /tmp/jfs/
    ./juicefs warmup /tmp/jfs/ --check 2>&1 | tee warmup.log
    sleep 3
    used_percent=$(df -i /var/jfsCache1 | tail -1  | awk '{print $5}' | tr -d %)
    [[ $used_percent -gt 85 ]] && echo "used percent($used_percent) should less than 85%" && exit 1 || true
}

test_disk_full_with_writeback(){
    prepare_test
    mount_jfsCache1 1G
    ./juicefs format $META_URL myjfs --compress zstd
    ./juicefs mount $META_URL /tmp/jfs -d --cache-dir /var/jfsCache1 --writeback --free-space-ratio 0.2 --upload-delay 5s
    dd if=/dev/urandom of=/tmp/test bs=1M count=1400
    cp /tmp/test /tmp/jfs/test
    wait_stage_uploaded
    sleep 3
    used_percent=$(df /var/jfsCache1 | tail -1  | awk '{print $5}' | tr -d %)
    [[ $used_percent -gt 80 ]] && echo "used percent($used_percent) should less than 80%" && exit 1 || true
    echo 3 > /proc/sys/vm/drop_caches
    ./juicefs warmup /tmp/jfs/test --evict
    compare_md5sum /tmp/test /tmp/jfs/test
}

test_disk_failover()
{
    prepare_test
    mount_jfsCache1
    rm -rf /var/log/juicefs.log
    rm -rf /var/jfsCache2 /var/jfsCache3
    ./juicefs format $META_URL myjfs --trash-days 0 --storage minio --bucket http://localhost:9000/test --access-key minioadmin --secret-key minioadmin
    JFS_MAX_DURATION_TO_DOWN=10s JFS_MAX_IO_DURATION=3s ./juicefs mount $META_URL /tmp/jfs -d \
        --cache-dir=/var/jfsCache1:/var/jfsCache2:/var/jfsCache3 --io-retries 1 
    dd if=/dev/urandom of=/tmp/test bs=1M count=1024
    cp /tmp/test /tmp/jfs/test
    /etc/init.d/redis-server stop
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup --check /tmp/jfs 2>&1 | tee warmup.log
    check_warmup_log  50
    wait_disk_down 60
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup --check /tmp/jfs 2>&1 | tee warmup.log
    check_warmup_log 98
    check_cache_distribute 1024 /var/jfsCache2 /var/jfsCache3
    echo stop minio && docker stop minio
    compare_md5sum /tmp/test /tmp/jfs/test
    docker start minio && sleep 3
}

test_disk_failover_lru()
{
    prepare_test
    mount_jfsCache1
    rm -rf /var/log/juicefs.log
    rm -rf /var/jfsCache2 /var/jfsCache3
    ./juicefs format $META_URL myjfs --trash-days 0 --storage minio --bucket http://localhost:9000/test --access-key minioadmin --secret-key minioadmin
    JFS_MAX_DURATION_TO_DOWN=10s JFS_MAX_IO_DURATION=3s ./juicefs mount $META_URL /tmp/jfs -d \
        --cache-dir=/var/jfsCache1:/var/jfsCache2:/var/jfsCache3 --io-retries 1 --cache-eviction lru
    dd if=/dev/urandom of=/tmp/test bs=1M count=1024
    cp /tmp/test /tmp/jfs/test
    /etc/init.d/redis-server stop
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup --check /tmp/jfs 2>&1 | tee warmup.log
    check_warmup_log  50
    wait_disk_down 60
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup --check /tmp/jfs 2>&1 | tee warmup.log
    check_warmup_log 98
    check_cache_distribute 1024 /var/jfsCache2 /var/jfsCache3
    echo stop minio && docker stop minio
    compare_md5sum /tmp/test /tmp/jfs/test
    docker start minio && sleep 3
}

test_manual_delete_cache_data_lru()
{
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0 --storage minio --bucket http://localhost:9000/test --access-key minioadmin --secret-key minioadmin
    ./juicefs mount $META_URL /tmp/jfs -d --cache-eviction lru --cache-size 1G --cache-scan-interval -1

    dd if=/dev/urandom of=/tmp/test bs=1M count=256
    cp /tmp/test /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    check_warmup_log 95

    raw_dir=$(get_raw_dir)
    find "$raw_dir" -type f | head -n 200 | xargs rm -f
    sync
    echo 3 > /proc/sys/vm/drop_caches || true

    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    ratio=$(get_warmup_ratio)
    [[ "$ratio" -lt 90 ]] || (echo "after manually deleting cache data, warmup ratio($ratio) should be less than 90%" && exit 1)

    compare_md5sum /tmp/test /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    check_warmup_log 95
}

test_disk_failure_on_writeback()
{
    prepare_test
    mount_jfsCache1
    rm -rf /var/log/juicefs.log
    rm -rf /var/jfsCache2 /var/jfsCache3
    mkdir -p /var/jfsCache2 /var/jfsCache3
    ./juicefs format $META_URL myjfs --trash-days 0 --storage minio --bucket http://localhost:9000/test --access-key minioadmin --secret-key minioadmin
    JFS_MAX_DURATION_TO_DOWN=5s JFS_MAX_IO_DURATION=3s ./juicefs mount $META_URL /tmp/jfs -d \
        --cache-dir=/var/jfsCache? --io-retries 1 --writeback -v
    dd if=/dev/urandom of=/tmp/test bs=1M count=1024
    cp /tmp/test /tmp/jfs/test
    dd if=/dev/urandom of=/tmp/jfs/test2 bs=1M count=10
    /etc/init.d/redis-server stop
    ./juicefs warmup /tmp/jfs/test2 &
    sleep 15
    grep -q "state change from unstable to down" /var/log/juicefs.log && echo "disk should not down" && exit 1 || true
    /etc/init.d/redis-server start
    ./juicefs warmup /tmp/jfs/test
    ./juicefs warmup /tmp/jfs/test --check 2>&1 | tee warmup.log
    # TODO: the ratio should be 100%
    check_warmup_log 60
    check_cache_distribute 1024 /var/jfsCache1 /var/jfsCache2 /var/jfsCache3
    compare_md5sum /tmp/test /tmp/jfs/test
}

prepare_test()
{
    df -h /
    umount_jfs /tmp/jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
    rm -rf /var/jfsCache/myjfs || true
    [[ ! -f /usr/local/bin/mc ]] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && chmod +x /usr/local/bin/mc
    mc alias set myminio http://localhost:9000 minioadmin minioadmin
    mc rm --force --recursive myminio/test || true
}

wait_warmup_finish(){
    path=$1
    expected_ratio=$2
    timeout=30
    for i in $(seq 1 $timeout); do
        ./juicefs warmup $path --check 2>&1 |tee warmup.log
        ratio=$(get_warmup_ratio)
        if [[ "$ratio" == "$expected_ratio" ]]; then
            echo "warmup finished after $i seconds, ratio is $ratio, expected ratio is $expected_ratio"
            break
        else
            echo "wait warmup finish $i"
            sleep 1
        fi
        if [[ $i -eq $timeout ]]; then
            echo "wait warmup finish timeout after $timeout seconds" && exit 1
        fi
    done
}

wait_stage_uploaded()
{
    echo "wait stage upload"
    for i in {1..30}; do
        stageBlocks=$(grep "juicefs_staging_blocks" /tmp/jfs/.stats | awk '{print $2}')
        if [[ "$stageBlocks" -eq 0 ]]; then
            echo "stageBlocks is now 0"
            break
        fi
        echo "wait stage upload $i" && sleep 1
    done
    if [[ "$stageBlocks" -ne 0 ]]; then
        echo "stage blocks have not uploaded: $stageBlocks" && exit 1
    fi
}

mount_jfsCache1(){
    capacity=$1
    [[ -z $capacity ]] && capacity=100G
    inodes=$2
    [[ -z $inodes ]] && inodes=10000000
    /etc/init.d/redis-server start
    timeout 30s bash -c 'until nc -zv localhost 6379; do sleep 1; done'
    umount -l /var/jfsCache1 || true
    rm -rf /var/jfsCache1
    redis-cli flushall
    rm -rf /var/jfs/test
    ./juicefs format "redis://localhost/1?read-timeout=3&write-timeout=1&max-retry-backoff=3" test --trash-days 0 --capacity $capacity --inodes $inodes
    ./juicefs mount redis://localhost/1 /var/jfsCache1 -d --log /tmp/juicefs.log
    # trap "echo umount /var/jfsCache1 && umount -l /var/jfsCache1" EXIT
}

get_cache_dir(){
    grep CacheDir /tmp/jfs/.config | awk -F'"' '{print $4}'
}

get_cache_eviction(){
    grep CacheEviction /tmp/jfs/.config | awk -F'"' '{print $4}'
}

get_raw_dir(){
    echo $(get_cache_dir)/raw/
}

get_rawstaging_dir(){
    echo $(get_cache_dir)/rawstaging/
}

check_evict_log(){
    ratio=$(get_warmup_ratio)
    if [[ "$ratio" -gt 0 ]]; then
        echo "cache ratio($ratio) should be 0 after evict"
        exit 1
    fi
}

check_warmup_log(){
    expected_ratio=$1
    ratio=$(get_warmup_ratio)
    if [[ "$ratio" -lt "$expected_ratio" ]]; then
        echo "cache ratio($ratio) should be more than expected_ratio($expected_ratio) after warmup"
        exit 1
    fi
}

get_cache_file_count(){
    sed -n 's/.* \([0-9]\+\) files.*/\1/p' warmup.log
}

get_cache_file_size(){
    sed -n 's/.* \([0-9]*\) MiB of.*/\1/p' warmup.log
}

get_warmup_ratio(){
    sed -n 's/.*(\([0-9]*\.[0-9]*%\)).*/\1/p' warmup.log | sed 's/%//' | awk '{print int($1)}'
}


check_cache_distribute() {
    max_total_size=$(echo "$1 * 1024" | bc | awk '{printf "%.0f", $1}')
    echo check_cache_distribute, max_total_size is $max_total_size
    shift
    total_weight=0
    declare -A weights
    declare -A sizes
    # Parse directory names and weights
    for arg in "$@"; do
        dir=$(echo "$arg" | awk -F: '{print $1}')
        weight=$(echo "$arg" | awk -F: '{print $2}')
        if [[ -z $weight ]]; then
            weight=1
        fi
        weights["$dir"]=$weight
        total_weight=$((total_weight + weight))
    done
    
    # Calculate total size and sizes of each directory
    for dir in "${!weights[@]}"; do
        echo dir is $dir
        du -sh "$dir" || true
        size=$(du -s "$dir" | awk '{print $1}')
        echo size is $size
        sizes["$dir"]=$size
    done
    
    # Check if total size exceeds max limit
    total_size=0
    for dir in "${!sizes[@]}"; do
        size=${sizes["$dir"]}
        total_size=$((total_size + size))
    done
    echo "total size is $total_size, max_total_size is $max_total_size"
    if [[ $total_size -gt $((max_total_size + max_total_size/10)) ]]; then
        echo "Total size of directories exceeds max limit"
        return 1
    fi
    
    # Check if each directory is evenly distributed based on its weight
    for dir in "${!sizes[@]}"; do
        size=${sizes["$dir"]}
        weight=${weights["$dir"]}
        avg_size=$((total_size * weight / total_weight))
        min_size=$((avg_size * 5 / 10))
        max_size=$((avg_size * 20 / 10))
        
        if [[ $size -lt $min_size || $size -gt $max_size ]]; then
            echo "$dir is not evenly distributed, size: $size, weight: $weight, ave_size: $avg_size, min_size: $min_size, max_size: $max_size"
            exit 1
        else
            echo "$dir is evenly distributed"
        fi
    done
}

wait_disk_down()
{
    timeout=$1
    for i in $(seq 1 $timeout); do
        if grep -q "state change from unstable to down" /var/log/juicefs.log; then
            echo "state changed from unstable to down after $i seconds"
            return
        else
            echo "\rWait for state change to down, $i"
            sleep 1
            count=$((count+1))
        fi
    done
    echo "Wait for state change to down timeout after $timeout seconds" && exit 1
}   

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/chaos/dynamic.yaml
================================================
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dynamic-ce
  labels:
    juicefs-app-type: dynamic-ce
spec:
  replicas: 1
  selector:
    matchLabels:
      juicefs-app-type: dynamic-ce
  template:
    metadata:
      labels:
        juicefs-app-type: dynamic-ce
    spec:
      containers:
      - name: vdbench
        image: zwwhdlsdocker/vdbench:latest
        imagePullPolicy: IfNotPresent
        volumeMounts:
          - mountPath: /data
            name: data
          - mountPath: /vdbench/config
            name: vdbench-cfg
          - mountPath: /vdbench/output
            name: output
        command: ["sh", "-c", "./vdbench -f /vdbench/config/vdbench.vdb -v"]
      volumes:
      - name: data
        persistentVolumeClaim:
          claimName: dynamic-ce
      - name: output
        hostPath:
          path: /root/vdbench/output
      - name: vdbench-cfg
        configMap:
          name: dynamic-ce
          items:
          - key: "vdbench.vdb"
            path: "vdbench.vdb"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: dynamic-ce
data:
  vdbench.vdb: |
    messagescan=no
    fsd=fsd1,anchor=/data,depth=1,width=1,files=20000,size=4k,openflags=o_direct
    fwd=fwd1,fsd=fsd1,operation=write,xfersize=4k,fileio=random,fileselect=random,threads=1
    rd=rd1,fwd=fwd1,fwdrate=max,format=yes,elapsed=60,interval=2


================================================
FILE: .github/scripts/chaos/juicefs-csi-driver.Dockerfile
================================================
FROM golang:1.20-buster as builder

ARG GOPROXY
# refs/remotes/pull/3056/merge
ARG GITHUB_REF
# 4ac69613b5919142d87f21a64ca744ae537192d6
ARG GITHUB_SHA
ARG JUICEFS_REPO_URL=https://github.com/juicedata/juicefs

WORKDIR /workspace
ENV GOPROXY=${GOPROXY:-https://proxy.golang.org}
ENV STATIC=1

RUN apt-get update && apt-get install -y musl-tools upx-ucl && \
    cd /workspace && git clone --depth=1 $JUICEFS_REPO_URL && \
    cd juicefs && git fetch --no-tags --prune origin +$GITHUB_SHA:$GITHUB_REF && \
    git checkout $GITHUB_REF && \
    make juicefs

FROM juicedata/juicefs-csi-driver:nightly

WORKDIR /app
COPY --from=builder /workspace/juicefs/juicefs /usr/local/bin/

RUN ls -l /usr/local/bin/juicefs

RUN /usr/local/bin/juicefs --version
RUN echo GITHUB_REF is $GITHUB_REF
RUN echo GITHUB_SHA is $GITHUB_SHA

# ENTRYPOINT ["/tini", "--", "/bin/juicefs-csi-driver"]


================================================
FILE: .github/scripts/chaos/juicefs.Dockerfile
================================================
FROM juicedata/mount:nightly
COPY ./juicefs /usr/local/bin/juicefs
# RUN apt-get update && apt-get install -y musl-tools upx-ucl && STATIC=1 make
# RUN cp -f juicefs /usr/local/bin/juicefs
RUN /usr/local/bin/juicefs version

================================================
FILE: .github/scripts/chaos/minio.yaml
================================================
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: minio-server
  namespace: kube-system
  labels:
    app: minio-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: minio-server
  serviceName: minio
  template:
    metadata:
      labels:
        app: minio-server
    spec:
      containers:
      - name: minio
        image: minio/minio
        resources:
          limits:
            memory: "500Mi"
            cpu: "500m"
          limits:
            memory: "100Mi"
            cpu: "100m"
        env:
        - name: MINIO_ROOT_USER
          value: minioadmin
        - name: MINIO_ROOT_PASSWORD
          value: minioadmin
        args:
        - server
        - /data
        volumeMounts:
        - mountPath: /data
          name: minio-data
        ports:
        - containerPort: 9000
          name: sever
      volumes:
      - name: minio-data
        hostPath:
          path: /data/minio-data
---
apiVersion: v1
kind: Service
metadata:
  name: minio
  namespace: kube-system
spec:
  type: NodePort
  selector:
    app: minio-server
  ports:
  - protocol: TCP
    port: 9000
    targetPort: 9000
    nodePort: 31275
    name: server

================================================
FILE: .github/scripts/chaos/pvc.yaml
================================================
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: dynamic-ce
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 5Pi
  storageClassName: dynamic-ce

================================================
FILE: .github/scripts/chaos/redis.yaml
================================================
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-server
  namespace: kube-system
  labels:
    app: redis-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-server
  serviceName: redis
  template:
    metadata:
      labels:
        app: redis-server
    spec:
      containers:
      - name: redis
        image: redis
        volumeMounts:
        - mountPath: /data
          name: redis-data
        resources:
          limits:
            memory: "500Mi"
            cpu: "500m"
          limits:
            memory: "100Mi"
            cpu: "100m"
        ports:
        - containerPort: 6379
      volumes:
      - name: redis-data
        hostPath:
          path: /data/redis
---
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: kube-system
spec:
  type: NodePort
  selector:
    app: redis-server
  ports:
  - protocol: TCP
    port: 6379
    targetPort: 6379
    nodePort: 31274


================================================
FILE: .github/scripts/chaos/sc.yaml
================================================
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: dynamic-ce
parameters:
  csi.storage.k8s.io/node-publish-secret-name: dynamic-ce
  csi.storage.k8s.io/node-publish-secret-namespace: kube-system
  csi.storage.k8s.io/provisioner-secret-name: dynamic-ce
  csi.storage.k8s.io/provisioner-secret-namespace: kube-system
  juicefs/mount-cpu-limit: 5000m
  juicefs/mount-memory-limit: 1Gi
  juicefs/mount-cpu-request: 100m
  juicefs/mount-memory-request: 500Mi
  juicefs/mount-image: juicedata/mount:ci
#mountOptions:
#  - cache-dir=/var/foo:/var/foo1:/var/foo2
provisioner: csi.juicefs.com
reclaimPolicy: Delete
volumeBindingMode: Immediate
---
apiVersion: v1
stringData:
  access-key: minioadmin
  bucket: http://minio.kube-system:9000/minio/dynamic-ce
  name: dynamic-ce
  metaurl: redis://redis.kube-system:6379/0
  secret-key: minioadmin
  storage: minio
  format-options: trash-days=0,block-size=4096
kind: Secret
metadata:
  name: dynamic-ce
  namespace: kube-system
type: Opaque


================================================
FILE: .github/scripts/chaos/workflow.yaml
================================================
apiVersion: chaos-mesh.org/v1alpha1
kind: Workflow
metadata:
  name: juicefs-workflow
spec:
  entry: the-entry
  templates:
    - name: the-entry
      templateType: Parallel
      children:
        # - minio-delay
        # - minio-io
        # - minio-memory
        # - minio-cpu
        # - minio-bandwidth
        # - redis-bandwidth
        # - redis-io
        # - redis-delay
        # - redis-memory
        # - redis-cpu
        # - juicefs-bandwidth
        # - juicefs-memory
        # - juicefs-cpu
        # - juicefs-delay
    # minio 带宽
    - name: minio-bandwidth
      templateType: NetworkChaos
      deadline: 20s
      networkChaos:
        action: bandwidth
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: minio-server
        bandwidth:
          rate: '500bps'
          limit: 100
          buffer: 10000
    # minio 网络延迟
    - name: minio-delay
      templateType: NetworkChaos
      networkChaos:
        action: delay
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: minio-server
        delay:
          latency: '500ms'
          correlation: '50'
          jitter: '500ms'
    # minio 磁盘读写延迟
    - name: minio-io
      templateType: IOChaos
      ioChaos:
        action: latency
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: minio-server
        volumePath: /data
        delay: '50ms'
    # minio 内存压力
    - name: minio-memory
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: minio-server
        stressors:
          memory:
            workers: 4
            size: '128MB'
    # minio cpu 压力
    - name: minio-cpu
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: minio-server
        stressors:
          cpu:
            workers: 4
            load: 100
    # redis 带宽
    - name: redis-bandwidth
      templateType: NetworkChaos
      networkChaos:
        action: bandwidth
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: redis-server
        bandwidth:
          rate: '200mbps'
          limit: 100
          buffer: 10000
    - name: redis-delay
      templateType: NetworkChaos
      networkChaos:
        action: delay
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: redis-server
        delay:
          latency: '100ms'
          correlation: '50'
          jitter: '500ms'
    # redis 磁盘读写延迟
    - name: redis-io
      templateType: IOChaos
      ioChaos:
        action: latency
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: redis-server
        volumePath: /redis
        delay: '1s'
    # redis 内存压力
    - name: redis-memory
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: redis-server
        stressors:
          memory:
            workers: 4
            size: '2GB'
    # redis cpu 压力
    - name: redis-cpu
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app: redis-server
        stressors:
          cpu:
            workers: 4
            load: 100
    # 客户端带宽
    - name: juicefs-bandwidth
      templateType: NetworkChaos
      deadline: 20s
      networkChaos:
        action: bandwidth
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app.kubernetes.io/name: juicefs-mount
        bandwidth:
          rate: '100bps'
          limit: 100
          buffer: 10000
    - name: juicefs-delay
      templateType: NetworkChaos
      networkChaos:
        action: delay
        mode: all
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app.kubernetes.io/name: juicefs-mount
        delay:
          latency: '100ms'
          correlation: '50'
          jitter: '500ms'
    # 客户端内存压力
    - name: juicefs-memory
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app.kubernetes.io/name: juicefs-mount
        stressors:
          memory:
            workers: 4
            size: '1GB'
    # 客户端cpu压力
    - name: juicefs-cpu
      templateType: StressChaos
      stressChaos:
        mode: one
        selector:
          namespaces:
            - kube-system
          labelSelectors:
            app.kubernetes.io/name: juicefs-mount
        stressors:
          cpu:
            workers: 4
            load: 100


================================================
FILE: .github/scripts/check_juicefs_log.sh
================================================
#!/bin/bash -e
for log_file in /var/log/juicefs.log $HOME/.juicefs/juicefs.log; do
    if [ -f $log_file ]; then
        break
    fi
done
echo "tail -1000 $log_file"
tail -1000 $log_file
grep -i "<FATAL>\|panic" $log_file && exit 1 || true

================================================
FILE: .github/scripts/cmptree.py
================================================
#!/usr/bin/env python

# Copyright (c) 2015, Bill Zissimopoulos. All rights reserved.
#
# Redistribution  and use  in source  and  binary forms,  with or  without
# modification, are  permitted provided that the  following conditions are
# met:
#
# 1.  Redistributions  of source  code  must  retain the  above  copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions  in binary  form must  reproduce the  above copyright
# notice,  this list  of conditions  and the  following disclaimer  in the
# documentation and/or other materials provided with the distribution.
#
# 3.  Neither the  name  of the  copyright  holder nor  the  names of  its
# contributors may  be used  to endorse or  promote products  derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY  THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND  ANY EXPRESS OR  IMPLIED WARRANTIES, INCLUDING, BUT  NOT LIMITED
# TO,  THE  IMPLIED  WARRANTIES  OF  MERCHANTABILITY  AND  FITNESS  FOR  A
# PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT  SHALL THE  COPYRIGHT
# HOLDER OR CONTRIBUTORS  BE LIABLE FOR ANY  DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL   DAMAGES  (INCLUDING,  BUT  NOT
# LIMITED TO,  PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS  OF USE,
# DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION) HOWEVER CAUSED AND  ON ANY
# THEORY  OF LIABILITY,  WHETHER IN  CONTRACT, STRICT  LIABILITY, OR  TORT
# (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT OF  THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import subprocess
try:
    __import__("xattr")
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import filecmp, os
import xattr

class TreeComparator(object):
    def __init__(self, dir1, dir2):
        self.dir1 = dir1
        self.dir2 = dir2
        self.left_only = []
        self.right_only = []
        self.common_funny = []
        self.funny_files = []
        self.diff_files = []
    def compare(self, p=""):
        d1 = os.path.join(self.dir1, p)
        d2 = os.path.join(self.dir2, p)
        print(f'compare {d1} with {d2}')
        dcmp = filecmp.dircmp(d1, d2, ignore=[])
        self.left_only.extend(os.path.join(p, n) for n in dcmp.left_only)
        self.right_only.extend(os.path.join(p, n) for n in dcmp.right_only)
        self.common_funny.extend(os.path.join(p, n) for n in dcmp.common_funny)
        self.funny_files.extend(os.path.join(p, n) for n in dcmp.funny_files)
        #(match, mismatch, errors) = filecmp.cmpfiles(d1, d2, dcmp.common_files, shallow=False)
        #self.diff_files.extend(os.path.join(p, n) for n in mismatch)
        #self.funny_files.extend(os.path.join(p, n) for n in errors)
        (match, mismatch, errors) = self.compare_files(d1, d2, dcmp.common_files)
        self.diff_files.extend(os.path.join(p, n) for n in mismatch)
        self.funny_files.extend(os.path.join(p, n) for n in errors)
        for d in dcmp.common_dirs:
            self.compare(os.path.join(p, d))

    def compare_files(self, d1, d2, files):
        match = []
        mismatch = []
        errors = []
        for f in files:
            f1 = os.path.join(d1, f)
            f2 = os.path.join(d2, f)
            try:
                s1 = os.stat(f1)
                s2 = os.stat(f2)                    
                for attr in ['st_mode', 'st_nlink', 'st_uid', 'st_gid', 'st_size']:
                    if getattr(s1, attr) != getattr(s2, attr):
                        print(f'{attr} mismatch with {f1}:{getattr(s1, attr)} and {f2}:{getattr(s2, attr)}')
                        mismatch.append(f)
                        continue
                if not filecmp.cmp(f1, f2):
                    print(f'content mismatch with {f1} and {f2}')
                    mismatch.append(f)
                    continue
                if not self.compare_xattr(f1, f2):
                    print(f'xattr mismatch with {f1} and {f2}')
                    mismatch.append(f)
                    continue
                match.append(f)
            except:
                print(f'error: {f}')
                errors.append(f)
        return match, mismatch, errors

    def compare_xattr(self, f1, f2):
        for attr in xattr.listxattr(f1):
            a1 = xattr.getxattr(f1, attr)
            a2 = xattr.getxattr(f2, attr)
            if a1 != a2:
                return False
        return True

if "__main__" == __name__:
    import argparse, sys
    def info(s):
        print ("%s: %s" % (os.path.basename(sys.argv[0]), s))
    def warn(s):
        print ("%s: %s" % (os.path.basename(sys.argv[0]), s))
    def fail(s, exitcode = 1):
        warn(s)
        sys.exit(exitcode)
    def main():
        p = argparse.ArgumentParser()
        p.add_argument("-q", "--quiet", action="store_true")
        p.add_argument("dir1")
        p.add_argument("dir2")
        args = p.parse_args(sys.argv[1:])
        print('start compare tree')
        tcmp = TreeComparator(args.dir1, args.dir2)
        tcmp.compare()
        res = len(tcmp.left_only) + len(tcmp.right_only) + \
             len(tcmp.funny_files) + len(tcmp.diff_files)
        # res = len(tcmp.left_only) + len(tcmp.right_only) + \
        #     len(tcmp.common_funny) + len(tcmp.funny_files) + len(tcmp.diff_files)
        if not args.quiet:
            if tcmp.left_only:
                print ("Left only:")
                for n in tcmp.left_only:
                    print( "    %s" % n)
            if tcmp.right_only:
                print ("Right only:")
                for n in tcmp.right_only:
                    print( "    %s" % n)
            if tcmp.funny_files:
                print ("Funny files:")
                for n in tcmp.funny_files:
                    print( "    %s" % n)
            # if tcmp.common_funny:
            #     print ("Differing stats:")
            #     for n in tcmp.common_funny:
            #         print ("    %s" % n)
            if tcmp.diff_files:
                print ("Differing files:")
                for n in tcmp.diff_files:
                    print ("    %s" % n)
        sys.exit(int(0 < res))
    def __entry():
        try:
            main()
        except EnvironmentError as ex:
            fail(ex)
        except KeyboardInterrupt:
            fail("interrupted", 130)
    __entry()

================================================
FILE: .github/scripts/command/acl.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

prepare_test()
{
    umount_jfs /tmp/jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
    rm -rf /var/jfsCache/myjfs || true
}

test_acl_with_kernel_check()
{
    prepare_test
    ./juicefs format $META_URL myjfs --enable-acl --trash-days 0
    ./juicefs mount -d $META_URL /tmp/jfs
    python3 .github/scripts/hypo/fs_acl_test.py 
}

test_acl_with_user_space_check()
{
    prepare_test
    ./juicefs format $META_URL myjfs --enable-acl --trash-days 0
    ./juicefs mount -d $META_URL /tmp/jfs --non-default-permission
    python3 .github/scripts/hypo/fs_acl_test.py 
}

test_modify_acl_config()
{
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount -d $META_URL /tmp/jfs
    touch /tmp/jfs/test
    setfacl -m u:root:rw /tmp/jfs/test && echo "setfacl should failed" && exit 1
    ./juicefs config $META_URL --enable-acl=true
    ./juicefs mount -d $META_URL /tmp/jfs
    setfacl -m u:root:rw /tmp/jfs/test
    ./juicefs config $META_URL --enable-acl
    umount_jfs /tmp/jfs $META_URL
    ./juicefs mount -d $META_URL /tmp/jfs
    setfacl -m u:root:rw /tmp/jfs/test
    ./juicefs config $META_URL --enable-acl=false && echo "should not disable acl" && exit 1 || true 
    ./juicefs config $META_URL | grep EnableACL | grep "true" || (echo "EnableACL should be true" && exit 1) 
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/clone.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_clone_preserve_with_file()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    id -u juicefs  && sudo userdel juicefs
    sudo useradd -u 1101 juicefs
    sudo -u juicefs touch /jfs/test
    for mode in 777 755 644; do
        sudo -u juicefs chmod $mode /jfs/test
        check_guid_after_clone true
        check_guid_after_clone false
    done
}

test_clone_preserve_with_dir()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    id -u juicefs  && sudo userdel juicefs
    sudo useradd -u 1101 juicefs
    sudo -u juicefs mkdir /jfs/test
    for mode in 777 755 644; do
        sudo -u juicefs chmod $mode /jfs/test
        check_guid_after_clone true
        check_guid_after_clone false
    done
}

test_clone_with_jfs_source()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    [[ ! -d /jfs/juicefs ]] && git clone https://github.com/juicedata/juicefs.git /jfs/juicefs --depth 1
    do_clone true
    do_clone false
}

skip_test_clone_with_fsrand()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    seed=$(date +%s)
    python3 .github/scripts/fsrand.py -a -c 2000 -s $seed  /jfs/juicefs
    do_clone true
    do_clone false 
}

do_clone()
{
    is_preserve=$1
    rm -rf /jfs/juicefs1
    rm -rf /jfs/juicefs2
    [[ "$is_preserve" == "true" ]] && preserve="--preserve" || preserve=""
    cp -r /jfs/juicefs /jfs/juicefs1 $preserve
    ./juicefs clone /jfs/juicefs /jfs/juicefs2 $preserve
    diff -ur /jfs/juicefs1 /jfs/juicefs2 --no-dereference
    cd /jfs/juicefs1/ && find . -printf "%m\t%u\t%g\t%p\n"  | sort -k4 >/tmp/log1 && cd -
    cd /jfs/juicefs2/ && find . -printf "%m\t%u\t%g\t%p\n"  | sort -k4 >/tmp/log2 && cd -
    diff -u /tmp/log1 /tmp/log2
}

test_clone_with_big_file()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    dd if=/dev/urandom of=/tmp/test bs=1M count=1000
    cp /tmp/test /jfs/test
    ./juicefs clone /jfs/test /jfs/test1
    rm /jfs/test -rf
    diff /tmp/test /jfs/test1
}
test_clone_with_big_file2()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    dd if=/dev/urandom of=/tmp/test bs=1M count=1000
    echo "a" | tee -a /tmp/test
    cp /tmp/test /jfs/test
    ./juicefs clone /jfs/test /jfs/test1
    rm /jfs/test -rf
    diff /tmp/test /jfs/test1
}

test_clone_with_random_write(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    PATH1=/tmp/test PATH2=/jfs/test python3 .github/scripts/random_read_write.py 
    ./juicefs clone /jfs/test /jfs/test1
    rm /jfs/test -rf
    diff /tmp/test /jfs/test1
}

test_clone_with_sparse_file()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    fallocate -l 1.0001g /jfs/test
    ./juicefs clone /jfs/test /jfs/test1
    diff /jfs/test /jfs/test1
}

test_clone_with_sparse_file2()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    fallocate -l 1.1T /jfs/test
    ./juicefs clone /jfs/test /jfs/test1
}

test_clone_with_small_files(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs/test
    for i in $(seq 1 2000); do
        echo $i > /jfs/test/$i
    done
    ./juicefs clone /jfs/test /jfs/test1
    diff -ur /jfs/test1 /jfs/test1
}

skip_test_clone_with_mdtest1()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs mdtest $META_URL /test --depth 2 --dirs 10 --files 10 --threads 100 --write 8192
    ./juicefs clone /jfs/test /jfs/test1
    ./juicefs rmr /jfs/test
    ./juicefs rmr /jfs/test1
}

skip_test_clone_with_mdtest2()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs mdtest $META_URL /test --depth 1 --dirs 1 --files 1000 --threads 100 --write 8192
    ./juicefs clone /jfs/test /jfs/test1
    ./juicefs rmr /jfs/test
    ./juicefs rmr /jfs/test1
}

check_guid_after_clone(){
    is_preserve=$1
    echo "check_guid_after_clone, is_preserve: $is_preserve"
    [[ "$is_preserve" == "true" ]] && preserve="--preserve" || preserve=""
    rm /jfs/test1 -rf
    sleep 3
    ls /jfs/test1 && echo "test1 should not exist" && exit 1 || echo "/jfs/test1 not exist" 
    rm /jfs/test2 -rf
    ./juicefs clone /jfs/test /jfs/test1 $preserve
    cp /jfs/test /jfs/test2 -rf $preserve
    uid1=$(stat -c %u /jfs/test1)
    gid1=$(stat -c %g /jfs/test1)
    mode1=$(stat -c %a /jfs/test1)
    uid2=$(stat -c %u /jfs/test2)
    gid2=$(stat -c %g /jfs/test2)
    mode2=$(stat -c %a /jfs/test2)

    if [[ "$uid1" != "$uid2" ]] || [[ "$gid1" != "$gid2" ]] || [[ "$mode1" != "$mode2" ]]; then
        echo >&2 "<FATAL>: clone does not same as cp: uid1: $uid1, uid2: $uid2, gid1: $gid1, gid2: $gid2, mode1: $mode1, mode2: $mode2"
        exit 1
    fi
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/config.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META minio
META_URL=$(get_meta_url $META)
# version lower than 1.3.0 does not support parameter max_open_conns
if [[ $META_URL == *"?max_open_conns="* ]]; then
    META_URL=${META_URL%%\?*}
fi
LEGACY_META_URL=$META_URL
if [[ "$META" == "redis" ]]; then
    LEGACY_META_URL=${META_URL%%\?*}
fi
[ ! -x mc ] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc && chmod +x mc

download_juicefs_client(){
    version=$1
    wget -q https://github.com/juicedata/juicefs/releases/download/v$version/juicefs-$version-linux-amd64.tar.gz
    tar -xzf juicefs-$version-linux-amd64.tar.gz -C /tmp/
    sudo cp /tmp/juicefs juicefs-$version
    ./juicefs-$version version
}

test_config_min_client_version()
{
    prepare_test
    download_juicefs_client 1.0.0
    ./juicefs format $META_URL myjfs
    ./juicefs-1.0.0 mount $LEGACY_META_URL /jfs -d && exit 1 || true
    ./juicefs config $META_URL --min-client-version 1.0.1
    ./juicefs-1.0.0 mount $LEGACY_META_URL /jfs -d && exit 1 || true
    ./juicefs config $META_URL --min-client-version 1.0.0
    ./juicefs-1.0.0 mount $LEGACY_META_URL /jfs -d
}

test_config_max_client_version()
{
    prepare_test
    current_version=$(./juicefs version | awk '{print $3}')
    download_juicefs_client 1.0.0
    ./juicefs-1.0.0 format $LEGACY_META_URL myjfs
    ./juicefs-1.0.0 config $LEGACY_META_URL --max-client-version 1.0.1
    ./juicefs mount $META_URL /jfs -d && exit 1 || true
    ./juicefs config $META_URL --max-client-version $current_version
    ./juicefs mount $META_URL /jfs -d
}

test_config_secret_key(){
    # # Consider command as failed when any component of the pipe fails:
    # https://stackoverflow.com/questions/1221833/pipe-output-and-capture-exit-status-in-bash
    prepare_test
    set -o pipefail
    ./mc alias set minio http://127.0.0.1:9000 minioadmin minioadmin
    ./mc admin user add minio juicedata juicedata
    ./mc admin policy attach minio consoleAdmin --user juicedata
    ./juicefs format --storage minio --bucket http://localhost:9000/jfs-test --access-key juicedata --secret-key juicedata $META_URL myjfs
    ./juicefs mount $META_URL /jfs -d --io-retries 1 --no-usage-report --heartbeat 3

    ./mc admin user remove minio juicedata
    ./mc admin user add minio juicedata1 juicedata1
    ./mc admin policy attach minio consoleAdmin --user juicedata1
    ./juicefs config $META_URL --access-key juicedata1 --secret-key juicedata1
    sleep 6
    echo abc | tee /jfs/abc.txt && echo "write success"
    cat /jfs/abc.txt | grep abc && echo "read success"
}
          

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/debug.sh
================================================
#!/bin/bash -e

source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

check_debug_file(){
   files=("system-info.log" "juicefs.log" "config.txt" "stats.txt" "stats.5s.txt" "pprof")
   debug_dir="debug"
   if [ ! -d "$debug_dir" ]; then
    echo "error:no debug dir"
    exit 1
   fi
   all_files_exist=true
   for file in "${files[@]}"; do
     exist=`find "$debug_dir" -name $file | wc -l`
     if [ "$exist" == 0 ]; then
        echo "no $file"
        all_files_exist=false
     fi
   done
   if [ "$all_files_exist" = true ]; then
    echo "pass"
   else
    exit 1
   fi
}

test_debug_juicefs(){
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs
    dd if=/dev/urandom of=/jfs/bigfile bs=1M count=128
    ./juicefs debug /jfs/
    check_debug_file
    ./juicefs rmr /jfs/bigfile
}

test_debug_abnormal_juicefs(){
    rm -rf debug | true
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs
    dd if=/dev/urandom of=/jfs/bigfile bs=1M count=128
    killall -9 redis-server | true
    ./juicefs debug /jfs/
#    check_debug_file
    ./juicefs rmr /jfs/bigfile
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/dump_load.sh
================================================
#!/bin/bash -ex
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
META_URL2=$(get_meta_url2 $META)
[[ -z "$SEED" ]] && SEED=$(date +%s)
HEARTBEAT_INTERVAL=2
DIR_QUOTA_FLUSH_INTERVAL=4
# [[ -z "$SEED" ]] && SEED=1711594639
[[ -z "$BINARY" ]] && BINARY=false
[[ -z "$FAST" ]] && FAST=false

trap "echo random seed is $SEED" EXIT

if ! docker ps | grep -q minio; then
    docker run -d -p 9000:9000 --name minio \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
fi
[[ ! -f /usr/local/bin/mc ]] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && chmod +x /usr/local/bin/mc
sleep 3s
mc alias set myminio http://localhost:9000 minioadmin minioadmin
python3 -c "import xattr" || sudo pip install xattr

test_dump_load_sustained_file(){
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount -d $META_URL /jfs
    file_count=100
    for i in $(seq 1 $file_count); do
        touch /jfs/file$i
        exec {fd}<>/jfs/file$i
        echo fd is $fd
        fds[$i]=$fd
        rm /jfs/file$i
    done
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    for i in $(seq 1 $file_count); do
        fd=${fds[$i]}
        exec {fd}>&-
    done
    if [[ "$BINARY" == "true" ]]; then
        sustained=$(./juicefs load dump.json --binary --stat | grep sustained | awk -F"|" '{print $2}')
    else
        sustained=$(jq '.Sustained[].inodes | length' dump.json)
    fi
    echo "sustained file count: $sustained"
    # TODO： uncomment this line 
    # [[ "$sustained" -eq "$file_count" ]] || (echo "sustained file count($sustained) should be $file_count" && exit 1)
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount -d $META_URL /jfs 
}

test_dump_load_with_copy_file_range(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    rm -rf /tmp/test
    dd if=/dev/zero of=/tmp/test bs=1M count=1024
    cp /tmp/test /jfs/test
    node .github/scripts/copyFile.js /jfs/test /jfs/test1
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount -d $META_URL /jfs
    compare_md5sum /tmp/test /jfs/test1
}

test_dump_load_with_quota(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --inodes 1000 --capacity 1
    ./juicefs dump --log-level error $META_URL $(get_dump_option) > dump.json
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount $META_URL /jfs -d --heartbeat $HEARTBEAT_INTERVAL
    ./juicefs quota get $META_URL --path /d
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/d/test1 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
}

test_dump_load_with_iflag(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --enable-ioctl
    echo "hello" > /jfs/hello.txt
    chattr +i /jfs/hello.txt
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount -d $META_URL /jfs --enable-ioctl
    echo "hello" > /jfs/hello.txt && echo "write should fail" && exit 1 || true
    chattr -i /jfs/hello.txt
    echo "world" > /jfs/hello.txt
    cat /jfs/hello.txt | grep world
}

test_dump_load_with_keep_secret_key()
{
    option=$@
    prepare_test
    ./juicefs format $META_URL myjfs --storage minio --bucket http://localhost:9000/test --access-key minioadmin --secret-key minioadmin
    ./juicefs dump --keep-secret-key $META_URL dump.json $(get_dump_option)
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount -d $META_URL /jfs
    echo "hello" > /jfs/hello.txt
    cat /jfs/hello.txt | grep hello

    umount_jfs /jfs $META_URL
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json $(get_load_option)
    ./juicefs mount -d $META_URL /jfs && echo "mount should fail" && exit 1 || true
    ./juicefs config --secret-key minioadmin $META_URL
    ./juicefs mount -d $META_URL /jfs
    echo "hello" > /jfs/hello.txt
    cat /jfs/hello.txt | grep hello
}

test_load_encrypted_meta_backup()
{
    prepare_test
    [[ ! -f my-priv-key.pem ]] && openssl genrsa -out my-priv-key.pem -aes256 -passout pass:12345678 2048
    export JFS_RSA_PASSPHRASE=12345678
    ./juicefs format $META_URL myjfs --encrypt-rsa-key my-priv-key.pem
    ./juicefs mount -d $META_URL /jfs
    SEED=$SEED LOG_LEVEL=WARNING MAX_EXAMPLE=50 STEP_COUNT=50 PROFILE=generate ROOT_DIR1=/jfs/test ROOT_DIR2=/tmp/test python3 .github/scripts/hypo/fs.py || true
    umount /jfs
    SKIP_BACKUP_META_CHECK=true ./juicefs mount -d --backup-meta 10s $META_URL /jfs
    sleep 10s
    backup_file=$(ls -l /var/jfs/myjfs/meta/ |tail -1 | awk '{print $NF}')
    backup_path=/var/jfs/myjfs/meta/$backup_file
    ls -l $backup_path

    ./juicefs load sqlite3://test2.db $backup_path --encrypt-rsa-key my-priv-key.pem --encrypt-algo aes256gcm-rsa
    ./juicefs mount -d sqlite3://test2.db /jfs2
    diff -ur /jfs/test /jfs2/test --no-dereference
    umount_jfs /jfs2 sqlite3://test2.db
    rm test2.db -rf
}

test_dump_load_with_random_test()
{
    prepare_test
    ./juicefs format $META_URL myjfs --enable-acl
    ./juicefs mount -d $META_URL /jfs 
    ./random-test runOp -baseDir /jfs/test -files 500000 -ops 5000000 -threads 50 -dirSize 100 -duration 30s -createOp 30,uniform -deleteOp 5,end --linkOp 10,uniform --symlinkOp 20,uniform --setXattrOp 10,uniform --truncateOp 10,uniform    
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    create_database $META_URL2
    ./juicefs load $META_URL2 dump.json $(get_load_option)
    ./juicefs dump $META_URL2 dump2.json $(get_dump_option)
    ./juicefs mount -d $META_URL2 /jfs2
    diff -ur /jfs/test /jfs2/test --no-dereference
    diff -ur /jfs/.trash /jfs2/.trash --no-dereference
    # compare_stat_acl_xattr /jfs/test /jfs2/test
    umount_jfs /jfs2 $META_URL2
    ./juicefs status $META_URL2 && UUID=$(./juicefs status $META_URL2 | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --yes $META_URL2 $UUID
}

test_dump_load_with_fsrand()
{
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0 --enable-acl
    ./juicefs mount -d $META_URL /jfs --enable-xattr
    rm -rf /tmp/test
    SEED=$SEED LOG_LEVEL=WARNING MAX_EXAMPLE=30 STEP_COUNT=20 PROFILE=generate ROOT_DIR1=/jfs/test ROOT_DIR2=/tmp/test python3 .github/scripts/hypo/fs.py || true    
    ./juicefs dump $META_URL dump.json $(get_dump_option)
    create_database $META_URL2
    ./juicefs load $META_URL2 dump.json $(get_load_option)
    ./juicefs dump $META_URL2 dump2.json $(get_dump_option)
    # if [[ "$BINARY" == "false" ]]; then
    #     compare_dump_json
    # fi
    ./juicefs mount -d $META_URL2 /jfs2
    diff -ur /jfs/test /jfs2/test --no-dereference
    compare_stat_acl_xattr /jfs/test /jfs2/test
    umount_jfs /jfs2 $META_URL2
    ./juicefs status $META_URL2 && UUID=$(./juicefs status $META_URL2 | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --yes $META_URL2 $UUID
}

compare_dump_json(){
    cp dump.json dump.json.bak
    cp dump2.json dump2.json.bak
    sed -i '/usedSpace/d' dump*.json.bak
    sed -i '/usedInodes/d' dump*.json.bak
    sed -i '/nextInodes/d' dump*.json.bak
    sed -i '/nextChunk/d' dump*.json.bak
    sed -i '/nextTrash/d' dump*.json.bak
    sed -i '/nextSession/d' dump*.json.bak
    sed -i 's/"inode":[0-9]\+/"inode":0/g' dump*.json.bak
    diff -ur dump.json.bak dump2.json.bak
}

compare_stat_acl_xattr(){
    dir1=$1
    dir2=$2
    files1=($(find "$dir1" -type f -o -type d -exec stat -c "%n" {} + | sort))
    files2=($(find "$dir2" -type f -o -type d -exec stat -c "%n" {} + | sort))
    [[ ${#files1[@]} -ne ${#files2[@]} ]] && echo "compare_stat_acl: number of files differs" && exit 1
    for i in "${!files1[@]}"; do
        stat1=$(stat -c "%F %a %s %h %U %G" "${files1[$i]}")
        stat2=$(stat -c "%F %a %s %h %U %G" "${files2[$i]}")
        acl1=$(getfacl -p "${files1[$i]}" | tail -n +2)
        acl2=$(getfacl -p "${files2[$i]}" | tail -n +2)
        xattr1=$(getfattr -d -m . -e hex "${files1[$i]}" 2>/dev/null | tail -n +2 | sort)
        xattr2=$(getfattr -d -m . -e hex "${files2[$i]}" 2>/dev/null | tail -n +2 | sort)
        [[ "$stat1" != "$stat2" ]] && echo "compare_stat_acl: stat for ${files1[$i]} and ${files2[$i]} differs" && echo $stat1 && echo $stat2 && exit 1
        [[ "$acl1" != "$acl2" ]] && echo "compare_stat_acl: ACLs for ${files1[$i]} and ${files2[$i]} differs" && echo $acl1 && echo $acl2 && exit 1
        [[ "$xattr1" != "$xattr2" ]] && echo "compare_stat_acl: xattrs for ${files1[$i]} and ${files2[$i]} differs" && echo $xattr1 && echo $xattr2 && exit 1

    done
    echo "compare_stat_acl: ACLs and stats are the same"
}

get_dump_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    elif [[ "$FAST" == "true" ]]; then
        option="--fast"
    else
        option=""
    fi
    echo $option
}

get_load_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    else
        option=""
    fi
    echo $option
}

prepare_test(){
    umount_jfs /jfs $META_URL
    umount_jfs /jfs2 sqlite3://test2.db
    python3 .github/scripts/flush_meta.py $META_URL
    rm test2.db -rf 
    rm -rf /var/jfs/myjfs || true
    mc rm --force --recursive myminio/test || true
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/dump_load_bench.sh
================================================
#!/bin/bash -ex

source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
[[ -z "$START_META" ]] && START_META=true
source .github/scripts/start_meta_engine.sh
META_URL=$(get_meta_url $META)
META_URL2=$(get_meta_url2 $META)
FILE_COUNT_IN_BIGDIR=100000

prepare_test_data(){
  umount_jfs /tmp/jfs $META_URL
  python3 .github/scripts/flush_meta.py $META_URL
  rm -rf /var/jfs/myjfs || true
  create_database $META_URL
  ./juicefs format $META_URL myjfs
  ./juicefs mount -d $META_URL /tmp/jfs
  threads=10
  ./juicefs mdtest $META_URL /bigdir --depth=1 --dirs=0 --files=$((FILE_COUNT_IN_BIGDIR/threads)) --threads=$threads --write=8192
  ./juicefs mdtest $META_URL /smalldir --depth=3 --dirs=10 --files=10 --threads=10 --write=8192
}

if [[ "$START_META" == "true" ]]; then  
  start_meta_engine $META
  prepare_test_data
fi

test_dump_load(){
  do_dump_load dump.json
}

test_dump_load_fast(){
  do_dump_load dump.json.gz --fast
}

test_dump_load_in_binary(){
  do_dump_load dump.bin --binary
}

do_dump_load(){
  dump_file=$1
  shift
  options=$@
  ./juicefs dump $META_URL $dump_file $options --threads=50
  # python3 .github/scripts/flush_meta.py $META_URL2
  create_database $META_URL2
  if [[ "$options" == *"--binary"* ]]; then
    ./juicefs load $META_URL2 $dump_file $options
  else
    ./juicefs load $META_URL2 $dump_file
  fi
  
  ./juicefs mount $META_URL2 /tmp/jfs2 -d
  df -i /tmp/jfs /tmp/jfs2
  iused1=$(df -i /tmp/jfs | tail -1 | awk  '{print $3}')
  iused2=$(df -i /tmp/jfs2 | tail -1 | awk  '{print $3}')
  [[ "$iused1" == "$iused2" ]] || (echo "<FATAL>: iused error: $iused1 $iused2" && exit 1)
  ./juicefs summary /tmp/jfs/ --csv
  ./juicefs summary /tmp/jfs2/ --csv
  summary1=$(./juicefs summary /tmp/jfs/ --csv | head -n +2 | tail -n 1)
  summary2=$(./juicefs summary /tmp/jfs2/ --csv | head -n +2 | tail -n 1)
  [[ "$summary1" == "$summary2" ]] || (echo "<FATAL>: summary error: $summary1 $summary2" && exit 1)
  
  file_count=$(ls -l /tmp/jfs2/bigdir/test-dir.0-0/mdtest_tree.0/ | wc -l)
  file_count=$((file_count-1))
  if [[ "$file_count" -ne "$FILE_COUNT_IN_BIGDIR" ]]; then 
    echo "<FATAL>: file_count error: $file_count"
    exit 1
  fi

  ./juicefs rmr /tmp/jfs2/smalldir
  ls /tmp/jfs2/smalldir && echo "<FATAL>: ls should fail" && exit 1 || true
  umount_jfs /tmp/jfs2 $META_URL2
  ./juicefs status $META_URL2 && UUID=$(./juicefs status $META_URL2 | grep UUID | cut -d '"' -f 4)
  ./juicefs destroy --yes $META_URL2 $UUID
}


source .github/scripts/common/run_test.sh && run_test $@

          
================================================
FILE: .github/scripts/command/dump_load_cross_meta.sh
================================================
#!/bin/bash -ex
source .github/scripts/common/common.sh

[[ -z "$META1" ]] && META1=sqlite3
[[ -z "$META2" ]] && META2=redis
source .github/scripts/start_meta_engine.sh
start_meta_engine $META1
start_meta_engine $META2
META_URL1=$(get_meta_url $META1)
META_URL2=$(get_meta_url $META2)
[[ -z "$SEED" ]] && SEED=$(date +%s)

# [[ -z "$SEED" ]] && SEED=1711594639
[[ -z "$BINARY" ]] && BINARY=false
[[ -z "$FAST" ]] && FAST=false

trap "echo random seed is $SEED" EXIT

if ! docker ps | grep -q minio; then
    docker run -d -p 9000:9000 --name minio \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
fi
[[ ! -f /usr/local/bin/mc ]] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && chmod +x /usr/local/bin/mc
sleep 3s
mc alias set myminio http://localhost:9000 minioadmin minioadmin
[[ ! -x random-test ]] && wget -q https://juicefs-com-static.oss-cn-shanghai.aliyuncs.com/random-test/random-test -O random-test && chmod +x random-test
python3 -c "import xattr" || sudo pip install xattr

test_dump_load_with_rmr()
{
    # ref: https://github.com/juicedata/juicefs/pull/6188
    prepare_test
    ./juicefs format $META_URL1 myjfs --trash-days 0 --enable-acl
    ./juicefs mount -d $META_URL1 /jfs --enable-xattr
    dd if=/dev/urandom of=/jfs/file1 bs=1M count=1024
    ./juicefs dump $META_URL1 dump1.json
    ./juicefs dump $META_URL1 dump1 $(get_dump_option)
    create_database $META_URL2
    ./juicefs load $META_URL2 dump1 $(get_load_option)
    ./juicefs dump $META_URL2 dump2.json
    compare_dump_json dump1.json dump2.json
    ./juicefs mount -d $META_URL2 /jfs2 --no-bgjob
    ./juicefs rmr --skip-trash /jfs2/file1
    JFS_GC_SKIPPEDTIME=1 ./juicefs gc $META_URL2 2>&1| tee gc.log
    count=$(sed -n 's/.*\([0-9]\+\) leaked.*/\1/p' gc.log)
    [[ "$count" -ne 0 ]] && echo "Expected 0 leaked file, but got $count" && exit 1 || true
}

skip_test_dump_load_with_fsrand()
{
    # unskip the test after fix: https://github.com/juicedata/juicefs/issues/6230
    prepare_test
    ./juicefs format $META_URL1 myjfs --trash-days 0 --enable-acl
    ./juicefs mount -d $META_URL1 /jfs --enable-xattr
    rm -rf /tmp/test
    SEED=$SEED LOG_LEVEL=WARNING MAX_EXAMPLE=30 STEP_COUNT=20 PROFILE=generate ROOT_DIR1=/jfs/test ROOT_DIR2=/tmp/test python3 .github/scripts/hypo/fs.py || true    
    for i in {1..60}; do 
        JFS_GC_SKIPPEDTIME=1 ./juicefs gc -v $META_URL1 2>&1| tee gc.log
        count=$(sed -n 's/.*\([0-9]\+\) leaked.*/\1/p' gc.log)
        if [[ "$count" -eq 0 ]]; then 
            echo "Expected 0 leaked file after rmr /jfs2/test, got $count"
            break
        else
            echo "Expected 0 leaked file after rmr /jfs2/test, got $count, retrying..."
            sleep 1s
        fi
        [[ $i -eq 60 ]] && echo "Expected 0 leaked file after rmr /jfs2/test, but got $count" && exit 1 || true
    done
    ./juicefs dump $META_URL1 dump1.json
    ./juicefs dump $META_URL1 dump1 $(get_dump_option)
    create_database $META_URL2
    ./juicefs load $META_URL2 dump1 $(get_load_option)
    ./juicefs dump $META_URL2 dump2.json $(get_dump_option)
    # compare_dump_json
    ./juicefs mount -d $META_URL2 /jfs2 --no-bgjob
    diff -ur /jfs/test /jfs2/test --no-dereference
    compare_stat_acl_xattr /jfs/test /jfs2/test
    ./juicefs rmr --skip-trash /jfs2/test
    for i in {1..60}; do 
        JFS_GC_SKIPPEDTIME=1 ./juicefs gc -v $META_URL2 2>&1| tee gc.log
        count=$(sed -n 's/.*\([0-9]\+\) leaked.*/\1/p' gc.log)
        if [[ "$count" -eq 0 ]]; then 
            echo "Expected 0 leaked file after rmr /jfs2/test, got $count"
            break
        else
            echo "Expected 0 leaked file after rmr /jfs2/test, got $count, retrying..."
            sleep 1s
        fi
        [[ $i -eq 60 ]] && echo "Expected 0 leaked file after rmr /jfs2/test, but got $count" && exit 1 || true
    done
}

skip_test_dump_load_with_random_test()
{
    # unskip the test after fix: https://github.com/juicedata/juicefs/issues/6230
    prepare_test
    ./juicefs format $META_URL1 myjfs --trash-days 0 --enable-acl
    ./juicefs mount -d $META_URL1 /jfs --enable-xattr
    ./random-test runOp --baseDir /jfs/test --logDir random-test-log --withData --writeSize 1,10240 \
             --duration 30s --files 10000000 --ops 100000000 --threads 200 --dirSize 100 \
             --mkdirOp 10,uniform -createOp 10,uniform -readOp 1,uniform -lsOp 1,uniform -deleteOp 0.01,uniform -rmrOp 0.01,end -renameOp 1,uniform -linkOp 3,uniform  
    ./juicefs clone /jfs/test /jfs/test_clone
    ./juicefs dump $META_URL1 dump1.json
    ./juicefs dump $META_URL1 dump1 $(get_dump_option)
    create_database $META_URL2
    ./juicefs load $META_URL2 dump1 $(get_load_option)
    ./juicefs dump $META_URL2 dump2.json $(get_dump_option)
    ./juicefs mount -d $META_URL2 /jfs2 --no-bgjob
    diff -ur /jfs/test /jfs2/test --no-dereference
    diff -ur /jfs/test_clone /jfs2/test_clone --no-dereference
    ./juicefs clone /jfs2/test /jfs2/test_clone2
    for dir in /jfs2/test_clone /jfs2/test /jfs2/test_clone2; do
        ./juicefs rmr --skip-trash $dir
        JFS_GC_SKIPPEDTIME=1 ./juicefs gc -v $META_URL2 2>&1| tee gc.log
        count=$(sed -n 's/.*\([0-9]\+\) leaked.*/\1/p' gc.log)
        [[ "$count" -ne 0 ]] && echo "Expected 0 leaked file after rmr $dir, but got $count" && exit 1 || true
    done
}

compare_dump_json(){
    cat dump1.json
    cat dump2.json
    cp dump1.json dump1.json.bak
    cp dump2.json dump2.json.bak
    sed -i '/usedSpace/d' dump*.json.bak
    sed -i '/usedInodes/d' dump*.json.bak
    sed -i '/nextInodes/d' dump*.json.bak
    sed -i '/nextChunk/d' dump*.json.bak
    sed -i '/nextTrash/d' dump*.json.bak
    sed -i '/nextSession/d' dump*.json.bak
    sed -i 's/"inode":[0-9]\+/"inode":0/g' dump*.json.bak
    diff -ur dump1.json.bak dump2.json.bak
    echo "compare_dump_json: dump json files are the same"
}

compare_stat_acl_xattr(){
    dir1=$1
    dir2=$2
    files1=($(find "$dir1" -type f -o -type d -exec stat -c "%n" {} + | sort))
    files2=($(find "$dir2" -type f -o -type d -exec stat -c "%n" {} + | sort))
    [[ ${#files1[@]} -ne ${#files2[@]} ]] && echo "compare_stat_acl: number of files differs" && exit 1
    for i in "${!files1[@]}"; do
        stat1=$(stat -c "%F %a %s %h %U %G" "${files1[$i]}")
        stat2=$(stat -c "%F %a %s %h %U %G" "${files2[$i]}")
        acl1=$(getfacl -p "${files1[$i]}" | tail -n +2)
        acl2=$(getfacl -p "${files2[$i]}" | tail -n +2)
        xattr1=$(getfattr -d -m . -e hex "${files1[$i]}" 2>/dev/null | tail -n +2 | sort)
        xattr2=$(getfattr -d -m . -e hex "${files2[$i]}" 2>/dev/null | tail -n +2 | sort)
        [[ "$stat1" != "$stat2" ]] && echo "compare_stat_acl: stat for ${files1[$i]} and ${files2[$i]} differs" && echo $stat1 && echo $stat2 && exit 1
        [[ "$acl1" != "$acl2" ]] && echo "compare_stat_acl: ACLs for ${files1[$i]} and ${files2[$i]} differs" && echo $acl1 && echo $acl2 && exit 1
        [[ "$xattr1" != "$xattr2" ]] && echo "compare_stat_acl: xattrs for ${files1[$i]} and ${files2[$i]} differs" && echo $xattr1 && echo $xattr2 && exit 1

    done
    echo "compare_stat_acl: ACLs and stats are the same"
}

get_dump_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    elif [[ "$FAST" == "true" ]]; then
        option="--fast"
    else
        option=""
    fi
    echo $option
}

get_load_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    else
        option=""
    fi
    echo $option
}

prepare_test(){
    umount_jfs /jfs $META_URL1
    umount_jfs /jfs2 $META_URL2
    python3 .github/scripts/flush_meta.py $META_URL1
    python3 .github/scripts/flush_meta.py $META_URL2
    rm -rf /var/jfs/myjfs || true
    mc rm --force --recursive myminio/test || true
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/format.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

SMB_CONTAINER_NAME="juicefs-ci-smb"
SMB_USER="juicefs"
SMB_PASSWORD="juicefs"
SMB_SHARE="share"

cleanup_smb_container()
{
    docker rm -f "$SMB_CONTAINER_NAME" >/dev/null 2>&1 || true
    rm -rf /tmp/${SMB_CONTAINER_NAME}-data >/dev/null 2>&1 || true
}

start_smb_container()
{
    cleanup_smb_container
    mkdir -p /tmp/${SMB_CONTAINER_NAME}-data
    chmod 0777 /tmp/${SMB_CONTAINER_NAME}-data
    if [[ "$(uname)" == "Darwin" ]]; then
        docker run -d --name "$SMB_CONTAINER_NAME" -p 1445:445 \
            -v /tmp/${SMB_CONTAINER_NAME}-data:/mount \
            dperson/samba \
            -u "$SMB_USER;$SMB_PASSWORD" \
            -s "$SMB_SHARE;/mount;yes;no;no;$SMB_USER" >/dev/null
        wait_tcp_ready 127.0.0.1 1445 40
        SMB_ENDPOINT="127.0.0.1:1445/${SMB_SHARE}"
        export SMB_ENDPOINT
        return
    fi

    docker run -d --name "$SMB_CONTAINER_NAME" \
        -v /tmp/${SMB_CONTAINER_NAME}-data:/mount \
        dperson/samba \
        -u "$SMB_USER;$SMB_PASSWORD" \
        -s "$SMB_SHARE;/mount;yes;no;no;$SMB_USER" >/dev/null

    local container_ip
    container_ip=$(docker container inspect "$SMB_CONTAINER_NAME" --format '{{ .NetworkSettings.IPAddress }}')
    wait_tcp_ready "$container_ip" 445 40
    SMB_ENDPOINT="${container_ip}/${SMB_SHARE}"
    export SMB_ENDPOINT
}

assert_objbench_result()
{
    local log_file=$1
    local test_name=$2
    local expected=$3
    if ! grep -E "${test_name}.*${expected}" "$log_file" >/dev/null; then
        echo "objbench assertion failed: test=${test_name}, expected=${expected}"
        echo "--- objbench log ---"
        cat "$log_file"
        exit 1
    fi
}

kill_gateway_by_port()
{
    local port=$1
    lsof -t -i :$port | xargs -r kill -9 >/dev/null 2>&1 || true
}

wait_tcp_ready()
{
    local host=$1
    local port=$2
    local timeout=${3:-30}
    for _ in $(seq 1 "$timeout"); do
        if (echo > /dev/tcp/${host}/${port}) >/dev/null 2>&1; then
            return
        fi
        sleep 1
    done
    echo "tcp ${host}:${port} is not ready in ${timeout} seconds"
    exit 1
}

ensure_mc_binary()
{
    if [[ -x ./mc ]]; then
        return
    fi
    local os_arch
    local cpu_arch
    cpu_arch=$(uname -m)
    if [[ "$(uname)" == "Darwin" ]]; then
        if [[ "$cpu_arch" == "arm64" ]]; then
            os_arch="darwin-arm64"
        else
            os_arch="darwin-amd64"
        fi
    else
        if [[ "$cpu_arch" == "aarch64" || "$cpu_arch" == "arm64" ]]; then
            os_arch="linux-arm64"
        else
            os_arch="linux-amd64"
        fi
    fi
    wget -q "https://dl.min.io/client/mc/release/${os_arch}/mc" -O ./mc
    chmod +x ./mc
}

generate_sha_manifest()
{
    local root_dir=$1
    local output_file=$2
    rm -f "$output_file"
    if [[ "$(uname)" == "Darwin" ]]; then
        while IFS= read -r rel; do
            sum=$(shasum -a 256 "$root_dir/$rel" | awk '{print $1}')
            echo "$sum  $rel" >> "$output_file"
        done < <(cd "$root_dir" && find . -type f | sort | sed 's#^\./##')
    else
        while IFS= read -r rel; do
            sum=$(sha256sum "$root_dir/$rel" | awk '{print $1}')
            echo "$sum  $rel" >> "$output_file"
        done < <(cd "$root_dir" && find . -type f | sort | sed 's#^\./##')
    fi
}

prepare_sync_source_tree()
{
    local src_dir=$1
    mkdir -p "$src_dir/dir1/dir2"
    echo "hello-juicefs" > "$src_dir/plain.txt"
    echo "with space" > "$src_dir/dir1/file with space.txt"
    echo "cifs-中文文件" > "$src_dir/dir1/中文文件.txt"
    : > "$src_dir/empty.file"
    dd if=/dev/urandom of="$src_dir/dir1/dir2/binary.bin" bs=1M count=4 >/dev/null 2>&1
}

skip_test_mount_process_exit_on_format()
{
    prepare_test
    echo "round $i"
    ./juicefs format $META_URL volume-$i
    ./juicefs mount -d $META_URL /tmp/myjfs$i_$j --no-usage-report
    cd /tmp/myjfs$i_$j
    bash -c 'for k in {1..300}; do echo abc>$k; sleep 0.2; done' || true & 
    cd -
    sleep 3
    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4) 
    ./juicefs destroy --force $META_URL $uuid
    ./juicefs format $META_URL new-volume-$i 
    sleep 15   
    ps -ef | grep juicefs
    # TODO: fix the bug and remove the following line
    # SEE https://github.com/juicedata/juicefs/issues/4534
    pidof juicefs && exit 1
    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4) 
    ./juicefs destroy --force $META_URL $uuid
}

test_format_sftp_object()
{
    docker run -d --name sftp -p 2222:22 juicedata/ci-sftp
    prepare_test
    CONTAINER_IP=$(docker container inspect sftp --format '{{ .NetworkSettings.IPAddress }}')
    echo "round $i"
    ./juicefs format $META_URL volume-$i --storage sftp \
    --bucket $CONTAINER_IP:myjfs/ \
    --access-key testUser1 \
    --secret-key password
    ./juicefs mount -d $META_URL /tmp/jfs --no-usage-report --cache-size 0
    cd /tmp/jfs
    bash -c 'for k in {1..100}; do echo abc>$k; sleep 0.1; done' || true &
    bg_pid=$!
    cd -
    sleep 1
    docker stop sftp
    sleep 10
    docker start sftp
    sleep 2
    wait $bg_pid
    echo "Checking JuiceFS read/write"
    echo abc > /tmp/jfs/101
    for k in {1..100}; do
        if [[ $(cat /tmp/jfs/$k) != "abc" ]]; then
            echo "ERROR: File $k corrupted after SFTP restart!"
            exit 1
        fi
    done
    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --force $META_URL $uuid
    ./juicefs format $META_URL new-volume-$i
}

test_format_cifs_objbench_matrix()
{
    prepare_test
    start_smb_container
    local log_raw=/tmp/objbench-cifs-raw.log
    local log_plain=/tmp/objbench-cifs.log
    ./juicefs objbench --storage cifs \
        --access-key "$SMB_USER" \
        --secret-key "$SMB_PASSWORD" \
        --threads 2 \
        --small-objects 5 \
        --small-object-size 4K \
        --block-size 1M \
        --big-object-size 8M \
        "$SMB_ENDPOINT" 2>&1 | tee "$log_raw"

    sed -E 's/\x1B\[[0-9;]*[mK]//g' "$log_raw" > "$log_plain"

    assert_objbench_result "$log_plain" "create a bucket" "pass"
    assert_objbench_result "$log_plain" "put an object" "pass"
    assert_objbench_result "$log_plain" "get an object" "pass"
    assert_objbench_result "$log_plain" "get non-exist" "pass"
    assert_objbench_result "$log_plain" "get partial object" "pass"
    assert_objbench_result "$log_plain" "head an object" "pass"
    assert_objbench_result "$log_plain" "delete an object" "pass"
    assert_objbench_result "$log_plain" "delete non-exist" "pass"
    assert_objbench_result "$log_plain" "list objects" "pass"
    assert_objbench_result "$log_plain" "special key" "put encode file failed"
    assert_objbench_result "$log_plain" "put a big object" "pass"
    assert_objbench_result "$log_plain" "put an empty object" "pass"
    assert_objbench_result "$log_plain" "multipart upload" "not support"
    assert_objbench_result "$log_plain" "change owner/group" "failed to chown object"
    assert_objbench_result "$log_plain" "change permission" "expect mode 777 but got"
    assert_objbench_result "$log_plain" "change mtime" "pass"

    cleanup_smb_container
}

test_format_smb_object_alias()
{
    prepare_test
    start_smb_container
    local volume_name="smb-alias-$RANDOM"
    local mount_point="/tmp/jfs-smb-$RANDOM"
    ./juicefs format $META_URL "$volume_name" --storage smb \
        --bucket "$SMB_ENDPOINT" \
        --access-key "$SMB_USER" \
        --secret-key "$SMB_PASSWORD"

    mkdir -p "$mount_point"
    ./juicefs mount -d $META_URL "$mount_point" --no-usage-report --cache-size 0

    echo "smb-alias-ok" > "$mount_point/smb-alias.txt"
    read_content=$(cat "$mount_point/smb-alias.txt")
    [[ "$read_content" != "smb-alias-ok" ]] && echo "smb alias read/write check failed" && exit 1

    ./juicefs umount "$mount_point" || true
    rm -rf "$mount_point"

    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --force $META_URL $uuid
    cleanup_smb_container
}

test_format_cifs_sync_consistency()
{
    prepare_test
    start_smb_container
    local volume_name="cifs-sync-$RANDOM"
    local mount_point="/tmp/jfs-cifs-sync-$RANDOM"
    local mount_data_dir
    local src_dir="/tmp/cifs-sync-src-$RANDOM"
    local dst_dir="/tmp/cifs-sync-dst-$RANDOM"
    local src_manifest="/tmp/cifs-sync-src-$RANDOM.sha256"
    local dst_manifest="/tmp/cifs-sync-dst-$RANDOM.sha256"

    ./juicefs format $META_URL "$volume_name" --storage cifs \
        --bucket "$SMB_ENDPOINT" \
        --access-key "$SMB_USER" \
        --secret-key "$SMB_PASSWORD"

    mkdir -p "$mount_point"
    ./juicefs mount -d $META_URL "$mount_point" --no-usage-report --cache-size 0
    mount_data_dir="$mount_point/sync-data"
    mkdir -p "$mount_data_dir"

    rm -rf "$src_dir" "$dst_dir"
    mkdir -p "$src_dir" "$dst_dir"
    prepare_sync_source_tree "$src_dir"

    ./juicefs sync "$src_dir/" "$mount_data_dir/" --threads 8 --dirs
    ./juicefs sync "$mount_data_dir/" "$dst_dir/" --threads 8 --dirs

    generate_sha_manifest "$src_dir" "$src_manifest"
    generate_sha_manifest "$dst_dir" "$dst_manifest"
    diff "$src_manifest" "$dst_manifest"

    src_count=$(find "$src_dir" -type f | wc -l | tr -d ' ')
    dst_count=$(find "$dst_dir" -type f | wc -l | tr -d ' ')
    [[ "$src_count" != "$dst_count" ]] && echo "sync file count mismatch: $src_count vs $dst_count" && exit 1

    ./juicefs umount "$mount_point" || true
    rm -rf "$mount_point" "$src_dir" "$dst_dir"

    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --force $META_URL $uuid
    cleanup_smb_container
}

test_format_cifs_object_recovery()
{
    prepare_test
    start_smb_container
    local volume_name="cifs-recovery-$RANDOM"
    local mount_point="/tmp/jfs-cifs-recovery-$RANDOM"

    ./juicefs format $META_URL "$volume_name" --storage cifs \
        --bucket "$SMB_ENDPOINT" \
        --access-key "$SMB_USER" \
        --secret-key "$SMB_PASSWORD"

    mkdir -p "$mount_point"
    ./juicefs mount -d $META_URL "$mount_point" --no-usage-report --cache-size 0

    for k in {1..20}; do
        echo "before-restart-$k" > "$mount_point/before-$k.txt"
    done

    docker stop "$SMB_CONTAINER_NAME"
    sleep 8
    docker start "$SMB_CONTAINER_NAME"
    container_ip=$(docker container inspect "$SMB_CONTAINER_NAME" --format '{{ .NetworkSettings.IPAddress }}')
    wait_tcp_ready "$container_ip" 445 40
    sleep 3

    for k in {1..20}; do
        content=$(cat "$mount_point/before-$k.txt")
        [[ "$content" != "before-restart-$k" ]] && echo "file check failed after restart: before-$k.txt" && exit 1
    done
    echo "after-restart" > "$mount_point/after-restart.txt"
    [[ "$(cat "$mount_point/after-restart.txt")" != "after-restart" ]] && echo "write/read failed after cifs restart" && exit 1

    ./juicefs umount "$mount_point" || true
    rm -rf "$mount_point"

    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --force $META_URL $uuid
    cleanup_smb_container
}

test_format_cifs_gateway_read_write()
{
    prepare_test
    start_smb_container
    ensure_mc_binary
    local volume_name="cifs-gateway-$RANDOM"
    local gateway_port=9015

    ./juicefs format $META_URL "$volume_name" --storage cifs \
        --bucket "$SMB_ENDPOINT" \
        --access-key "$SMB_USER" \
        --secret-key "$SMB_PASSWORD"

    kill_gateway_by_port $gateway_port
    export MINIO_ROOT_USER=admin
    export MINIO_ROOT_PASSWORD=admin123
    ./juicefs gateway $META_URL 127.0.0.1:${gateway_port} --multi-buckets --keep-etag --object-tag -background
    wait_tcp_ready 127.0.0.1 $gateway_port 30

    ./mc alias set cifsgw http://127.0.0.1:${gateway_port} admin admin123 --api S3v4
    ./mc mb cifsgw/test-cifs-gw
    echo "gateway-cifs-ok" > /tmp/cifs-gateway-file.txt
    ./mc cp /tmp/cifs-gateway-file.txt cifsgw/test-cifs-gw/cifs-gateway-file.txt
    ./mc cat cifsgw/test-cifs-gw/cifs-gateway-file.txt | grep "gateway-cifs-ok"

    ./mc rm cifsgw/test-cifs-gw/cifs-gateway-file.txt
    ./mc rb cifsgw/test-cifs-gw --force
    kill_gateway_by_port $gateway_port

    uuid=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4)
    ./juicefs destroy --force $META_URL $uuid
    cleanup_smb_container
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/fsck.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_fix_nlink(){
    if [[ "$META" == "sqlite3" ]]; then
        do_fix_nlink_sqlite3
    fi
}
do_fix_nlink_sqlite3(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs/a
    mkdir /jfs/a/b
    touch /jfs/a/c
    sleep 4s # to wait dir stat update
    ./juicefs fsck $META_URL --path / -r
    sqlite3 test.db "update jfs_node set nlink=100 where inode=2"
    sqlite3 test.db "select nlink from jfs_node where inode=2"
    ./juicefs fsck $META_URL --path / -r && exit 1 || true
    ./juicefs fsck $META_URL --path / -r --repair
    ./juicefs fsck $META_URL --path / -r
}

test_sync_dir_stat()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs mdtest $META_URL /d --depth 15 --dirs 2 --files 100 --threads 10 & 
    pid=$!
    sleep 15s
    kill -9 $pid
    ./juicefs info -r /jfs/d
    ./juicefs info -r /jfs/d --strict 
    ./juicefs fsck $META_URL --path /d --sync-dir-stat --repair -r
    ./juicefs info -r /jfs/d | tee info1.log
    ./juicefs info -r /jfs/d --strict | tee info2.log
    diff info1.log info2.log
    rm info*.log
    ./juicefs fsck $META_URL --path / --sync-dir-stat --repair -r
    ./juicefs info -r /jfs | tee info1.log
    ./juicefs info -r /jfs --strict | tee info2.log
    diff info1.log info2.log
}

test_fsck_with_random_test()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    ./random-test runOp -baseDir /jfs/test -files 500000 -ops 5000000 -threads 50 -dirSize 100 -duration 30s -createOp 30,uniform -deleteOp 5,end --linkOp 10,uniform  --symlinkOp 20,uniform --setXattrOp 10,uniform --truncateOp 10,uniform    
    ./juicefs fsck $META_URL --path /test --sync-dir-stat --repair -r
    ./juicefs info -r /jfs | tee info1.log
    ./juicefs info -r /jfs --strict | tee info2.log
    diff info1.log info2.log || true
}

test_fsck_delete_object()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    echo "test" > /jfs/test.txt
    sleep 1
    object=$(./juicefs info /jfs/test.txt | grep chunks | awk '{print $4}')
    rm /var/jfs/$object
    ./juicefs fsck $META_URL 2>&1 | tee fsck.log
    grep -q "1 objects are lost" fsck.log || exit 1
    rm fsck.log
 #   ./juicefs fsck $META_URL --path / --sync-dir-stat --repair -r 2>&1 | tee fsck.log
 #   grep -q "1 objects are lost" fsck.log || exit 1
 #   rm fsck.log
    ./juicefs rmr /jfs/test.txt --skip-trash
    ./juicefs fsck $META_URL || { echo "files is deleted, fsck should success"; exit 1; }
}

test_sync_dir_df()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs mdtest $META_URL /d --depth 15 --dirs 2 --files 100 --threads 10 & 
    pid=$!
    sleep 60s
    kill -9 $pid
    ./juicefs info -r /jfs/d --strict
    #df -h /jfs的Used和
    df -h /jfs
    ./juicefs fsck $META_URL --path /d --sync-dir-stat --repair -r
    ./juicefs info -r /jfs/d | tee info1.log
    ./juicefs info -r /jfs/d --strict | tee info2.log
    diff info1.log info2.log
    rm info*.log
    ./juicefs fsck $META_URL --path / --sync-dir-stat --repair -r
    ./juicefs info -r /jfs | tee info1.log
    ./juicefs info -r /jfs --strict | tee info2.log
    diff info1.log info2.log
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/gateway-random.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
[[ -z "$SUBDIR" ]] && SUBDIR=false
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
[[ ! -x /usr/local/bin/mc ]] && wget -q https://dl.min.io/client/mc/release/linux-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z -O /usr/local/bin/mc && sudo chmod +x /usr/local/bin/mc
# docker ps -aq --filter "status=exited" --filter "name=minio_old" | xargs -r docker rm -v
if ! docker ps --filter "name=minio_old$" | grep minio_old; then
    echo start minio_old
    docker run -d -p 9000:9000 --name minio_old -e "MINIO_ACCESS_KEY=minioadmin" -e "MINIO_SECRET_KEY=minioadmin" minio/minio:RELEASE.2021-04-22T15-44-28Z server /tmp/minio_old
    while ! curl -s http://localhost:9000/minio/health/live > /dev/null; do
        echo "Waiting for MinIO to be ready..."
        sleep 1
    done
    echo "MinIO is ready."
fi

timeout 30 bash -c 'counter=0; until lsof -i:9000; do echo -ne "wait port ready in $counter\r" && ((counter++)) && sleep 1; done'

[[ -n $CI ]] && trap 'kill_gateway 9005;' EXIT
kill_gateway() {
    port=$1
    lsof -i:$port || true
    lsof -t -i :$port | xargs -r kill -9 || true
}

prepare_test()
{
    umount_jfs /tmp/jfs $META_URL
    kill_gateway 9005
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
    ./juicefs format $META_URL myjfs  --trash-days 0
    ./juicefs mount -d $META_URL /tmp/jfs
    if [ "$SUBDIR" = true ]; then
        echo "start gateway with subdir"
        mkdir /tmp/jfs/subdir
        MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin ./juicefs gateway \
            $META_URL localhost:9005 --multi-buckets --keep-etag -d --subdir /subdir
    else
        MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin ./juicefs gateway \
            $META_URL localhost:9005 --multi-buckets --keep-etag -d
    fi
}

test_run_example()
{
    prepare_test
    python3 .github/scripts/hypo/s3_test.py
}

test_run_all()
{
    prepare_test
    python3 .github/scripts/hypo/s3.py
}


source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/gateway.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=redis
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
wget https://dl.min.io/client/mc/release/linux-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z -O mc
chmod +x mc
export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=admin123
export MINIO_REFRESH_IAM_INTERVAL=3s

prepare_test()
{
    umount_jfs /tmp/jfs $META_URL
    kill_gateway 9001
    kill_gateway 9002
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
    rm -rf /var/jfsCache/myjfs || true
}

kill_gateway() {
    port=$1
    lsof -i:$port || true
    lsof -t -i :$port | xargs -r kill -9 || true
}

trap 'kill_gateway 9001; kill_gateway 9002; kill_gateway 9003' EXIT

start_two_gateway()
{
    prepare_test
    ./juicefs format $META_URL myjfs  --trash-days 0
    ./juicefs mount -d $META_URL /tmp/jfs
    export MINIO_ROOT_USER=admin
    export MINIO_ROOT_PASSWORD=admin123
    ./juicefs gateway $META_URL 127.0.0.1:9001 --multi-buckets --keep-etag --object-tag -background
    sleep 1
    ./juicefs gateway $META_URL 127.0.0.1:9002 --multi-buckets --keep-etag --object-tag -background 
    sleep 2
    ./mc alias set gateway1 http://127.0.0.1:9001 admin admin123
    ./mc alias set gateway2 http://127.0.0.1:9002 admin admin123
}

test_user_management()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    sleep 5
    user=$(./mc admin user list gateway2 | grep user1) || true
    if [ -z "$user" ]
    then
      echo "user synchronization error"
      exit 1
    fi
    ./mc mb gateway1/test1
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "By default, the user has no read and write permission"
      exit 1
    fi
    ./mc admin policy set gateway1 readwrite user=user1
    if ./mc cp mc gateway1_user1/test1/file1
    then 
      echo "readwrite policy can read and write objects" 
    else
      echo "set readwrite policy fail"
      exit 1
    fi
    ./mc cp gateway2/test1/file1 .
    compare_md5sum file1 mc  
    ./mc admin user disable gateway1 user1
    ./mc admin user remove gateway2 user1
    sleep 5
    user=$(./mc admin user list gateway1 | grep user1) || true
    if [ ! -z "$user" ]
    then
      echo "remove user user1 fail"
      echo $user
      exit 1
    fi
}

test_group_management()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin user add gateway1 user2 admin123
    ./mc admin user add gateway1 user3 admin123
    ./mc admin group add gateway1 testcents user1 user2 user3
    result=$(./mc admin group info gateway1 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
    ./mc admin policy set gateway1 readwrite group=testcents
    sleep 5
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    ./mc mb gateway1/test1
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "readwrite policy can read write"
    else
      echo "the readwrite group has no read and write permission"
      exit 1
    fi
    ./mc admin policy set gateway1 readonly group=testcents
    sleep 5
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "readonly group policy can not write"
      exit 1
    else
      echo "the readonly group has no write permission"
    fi

    ./mc admin group remove gateway1 testcents user1 user2 user3 
    ./mc admin group remove gateway1 testcents
}

test_mult_gateways_set_group()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin user add gateway1 user2 admin123
    ./mc admin user add gateway1 user3 admin123
    ./mc admin group add gateway1 testcents user1 user2 user3
    ./mc admin group disable gateway2 testcents
    sleep 5
    result=$(./mc admin group info gateway2 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
    ./mc admin group enable gateway1 testcents
    ./mc admin user add gateway1 user4 admin123
    ./mc admin group add gateway1 testcents user4
    sleep 1
    ./mc admin group disable gateway2 testcents
    sleep 5
    result=$(./mc admin group info gateway2 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3,user4" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
}

test_user_svcacct_add()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin policy set gateway1 consoleAdmin user=user1
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    ./mc admin user svcacct add gateway1_user1 user1 --access-key 12345678 --secret-key 12345678
    ./mc admin user svcacct info gateway1_user1 12345678
    ./mc admin user svcacct set gateway1_user1 12345678 --secret-key 123456789
    ./mc alias set svcacct1 http://127.0.0.1:9001 12345678 123456789
    ./mc mb svcacct1/test1
    if ./mc cp mc svcacct1/test1/file1
    then
      echo "svcacct user consoleAdmin policy can read write"
    else
      echo "the svcacct user has no read and write permission"
      exit 1
    fi
    ./mc admin user svcacct disable gateway1_user1 12345678
    ./mc admin user svcacct rm gateway1_user1 12345678
}

test_user_admin_svcacct_add()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin policy set gateway1 readwrite user=user1
    ./mc admin user svcacct add gateway1 user1 --access-key 12345678 --secret-key 12345678
    ./mc admin user svcacct info gateway1 12345678
    ./mc admin user svcacct set gateway1 12345678 --secret-key 12345678910
    ./mc alias set svcacct1 http://127.0.0.1:9001 12345678 12345678910
    ./mc mb svcacct1/test1
    if ./mc cp mc svcacct1/test1/file1
    then
      echo "amdin user can do svcacct "
    else
      echo "the svcacct user has no read and write permission"
      exit 1
    fi
    ./mc admin user svcacct disable gateway1 12345678
    ./mc admin user svcacct rm gateway1 12345678
}

test_user_sts()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin policy set gateway1 consoleAdmin user=user1
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    git clone https://github.com/juicedata/minio.git -b gateway-1.1
    ./mc mb gateway1_user1/test1
    ./mc cp mc gateway1_user1/test1/mc
    cd minio
    go run docs/sts/assume-role.go -sts-ep http://127.0.0.1:9001 -u user1 -p admin123 -b test1 -d
    go run docs/sts/assume-role.go -sts-ep http://127.0.0.1:9001 -u user1 -p admin123 -b test1
    cd -
    ./mc admin user remove gateway1 user1     
}


skip_test_change_credentials()
{
    prepare_test
    start_two_gateway
    ./mc mb gateway1/test1
    ./mc cp mc gateway1/test1/file1
    lsof -i :9001 | awk 'NR!=1 {print $2}' | xargs -r kill -9 || true
    lsof -i :9002 | awk 'NR!=1 {print $2}' | xargs -r kill -9 || true
    export MINIO_ROOT_USER=newadmin
    export MINIO_ROOT_PASSWORD=newadmin123
    export MINIO_ROOT_USER_OLD=admin
    export MINIO_ROOT_PASSWORD_OLD=admin123
    ./juicefs gateway $META_URL 127.0.0.1:9001 --multi-buckets --keep-etag --object-tag -background
    ./juicefs gateway $META_URL 127.0.0.1:9002 --multi-buckets --keep-etag --object-tag -background
    sleep 5
    ./mc alias set gateway1 http://127.0.0.1:9001 newadmin newadmin123
    ./mc alias set gateway2 http://127.0.0.1:9002 newadmin newadmin123
    ./mc cp gateway1/test1/file1 file1
    ./mc cp gateway2/test1/file1 file2
    compare_md5sum file1 mc
    compare_md5sum file2 mc  
}


test_ro_gateway()
{   
    prepare_test
    start_two_gateway
    ./juicefs gateway $META_URL 127.0.0.1:9003 --read-only --multi-buckets --keep-etag --object-tag -background    
    ./mc alias set gateway3 http://127.0.0.1:9003 admin admin123 
    ./mc mb gateway1/test1
    ./mc cp mc gateway1/test1/file1
    ./mc admin user add gateway1 user1 admin123
    sleep 4
    user=$(./mc admin user list gateway3 | grep user1) || true
    [[ -z "$user" ]] && echo "user synchronization error" && exit 1 || true
    ./mc mb gateway3/test3 && echo "By default, the ro has no write permission for creating buckets" && exit 1 || true
    ./mc cp mc gateway3/test1/file1 && echo "By default, the ro has no write permission for copying files" && exit 1 || true
    ./mc cp gateway3/test1/file1 .
    diff mc file1
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/gc.sh
================================================
#!/bin/bash -e

python3 -c "import xattr" || pip install xattr 
dpkg -s redis-tools || .github/scripts/apt_install.sh redis-tools
dpkg -s fio || .github/scripts/apt_install.sh fio
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_delay_delete_slice_after_compaction(){
    if [[ "$META" != redis* ]]; then
        echo "this test only runs for redis meta engine"
        return
    fi
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 1
    ./juicefs mount -d $META_URL /jfs --no-usage-report
    fio --name=abc --rw=randwrite --refill_buffers --size=500M --bs=256k --directory=/jfs
    redis-cli save
    # don't skip files when gc compact
    export JFS_SKIPPED_TIME=1
    ./juicefs gc --compact --delete $META_URL
    killall -9 redis-server
    sleep 3
    ./juicefs fsck $META_URL
}

test_gc_trash_slices(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    PATH1=/tmp/test PATH2=/jfs/test python3 .github/scripts/random_read_write.py 
    ./juicefs status --more $META_URL
    ./juicefs config $META_URL --trash-days 0 --yes
    ./juicefs gc $META_URL 
    ./juicefs gc $META_URL --delete
    ./juicefs status --more $META_URL
}

test_gc_trash_files(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    python3 .github/scripts/fsrand.py -c 1000 /jfs/fsrand
    rm -rf /jfs/fsrand
    ./juicefs status --more $META_URL
    ./juicefs config $META_URL --trash-days 0 --yes
    ./juicefs gc $META_URL 
    ./juicefs gc $META_URL --delete
    ./juicefs status --more $META_URL
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/graceful_upgrade.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
LEGACY_META_URL=$META_URL
if [[ "$META" == "redis" ]]; then
    LEGACY_META_URL=${META_URL%%\?*}
fi
echo meta_url is $META_URL

dpkg -s fio >/dev/null 2>&1 || .github/scripts/apt_install.sh fio
dpkg -s attr >/dev/null 2>&1 || .github/scripts/apt_install.sh attr

if [[ ! -x "./juicefs-1.1" ]]; then 
    wget -q https://github.com/juicedata/juicefs/releases/download/v1.1.0/juicefs-1.1.0-linux-amd64.tar.gz
    rm /tmp/juicefs -rf && mkdir -p /tmp/juicefs
    tar -xzvf juicefs-1.1.0-linux-amd64.tar.gz -C /tmp/juicefs
    mv /tmp/juicefs/juicefs juicefs-1.1 && chmod +x juicefs-1.1 
    rm /tmp/juicefs -rf && rm juicefs-1.1.0-linux-amd64.tar.gz
    ./juicefs-1.1 version | grep "version 1.1"
fi
[[ ! -f my-priv-key.pem ]] && openssl genrsa -out my-priv-key.pem -aes256  -passout pass:12345678 2048


test_kill_mount_process()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount $META_URL /tmp/jfs -d
    wait_process_started 1
    force_kill_child_process
    sleep 3
    wait_process_started 2
    kill_parent_process
    wait_command_success "ps -ef | grep "mount" | grep "/tmp/jfs" | grep -v grep | wc -l" 0
    ./juicefs mount $META_URL /tmp/jfs -d
    kill_child_process
    wait_command_success "ps -ef | grep "mount" | grep "/tmp/jfs" | grep -v grep | wc -l" 0
    ./juicefs mount $META_URL /tmp/jfs -d
    ./juicefs umount /tmp/jfs
    wait_command_success "ps -ef | grep "mount" | grep "/tmp/jfs" | grep -v grep | wc -l" 0
}

skip_test_update_with_flock(){
    prepare_test 
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /tmp/jfs
    ps -ef | grep mount
    cat /tmp/jfs/.config | grep -i sid
    echo abc | tee /tmp/jfs/test
    sleep 1s
    flock -x /tmp/jfs/test -c cat & 
    sleep 1s
    flock -s /tmp/jfs/test -c "echo abc" > flock.log 2>&1 &
    sleep 1s
    exit 1
    ./juicefs mount -d $META_URL /tmp/jfs
    ps -ef | grep mount
    cat /tmp/jfs/.config | grep -i sid
    cat flock.log
    count=$(ps -ef | grep flock | grep -v grep | wc -l)
    [[ $count -ne 2 ]] && echo "flock process should be 2, count=$count" && exit 1 || true    
}

test_update_non_fuse_option(){
    prepare_test
    JFS_RSA_PASSPHRASE=12345678 ./juicefs format $META_URL myjfs --encrypt-rsa-key my-priv-key.pem
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL /tmp/jfs
    echo abc | tee /tmp/jfs/test
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL /tmp/jfs --read-only
    echo abc | tee /tmp/jfs/test && (echo "should not write read-only file system" && exit 1) || true
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL /tmp/jfs 
    echo abc | tee /tmp/jfs/test
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 2 ]] && echo "mount process count should be 2, count=$count" && exit 1 || true
    umount /tmp/jfs
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 0 ]] && echo "mount process count should be 0, count=$count" && exit 1 || true
}

test_update_on_failure(){
    prepare_test
    JFS_RSA_PASSPHRASE=12345678 ./juicefs format $META_URL myjfs --encrypt-rsa-key my-priv-key.pem
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL /tmp/jfs
    echo abc | tee /tmp/jfs/test
    JFS_RSA_PASSPHRASE=abc123xx ./juicefs mount -d $META_URL /tmp/jfs || true
    echo abc | tee /tmp/jfs/test
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 2 ]] && echo "mount process count should be 2, count=$count" && exit 1 || true
    umount /tmp/jfs
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 0 ]] && echo "mount process count should be 0, count=$count" && exit 1 || true
}
#TODO: fio test failed on database locked.
test_update_on_fio(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /tmp/jfs --buffer-size 300
    fio -name=fio -filename=/tmp/jfs/testfile -direct=1 -iodepth 16 -ioengine=libaio \
        -rw=randwrite -bs=4k -size=100M -numjobs=4 -runtime=30 -group_reporting >fio.log 2>&1 &
    fio_pid=$!
    trap "kill -9 $fio_pid > /dev/null || true" EXIT
    for i in {1..5}; do
        echo "update buffer-size to $((i+300))"
        ./juicefs mount -d $META_URL /tmp/jfs --buffer-size $((i+300))
        wait_command_success "ps -ef | grep juicefs | grep mount | grep \"buffer-size $((i+300))\" | wc -l" 2
        echo abc | tee /tmp/jfs/test
    done
    kill -9 $fio_pid > /dev/null 2>&1 || true
    # umount_jfs /tmp/jfs $META_URL
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 2 ]] && echo "mount process count should be 2, count=$count" && exit 1 || true
}

test_update_fuse_option(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /tmp/jfs --enable-xattr
    setfattr -n user.test -v "juicedata" /tmp/jfs
    getfattr -n user.test /tmp/jfs | grep juicedata
    ./juicefs mount -d $META_URL /tmp/jfs
    getfattr -n user.test /tmp/jfs && exit 1 || true
    ./juicefs mount -d $META_URL /tmp/jfs --enable-xattr
    getfattr -n user.test /tmp/jfs | grep juicedata
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 4 ]] && echo "mount process count should be 4, count=$count" && exit 1 || true
    umount /tmp/jfs
    getfattr -n user.test /tmp/jfs && exit 1 || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 2 ]] && echo "mount process count should be 2, count=$count" && exit 1 || true
    umount /tmp/jfs
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 0 ]] && echo "mount process count should be 0, count=$count" && exit 1 || true
}

test_update_from_old_version(){
    prepare_test
    ./juicefs-1.1 format $LEGACY_META_URL myjfs
    ./juicefs-1.1 mount  -d $LEGACY_META_URL /tmp/jfs
    echo hello |tee /tmp/jfs/test
    ./juicefs mount -d $META_URL /tmp/jfs
    count=$(ps -ef | grep juicefs | grep mount | wc -l)
    [[ $count -ne 3 ]] && echo "mount process count should be 3" && exit 1 || true
    version=$(./juicefs version | awk '{print $3,$4,$5}')
    grep Version /tmp/jfs/.config | grep $version
    grep "hello" /tmp/jfs/test
    echo world | tee /tmp/jfs/test 
    ./juicefs umount /tmp/jfs
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 1 ]] && echo "mount process count should be 1" && exit 1 || true
    ./juicefs umount /tmp/jfs
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l)
    [[ $count -ne 0 ]] && echo "mount process count should be 0" && exit 1 || true
}

test_update_on_fstab(){
    prepare_test
    ./juicefs format $META_URL myjfs
    umount_jfs /tmp/jfs $META_URL
    rm /sbin/mount.juicefs -rf 
    ./juicefs mount --update-fstab $META_URL /tmp/jfs -d \
        -o debug,allow_other,writeback_cache \
        --max-uploads 20  --prefetch 3 --upload-limit 3 \
        --download-limit 100 --get-timeout 60  --put-timeout 60
    grep /tmp/jfs /etc/fstab
    ls /sbin/mount.juicefs -l
    umount /tmp/jfs
    for i in {1..5}; do
        mount /tmp/jfs
        wait_command_success "ps -ef | grep juicefs | grep /tmp/jfs | grep -v grep | wc -l" 2
        # cat /tmp/jfs/.config
    done
}

prepare_test(){
    umount_jfs /tmp/jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
}

kill_child_process()
{
    echo "kill_child_process"
    child_pid=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 != 1 {print $2}')
    kill $child_pid
}

force_kill_child_process()
{
    echo "force_kill_child_process"
    child_pid=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 != 1 {print $2}')
    kill -9 $child_pid
}


kill_parent_process()
{
    echo "kill_parent_process"
    parent_pid=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 == 1 {print $2}')
    kill $parent_pid
}

wait_process_started()
{   
    echo "wait_process_to_start $1"
    wait_seconds=15
    for i in $(seq 1 $wait_seconds); do
        if check_process_is_alive ; then
            echo "mount process is started"
            break
        fi
        if [ $i -eq $wait_seconds ]; then
            ps -ef | grep "juicefs" | grep "mount" | grep -v grep 
            echo "mount process is not started after $wait_seconds"
            exit 1
        fi
        echo "wait process to start" && sleep 1
    done
}

check_process_is_alive()
{   
    echo >&2 "check_process_is_alive $1"
    count=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | wc -l)
    if [ $count -ne 2 ]; then
        ps -ef | grep "juicefs" | grep -v "grep"
        echo >&2 "mount process is not equal 2"
        return 1
    fi
    child_count=$(ps -ef | grep "juicefs" | grep  "mount" | grep -v grep | awk '$3 != 1 {print $2}' | wc -l)
    if [[ $child_count -ne 1 ]]; then
        ps -ef | grep "juicefs" | grep -v "grep"
        echo >&2 "mount child process is not equal 1"
        return 1
    fi
    parent_count=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 == 1 {print $2}' | wc -l)
    if [ $parent_count -ne 1 ]; then
        ps -ef | grep "juicefs" | grep -v "grep"
        echo >&2 "mount parent process is not equal 1"
        return 1
    fi
    ppid1=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 == 1 {print $2}')
    ppid2=$(ps -ef | grep "juicefs" | grep "mount" | grep -v grep | awk '$3 != 1 {print $3}')
    if [ $ppid1 -ne $ppid2 ]; then
        ps -ef | grep "juicefs" | grep "mount" | grep -v "grep"
        echo >&2 "mount parent process is not equal child process's ppid"
        return 1
    fi
}


source .github/scripts/common/run_test.sh && run_test $@

================================================
FILE: .github/scripts/command/info.sh
================================================
#!/bin/bash -e

sudo dpkg -s redis-tools || sudo .github/scripts/apt_install.sh redis-tools
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_info_big_file(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    dd if=/dev/zero of=/jfs/bigfile bs=1M count=4096
    ./juicefs info /jfs/bigfile
    ./juicefs rmr /jfs/bigfile
    df -h /jfs
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/interface.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_list_large_dir()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    local files_count=100000
    if [[ "$META_URL" == redis://* ]]; then
        files_count=1300000
    fi
    ./juicefs mdtest $META_URL /test --depth 0 --dirs 1 --files $files_count --threads 1
    du /jfs/test & du_pid=$!
    sleep 2
    kill -INT $du_pid || true
    wait $du_pid || true
    if ! [ -d "/jfs/test" ]; then
        echo >&2 "<FATAL>: directory /jfs/test is not accessible after ls interruption"
        exit 1
    fi
}

test_deep_nested_dirs() {
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    dir="/juicefs1/test"
    for i in $(seq 1 100); do
        dir="$dir/dir$i"
        mkdir -p "$dir"
        echo "content$i" > "$dir/file$i"
    done
    max_jobs=10
    for i in $(seq 1 50); do
        nested_dir="/juicefs1/test"
        for j in $(seq 1 $i); do
            nested_dir="$nested_dir/dir$j"
        done
        ls "$nested_dir" > /dev/null 2>&1 &
        if (( $(jobs -p | wc -l) >= max_jobs )); then
            wait -n
        fi
    done
    wait
    file_count=$(find /juicefs1/test -type f | wc -l)
    if [[ $file_count -ne 100 ]]; then
        echo "File number error： $file_count"
        return 1
    fi
    for i in $(seq 1 100); do
        nested_dir="/juicefs1/test"
        for j in $(seq 1 $i); do
            nested_dir="$nested_dir/dir$j"
        done
        expected_content="content$i"
        actual_content=$(cat "$nested_dir/file$i" 2>/dev/null)
        if [[ "$actual_content" != "$expected_content" ]]; then
            echo "expect: '$expected_content'，actual: '$actual_content'"
            return 1
        fi
    done
    return 0
}


source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/mount.sh
================================================
#!/bin/bash -e

source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

test_sort_dir(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --sort-dir
    
    for i in {1..1000}; do
        touch "/jfs/file_$i"
    done
        mkdir -p /jfs/subdir
    for i in {1..1000}; do
        touch "/jfs/subdir/file_$i"
    done    
    ls -lh /jfs > /tmp/sorted_no_u
    ls -U -lh /jfs > /tmp/sorted_with_u
    diff /tmp/sorted_no_u /tmp/sorted_with_u
    
    ls -lh /jfs/subdir > /tmp/subdir_sorted_no_u
    ls -U -lh /jfs/subdir > /tmp/subdir_sorted_with_u
    diff /tmp/subdir_sorted_no_u /tmp/subdir_sorted_with_u    
    rm -f /tmp/sorted_*
    rm -f /tmp/subdir_sorted_*
}

measure_lookup_time() {
    local start_time end_time elapsed
    start_time=$(date +%s.%N)
    for file in "${FILE_LIST[@]}"; do
        if [[ -e "$file" ]]; then
            echo "Error: $file exists!" >&2
            exit 1
        fi
    done
    end_time=$(date +%s.%N)
    elapsed=$(echo "$end_time - $start_time" | bc)
    echo "$elapsed"
}

test_negative_dir(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --negative-entry-cache 5
    TEST_DIR="/jfs/test_dir_$$"
    mkdir -p "${TEST_DIR}"

    FILE_LIST=()
    for i in {1..1000}; do
      FILE_LIST+=("${TEST_DIR}/nonexistent_file_$(printf "%04d" $i)")
    done
    echo -e "\n=== First lookup (uncached) ==="
    time1=$(measure_lookup_time)
    echo "Time taken: ${time1} seconds"
    echo -e "\n=== Second lookup (cached) ==="
    time2=$(measure_lookup_time)
    echo "Time taken: ${time2} seconds"
    echo -e "\n=== Waiting for cache to expire... ==="
    sleep 6 
    echo -e "\n=== Third lookup (after cache expiry) ==="
    time3=$(measure_lookup_time)
    echo "Time taken: ${time3} seconds"
    echo -e "\n=== Test Result ==="
    if (( $(echo "$time1 > 2 * $time2" | bc -l) )) && \
       (( $(echo "$time3 > 2 * $time2" | bc -l) )) && \
       (( $(echo "$time1 - $time3 < 0.5" | bc -l) )); then
        echo "PASS: Caching behavior matches expectations:"
    else
        echo "FAIL: Caching behavior does NOT match expectations:"
        echo "Expected: First ≈ Third > 2 x Second"
        exit 1
    fi
    rm -rf "${TEST_DIR}"
    echo -e "\nTest directory removed: ${TEST_DIR}"
}

test_redis_client_cache()
{
    if [[ "$META" != "redis" ]]; then
        echo "Skip redis client cache test for META=$META"
        return 0
    fi

    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs2 || true
    ./juicefs mount -d $META_URL /jfs2

    mkdir -p /jfs/redis_csc
    for i in {1..100}; do
        echo "v$i" > "/jfs/redis_csc/file_$i"
    done

    wait_command_success "ls /jfs2/redis_csc | wc -l" "100" 30
    echo "cache-sync" > /jfs/redis_csc/shared_file
    wait_command_success "cat /jfs2/redis_csc/shared_file" "cache-sync" 30

    ./juicefs umount /jfs2 || umount -l /jfs2 || true
}

test_check_storage(){
    start_meta_engine $META minio
    prepare_test
    sleep 2
    ./juicefs format $META_URL myjfs --storage minio --bucket http://localhost:9000/test \
        --access-key minioadmin --secret-key minioadmin --compress lz4 --hash-prefix
    docker stop minio
    ./juicefs mount $META_URL /tmp/jfs --check-storage || echo "PASS: Mount failed as expected when storage is not accessible"
    docker start minio
    sleep 2
    ./juicefs mount $META_URL /tmp/jfs -d
    ./juicefs umount /tmp/jfs
    docker stop minio && docker rm minio
}

test_capabilities()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --enable-xattr --enable-cap
    cp /bin/ls /jfs/test_ls
    cp /bin/ping /jfs/test_ping
    chmod +x /jfs/test_ls /jfs/test_ping
    setcap "cap_net_raw+ep" /jfs/test_ping
    setcap "cap_dac_override+ep" /jfs/test_ls
    sleep 1
    getcap /jfs/test_ping | grep -E "cap_net_raw[+=]ep" || {
        echo "FAIL: capability not set correctly on test_ping"
        exit 1
    }
    getcap /jfs/test_ls | grep -E "cap_dac_override[+=]ep" || {
        echo "FAIL: capability not set correctly on test_ls"
        exit 1
    }
    capsh --print | grep "Current:" || {
        echo "FAIL: cannot get current capabilities"
        exit 1
    }
    setcap -r /jfs/test_ping
    setcap -r /jfs/test_ls
    getcap /jfs/test_ping | grep -E "cap_net_raw[+=]ep" && {
        echo "FAIL: capability not removed from test_ping"
        exit 1
    }
    getcap /jfs/test_ls | grep -E "cap_dac_override[+=]ep" && {
        echo "FAIL: capability not removed from test_ls"
        exit 1
    }
    rm -f /jfs/test_ls /jfs/test_ping
    echo "PASS: Capabilities test completed successfully"
}

test_all_squash()
{
    prepare_test
   ./juicefs format $META_URL myjfs
   ./juicefs mount -d $META_URL /jfs --all-squash 1101:1101
    mkdir -p /jfs/test_dir
    touch /jfs/test_dir/test_file
    uid1=$(stat -c %u /jfs/test_dir)
    gid1=$(stat -c %g /jfs/test_dir)
    uid2=$(stat -c %u /jfs/test_dir/test_file)
    gid2=$(stat -c %g /jfs/test_dir/test_file)
    if [[ "$uid1" != "1101" ]] || [[ "$gid1" != "1101" ]] || [[ "$uid2" != "1101" ]] || [[ "$gid2" != "1101" ]]; then
        echo >&2 "<FATAL>: uid/gid does not same as squash: uid1: $uid1, uid2: $uid2, gid1: $gid1, gid2: $gid2"
        exit 1
    fi
}

test_umask()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --umask 0027

    mkdir -p /jfs/test_dir
    dir_perms=$(stat -c %a /jfs/test_dir)
    if [[ "$dir_perms" != "750" ]]; then
        echo >&2 "<FATAL>: Directory permissions incorrect. Expected: 750, Got: $dir_perms"
        exit 1
    fi
    touch /jfs/test_file
    file_perms=$(stat -c %a /jfs/test_file)
    if [[ "$file_perms" != "640" ]]; then
        echo >&2 "<FATAL>: File permissions incorrect. Expected: 640, Got: $file_perms"
        exit 1
    fi
    touch /jfs/test_dir/nested_file
    nested_perms=$(stat -c %a /jfs/test_dir/nested_file)
    if [[ "$nested_perms" != "640" ]]; then
        echo >&2 "<FATAL>: Nested file permissions incorrect. Expected: 640, Got: $nested_perms"
        exit 1
    fi
    echo "PASS: Umask test completed successfully"
}

test_close_to_open1()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs2 || true
    ./juicefs mount -d $META_URL /jfs2
    file1="/jfs/testfile.tmp"
    file2="/jfs2/testfile.tmp"
    rm $file1 || true
    openssl rand -base64 -out $file1 512000
    sleep 3
    ls -ls $file2
    echo "#########################"
    echo "hello" > $file1
    hex_file2=$(cat $file2 | hexdump -C)
    echo "#########################"
    hex_file2_2=$(cat $file2 | hexdump -C)
    hex_file1=$(cat $file1 | hexdump -C)
    [[ "$hex_file2" != "$hex_file1" ]] && echo "Content of $hex_file2 and $hex_file1 do not match" && exit 1 || true
    [[ "$hex_file2_2" != "$hex_file1" ]] && echo "Content of $hex_file2_2 and $hex_file1 do not match" && exit 1 || true
}

test_colse_to_open2()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs2 || true
    ./juicefs mount -d $META_URL /jfs2
    file1="/jfs/testfile.tmp"
    file2="/jfs2/testfile.tmp"
    rm $file1 || true
    python3 -c "
for i in range(1, 101):
    with open('$file1', 'a') as f:
        f.write(f'{i}\\n')
    with open('$file2', 'a') as f:
        f.write(f'{i}\\n')
"
    line_count1=$(cat $file1 | wc -l)
    line_count2=$(cat $file2 | wc -l)
    [[ $line_count1 -ne 200 ]] && cat $file1 && echo "Error: $file1 should have 200 lines but has $line_count1" && exit 1 || true
    [[ $line_count2 -ne 200 ]] && cat $file2 && echo "Error: $file2 should have 200 lines but has $line_count2" && exit 1 || true
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/quota.sh
================================================
#!/bin/bash -e

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

HEARTBEAT_INTERVAL=3
HEARTBEAT_SLEEP=3
DIR_QUOTA_FLUSH_INTERVAL=4
VOLUME_QUOTA_FLUSH_INTERVAL=2
source .github/scripts/common/common.sh

test_total_capacity()
{
    prepare_test
    ./juicefs format $META_URL myjfs --capacity 1
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL --debug
    dd if=/dev/zero of=/jfs/test1 bs=1G count=1
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/test1 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep "No space left on device" error.log
    ./juicefs config $META_URL --capacity 2
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/test2 bs=1G count=1
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/test2 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep "No space left on device" error.log

    rm /jfs/test1 -rf
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/test3 2>error.log && echo "echo should fail on out of space" && exit 1 || true

    ./juicefs rmr /jfs/.trash
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/test3 

    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    ln /jfs/test2 /jfs/test4
    ln /jfs/test2 /jfs/test5
}

test_total_inodes(){
    prepare_test
    ./juicefs format $META_URL myjfs --inodes 1000
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    set +x
    for i in {1..1000}; do
        echo $i | tee /jfs/test$i > /dev/null
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee /jfs/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "No space left on device" error.log
    ./juicefs config $META_URL --inodes 2000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1001..2000}; do
        echo $i | tee /jfs/test$i > /dev/null || (df -i /jfs && ls /jfs/ -l | wc -l  && exit 1)
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee /jfs/test2001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
}

test_nested_dir(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    file_count=1000
    mkdir -p /jfs/d1/{d1,d2,d3,d4,d5,d6}/{d1,d2,d3,d4,d5,d6}/{d1,d2,d3,d4,d5,d6}
    dir_count=$(find /jfs/d1 -type d | wc -l)
    echo "dir_count: $dir_count"
    ./juicefs quota set $META_URL --path /d1 --inodes $((file_count+dir_count-1))
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    for i in $(seq 1 $file_count); do
        subdir=$(find /jfs/d1/ -type d | shuf -n 1)
        echo "touch $subdir/test$i" && touch $subdir/test$i
    done
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    subdir=$(find /jfs/d1/ -type d | shuf -n 1)
    touch $subdir/test 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)

    ./juicefs quota set $META_URL --path /d1 --inodes $((file_count+dir_count))
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    subdir=$(find /jfs/d1/ -type d | shuf -n 1)
    touch $subdir/test
}

test_remove_and_restore(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    echo a | tee -a /jfs/d/test1 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)

    echo "remove test1" && rm /jfs/d/test1 -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "0%" ]] && echo "used should be 0%" && exit 1 || true

    trash_dir=$(ls /jfs/.trash)
    ./juicefs restore $META_URL $trash_dir --put-back
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    echo a | tee -a /jfs/d/test1 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)

    echo "remove test1" && rm /jfs/d/test1 -rf
    dd if=/dev/zero of=/jfs/d/test2 bs=1M count=1
    trash_dir=$(ls /jfs/.trash)
    ./juicefs restore $META_URL $trash_dir --put-back 2>&1 | tee restore.log
    grep "disk quota exceeded" restore.log || (echo "check restore log failed" && exit 1)
}

test_dir_capacity(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d
    used=$(./juicefs quota get $META_URL --path /d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    echo a | tee -a /jfs/d/test1 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)

    ./juicefs quota set $META_URL --path /d --capacity 2
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/d/test2 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/d/test2 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm -rf /jfs/d/test1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    used=$(./juicefs quota get $META_URL --path /d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "50%" ]] && echo "used should be 50%" && exit 1 || true
    dd if=/dev/zero of=/jfs/d/test3 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota check $META_URL --path /d --strict
}

test_dir_inodes(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --inodes 1000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1..1000}; do
        echo $i > /jfs/d/test$i > /dev/null
    done
    set -x
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee /jfs/d/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm -rf error.log
    ./juicefs quota set $META_URL --path /d --inodes 2000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1001..2000}; do
        echo $i | tee  /jfs/d/test$i > /dev/null
    done
    set -x
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee  /jfs/d/test2001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm /jfs/d/test1 -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee  /jfs/d/test2001
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota check $META_URL --path /d --strict
}

test_sub_dir(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --inodes 1000 --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    umount_jfs /jfs $META_URL
    ./juicefs mount -d $META_URL --subdir /d /jfs --heartbeat 2
    size=$(df -h /jfs | grep "JuiceFS" | awk '{print $2}')
    [[ $size != "1.0G" ]] && echo "size should be 1.0G" && exit 1 || true
    inodes=$(df -ih /jfs | grep "JuiceFS" | awk '{print $2}')
    [[ $inodes != "1000" ]] && echo "inodes should be 1000" && exit 1 || true
    dd if=/dev/zero of=/jfs/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/test1 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm /jfs/test1 -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    set +x
    for i in {1..1000}; do
        echo $i | tee /jfs/test$i > /dev/null
    done
    set -x
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo $i | tee /jfs/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    ./juicefs quota check $META_URL --path /d --strict
}

test_dump_load(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --inodes 1000 --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    ./juicefs dump --log-level error $META_URL --fast > dump.json
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    ./juicefs load $META_URL dump.json
    ./juicefs mount $META_URL /jfs -d --heartbeat 5
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee -a /jfs/d/test1 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm /jfs/d/test1 -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    set +x
    for i in {1..1000}; do
        echo $i | tee /jfs/d/test$i > /dev/null
    done
    set -x
    sleep 3s
    echo a | tee /jfs/d/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    ./juicefs quota check $META_URL --path /d --strict
}

test_hard_link(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    dd if=/dev/zero of=/jfs/file bs=1G count=1
    ./juicefs quota set $META_URL --path /d --capacity 2
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ln /jfs/file /jfs/d/test2
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ln /jfs/file /jfs/d/test3 2>error.log && echo "hard link should fail on out of space" && exit 1 || true
    grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota check $META_URL --path /d --strict
}

test_check_and_repair_quota(){
    prepare_test
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /jfs/d
    ./juicefs quota set $META_URL --path /d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/jfs/d/test1 bs=1G count=1
    pid=$(ps -ef | grep "juicefs mount" | grep -v grep | awk '{print $2}')
    kill -9 $pid
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    # ./juicefs quota check $META_URL --path /d --strict && echo "quota check should fail" && exit 1 || true
    ./juicefs quota check $META_URL --path /d --strict --repair
    ./juicefs quota check $META_URL --path /d --strict
}

wait_until()
{   
    key=$1
    value=$2
    echo "wait until $key becomes $value"
    wait_seconds=15
    for i in $(seq 1 $wait_seconds); do
        if [ "$key" == "ifree" ]; then
            expect_value=$(df -ih /jfs | grep JuiceFS | awk '{print $4}')
        elif [ "$key" == "avail_size" ]; then
            expect_value=$(df h /jfs | grep JuiceFS | awk '{print $4}')
        fi
        if [ "$expect_value" == "$value" ]; then
            echo "$key becomes $value" && return 0
        fi
        echo "wait until $key becomes $value" && sleep 1s
    done
    echo "wait until $key becomes $value failed after $wait_seconds seconds" && exit 1
}

prepare_ug_quota_test()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs config $META_URL --user-group-quota
    ./juicefs mount -d $META_URL /jfs --heartbeat $HEARTBEAT_INTERVAL
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
}

resolve_test_users()
{
    if [[ -n "$TEST_USER_1" ]] && [[ -n "$TEST_USER_2" ]]; then
        return 0
    fi

    TEST_USER_1=""
    TEST_USER_2=""

    for candidate in nobody daemon bin; do
        if id "$candidate" >/dev/null 2>&1; then
            candidate_uid=$(id -u "$candidate")
            candidate_gid=$(id -g "$candidate")
            if [[ "$candidate_uid" == "0" ]] || [[ "$candidate_gid" == "0" ]]; then
                continue
            fi
            if [[ -z "$TEST_USER_1" ]]; then
                TEST_USER_1="$candidate"
                TEST_UID_1=$candidate_uid
                TEST_GID_1=$candidate_gid
            elif [[ "$candidate_uid" != "$TEST_UID_1" ]]; then
                TEST_USER_2="$candidate"
                TEST_UID_2=$candidate_uid
                TEST_GID_2=$candidate_gid
                break
            fi
        fi
    done
    create_temp_user()
    {
        idx=$1
        if ! command -v useradd >/dev/null 2>&1; then
            return 1
        fi
        name="jfs-quota-test-${idx}-${RANDOM}"
        if ! useradd -M -s /usr/sbin/nologin "$name" >/dev/null 2>&1; then
            return 1
        fi
        uid=$(id -u "$name" 2>/dev/null || echo 0)
        gid=$(id -g "$name" 2>/dev/null || echo 0)
        if [[ "$uid" == "0" ]] || [[ "$gid" == "0" ]]; then
            userdel -f "$name" >/dev/null 2>&1 || true
            return 1
        fi
        echo "$name:$uid:$gid"
        return 0
    }

    if [[ -z "$TEST_USER_1" ]] || [[ -z "$TEST_USER_2" ]]; then
        if [[ "$(id -u)" != "0" ]]; then
            echo "cannot find two non-root users for user/group quota tests"
            return 1
        fi
        for i in 1 2 3 4; do
            info=$(create_temp_user "$i") || continue
            name=$(echo "$info" | cut -d: -f1)
            uid=$(echo "$info" | cut -d: -f2)
            gid=$(echo "$info" | cut -d: -f3)
            if [[ -z "$TEST_USER_1" ]]; then
                TEST_USER_1="$name"
                TEST_UID_1=$uid
                TEST_GID_1=$gid
            elif [[ -z "$TEST_USER_2" ]] && [[ "$uid" != "$TEST_UID_1" ]]; then
                TEST_USER_2="$name"
                TEST_UID_2=$uid
                TEST_GID_2=$gid
                break
            fi
        done
    fi

    if [[ -z "$TEST_USER_1" ]] || [[ -z "$TEST_USER_2" ]]; then
        echo "cannot find two non-root users for user/group quota tests"
        return 1
    fi

    echo "test users: $TEST_USER_1($TEST_UID_1:$TEST_GID_1), $TEST_USER_2($TEST_UID_2:$TEST_GID_2)"
}

run_as_user_cmd()
{
    user=$1
    shift
    cmd="$*"

    if [[ "$(id -un)" == "$user" ]]; then
        bash -c "$cmd"
        return $?
    fi

    if command -v sudo >/dev/null 2>&1; then
        sudo -n -u "$user" bash -c "$cmd" && return 0 || true
    fi

    if command -v runuser >/dev/null 2>&1; then
        runuser -u "$user" -- bash -c "$cmd" && return 0 || true
    fi

    if command -v su >/dev/null 2>&1; then
        su -s /bin/bash "$user" -c "$cmd" && return 0 || true
    fi

    echo "cannot run command as user $user"
    return 1
}

set_quota_by_username()
{
    username=$1
    capacity=$2
    inodes=$3
    uid=$(id -u "$username")
    ./juicefs quota set $META_URL --uid "$uid" --capacity "$capacity" --inodes "$inodes"
}

test_user_group_quota_set_get_list_delete(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    ./juicefs quota set $META_URL --uid "$TEST_UID_1" --capacity 1 --inodes 20
    ./juicefs quota set $META_URL --gid "$TEST_GID_1" --capacity 1 --inodes 20
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    ./juicefs quota get $META_URL --uid "$TEST_UID_1" 2>&1 | tee uid_quota.log
    grep "UID:$TEST_UID_1" uid_quota.log || (echo "uid quota should exist" && exit 1)

    ./juicefs quota get $META_URL --gid "$TEST_GID_1" 2>&1 | tee gid_quota.log
    grep "GID:$TEST_GID_1" gid_quota.log || (echo "gid quota should exist" && exit 1)

    ./juicefs quota list $META_URL 2>&1 | tee quota_list.log
    grep "UID:$TEST_UID_1" quota_list.log || (echo "uid quota should be listed" && exit 1)
    grep "GID:$TEST_GID_1" quota_list.log || (echo "gid quota should be listed" && exit 1)

    ./juicefs quota delete $META_URL --uid "$TEST_UID_1"
    ./juicefs quota delete $META_URL --gid "$TEST_GID_1"
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    ./juicefs quota list $META_URL 2>&1 | tee quota_list_after_delete.log
    grep "UID:$TEST_UID_1" quota_list_after_delete.log && echo "uid quota should be deleted" && exit 1 || true
    grep "GID:$TEST_GID_1" quota_list_after_delete.log && echo "gid quota should be deleted" && exit 1 || true
}

test_uid_quota_check_on_write_and_truncate(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    mkdir -p /jfs/uidq
    chmod 777 /jfs/uidq

    ./juicefs quota set $META_URL --uid "$TEST_UID_2" --capacity 1 --inodes 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))

    run_as_user_cmd "$TEST_USER_2" "touch /jfs/uidq/inode1"
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_2" "touch /jfs/uidq/inode2" 2>error.log && echo "second inode should fail for uid quota" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "uid inode quota check failed" && exit 1)

    ./juicefs quota set $META_URL --uid "$TEST_UID_2" --capacity 1 --inodes 10
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    rm -f /jfs/uidq/inode1
    sleep $DIR_QUOTA_FLUSH_INTERVAL

    run_as_user_cmd "$TEST_USER_2" "truncate -s 900M /jfs/uidq/space1"
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_2" "truncate -s 1100M /jfs/uidq/space1" 2>error.log && echo "truncate should fail for uid capacity quota" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "uid capacity quota check failed" && exit 1)
}

test_gid_quota_check_on_write(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    mkdir -p /jfs/gidq
    chmod 777 /jfs/gidq

    ./juicefs quota set $META_URL --gid "$TEST_GID_2" --inodes 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))

    run_as_user_cmd "$TEST_USER_2" "touch /jfs/gidq/file1"
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_2" "touch /jfs/gidq/file2" 2>error.log && echo "second inode should fail for gid quota" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "gid inode quota check failed" && exit 1)
}

test_chown_transfer_user_group_quota(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    mkdir -p /jfs/chownq
    chmod 777 /jfs/chownq

    ./juicefs quota set $META_URL --uid "$TEST_UID_1" --inodes 1
    ./juicefs quota set $META_URL --uid "$TEST_UID_2" --inodes 1
    ./juicefs quota set $META_URL --gid "$TEST_GID_1" --inodes 1
    ./juicefs quota set $META_URL --gid "$TEST_GID_2" --inodes 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))

    run_as_user_cmd "$TEST_USER_1" "touch /jfs/chownq/src_file"
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_1" "touch /jfs/chownq/src_file2" 2>error.log && echo "user1 should exceed inode quota before chown" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "user1 pre-chown quota check failed" && exit 1)

    chown "$TEST_UID_2:$TEST_GID_2" /jfs/chownq/src_file
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_1" "touch /jfs/chownq/src_file2"
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    run_as_user_cmd "$TEST_USER_2" "touch /jfs/chownq/dst_file" 2>error.log && echo "user2 should exceed inode quota after chown transfer" && exit 1 || true
    grep -i "Disk quota exceeded" error.log || (echo "user2 post-chown quota check failed" && exit 1)
}

test_set_quota_by_username(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    set_quota_by_username "$TEST_USER_2" 1 10
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    uid=$(id -u "$TEST_USER_2")
    ./juicefs quota get $META_URL --uid "$uid" 2>&1 | tee username_quota.log
    grep "UID:$uid" username_quota.log || (echo "quota set by username should be visible in uid quota" && exit 1)

    ./juicefs quota list $META_URL 2>&1 | tee username_quota_list.log
    grep "UID:$uid" username_quota_list.log || (echo "quota set by username should be listed" && exit 1)
}

test_quota_list_uid_filter_regression(){
    prepare_ug_quota_test
    resolve_test_users || return 0

    ./juicefs quota set $META_URL --uid "$TEST_UID_1" --capacity 1 --inodes 3
    ./juicefs quota set $META_URL --uid "$TEST_UID_2" --capacity 1 --inodes 7
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))

    ./juicefs quota list $META_URL --uid "$TEST_UID_1" 2>&1 | tee uid_filter_1.log
    grep "UID:$TEST_UID_1" uid_filter_1.log || (echo "uid filter should show requested uid quota" && exit 1)
    grep "UID:$TEST_UID_2" uid_filter_1.log && echo "uid filter should not include other uid quota" && exit 1 || true
    uid_rows=$(grep -c "UID:" uid_filter_1.log || true)
    [[ "$uid_rows" -ne 1 ]] && echo "uid filter should only return one UID row" && exit 1 || true
    inodes_value=$(grep "UID:$TEST_UID_1" uid_filter_1.log | head -n1 | awk -F'|' '{gsub(/[[:space:]]/,"",$6); print $6}')
    [[ "$inodes_value" != "3" ]] && echo "uid filter should return uid1 inodes=3" && exit 1 || true

    ./juicefs quota list $META_URL --uid "$TEST_UID_2" 2>&1 | tee uid_filter_2.log
    grep "UID:$TEST_UID_2" uid_filter_2.log || (echo "uid filter should show requested uid quota" && exit 1)
    grep "UID:$TEST_UID_1" uid_filter_2.log && echo "uid filter should not include other uid quota" && exit 1 || true
    uid_rows=$(grep -c "UID:" uid_filter_2.log || true)
    [[ "$uid_rows" -ne 1 ]] && echo "uid filter should only return one UID row" && exit 1 || true
    inodes_value=$(grep "UID:$TEST_UID_2" uid_filter_2.log | head -n1 | awk -F'|' '{gsub(/[[:space:]]/,"",$6); print $6}')
    [[ "$inodes_value" != "7" ]] && echo "uid filter should return uid2 inodes=7" && exit 1 || true
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command/random.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh
[[ -z "$MAX_EXAMPLE" ]] && MAX_EXAMPLE=100
[[ -z "$STEP_COUNT" ]] && STEP_COUNT=50

[[ -z "$META1" ]] && META1=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META1
META_URL1=$(get_meta_url $META1)

[[ -z "$META2" ]] && META2=redis
source .github/scripts/start_meta_engine.sh
start_meta_engine $META2
META_URL2=$(get_meta_url $META2)

prepare_test()
{
    meta_url=$1
    mp=$2
    volume=$3
    shift 3
    options=$@
    umount_jfs $mp $meta_url
    python3 .github/scripts/flush_meta.py $meta_url
    rm -rf /var/jfs/$volume || true
    rm -rf /var/jfsCache/$volume || true
    ./juicefs format $meta_url $volume $options
    ./juicefs mount -d $meta_url $mp
}

test_run_examples()
{
    prepare_test $META_URL1 /tmp/jfs1 myjfs1 --enable-acl --trash-days 0
    prepare_test $META_URL2 /tmp/jfs2 myjfs2 --enable-acl --trash-days 0
    python3 .github/scripts/hypo/command_test.py
}

test_run_all()
{
    prepare_test $META_URL1 /tmp/jfs1 myjfs1
    prepare_test $META_URL2 /tmp/jfs2 myjfs2
    CHECK_NLINK=false MAX_EXAMPLE=$MAX_EXAMPLE STEP_COUNT=$STEP_COUNT python3 .github/scripts/hypo/command.py
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/acl.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common_win.sh

[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1

test_modify_acl_config()
{
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs --trash-days 0
    ./juicefs.exe mount -d $META_URL z:
    touch z:test
    cmd.exe /c "icacls z:\test /grant Everyone:(R,W)" && echo "setfacl should failed" && exit 1
    ./juicefs.exe config $META_URL --enable-acl=true
    ./juicefs.exe umount z:
    ./juicefs.exe mount -d $META_URL z:
    cmd.exe /c "icacls z:\test /grant Everyone:(R,W)"
    ./juicefs.exe config $META_URL --enable-acl=false && echo "should not disable acl" && exit 1 || true 
    ./juicefs.exe config $META_URL | grep EnableACL | grep "true" || (echo "EnableACL should be true" && exit 1) 
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/clone.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common_win.sh


[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1

test_clone_with_jfs_source()
{
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z:
    ls /z
    [[ ! -d /z/juicefs ]] && git clone https://github.com/juicedata/juicefs.git /z/juicefs --depth 1
    ls /z/juicefs
    do_clone true
    echo "test clone without --preserve"
#    do_clone false
}

do_clone()
{
    is_preserve=$1
    cmd.exe /c "taskkill /F /IM git.exe 2>nul || ver>nul"
    cmd.exe /c "rmdir /s /q z:\juicefs1 2>nul || ver>nul"
    cmd.exe /c "rmdir /s /q z:\juicefs2 2>nul || ver>nul"
    sleep 1
    
    [[ "$is_preserve" == "true" ]] && preserve="--preserve" || preserve=""
    cp -r /z/juicefs /z/juicefs1 $preserve
    ./juicefs.exe clone /z/juicefs /z/juicefs2 $preserve
    diff -ur /z/juicefs1 /z/juicefs2 --no-dereference
 #   CURRENT_DIR=$(pwd)
 #   cmd.exe /c "dir /s /b /a z:\juicefs1" > "${CURRENT_DIR}/log1"
 #   cmd.exe /c "dir /s /b /a z:\juicefs2" > "${CURRENT_DIR}/log2"
 #   diff -u "${CURRENT_DIR}/log1" "${CURRENT_DIR}/log2"
 #   rm -f "${CURRENT_DIR}/log1" "${CURRENT_DIR}/log2"
}

test_clone_with_small_files(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z:
    mkdir /z/test
    for i in $(seq 1 2000); do
        echo $i > /z/test/$i
    done
    ./juicefs.exe clone /z/test /z/test1
    diff -ur /z/test1 /z/test1
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/debug.sh
================================================
#!/bin/bash -e

source .github/scripts/common/common_win.sh
[[ -z "$META_URL" ]] && META=redis://127.0.0.1:6379/1


check_debug_file(){
   files=("system-info.log" "juicefs.log" "config.txt" "stats.txt" "stats.5s.txt")
   debug_dir="debug"
   if [ ! -d "$debug_dir" ]; then
    echo "error:no debug dir"
    exit 1
   fi
   all_files_exist=true
   for file in "${files[@]}"; do
     exist=`find "$debug_dir" -name $file | wc -l`
     if [ "$exist" == 0 ]; then
        echo "no $file"
        all_files_exist=false
     fi
   done
   if [ "$all_files_exist" = true ]; then
    echo "pass"
   else
    exit 1
   fi
}

test_debug_juicefs(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs 
    ./juicefs.exe mount -d $META_URL z:
    dd if=/dev/urandom of=/z/bigfile bs=1M count=1024
    ./juicefs.exe debug z:
    check_debug_file
    find debug -print | sed -e 's;[^/]*/;|____;g;s;____|; |;g'
    ./juicefs.exe rmr /z/bigfile
}

test_debug_abnormal_juicefs(){
    rm -rf debug | true
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs 
    ./juicefs.exe mount -d $META_URL z:
    dd if=/dev/urandom of=/z/bigfile bs=1M count=1024
    killall -9 redis-server | true
    ./juicefs.exe debug z:
    check_debug_file
    ./juicefs.exe rmr /z/bigfile
}

source .github/scripts/common/run_test.sh && run_test $@

================================================
FILE: .github/scripts/command-win/dump_load.sh
================================================
#!/bin/bash -ex
source .github/scripts/common/common_win.sh

[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1

[[ -z "$SEED" ]] && SEED=$(date +%s)
HEARTBEAT_INTERVAL=2
DIR_QUOTA_FLUSH_INTERVAL=4
[[ -z "$BINARY" ]] && BINARY=false
[[ -z "$FAST" ]] && FAST=false

trap "echo random seed is $SEED" EXIT

test_dump_load_sustained_file(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs --trash-days 0
    ./juicefs.exe mount -d $META_URL z:
    file_count=100
    for i in $(seq 1 $file_count); do
        touch /z/file$i
        exec {fd}<>/z/file$i
        echo fd is $fd
        fds[$i]=$fd
        rm /z/file$i
    done
    ./juicefs.exe dump $META_URL dump.json $(get_dump_option)
    for i in $(seq 1 $file_count); do
        fd=${fds[$i]}
        exec {fd}>&-
    done
    if [[ "$BINARY" == "true" ]]; then
        sustained=$(./juicefs.exe load dump.json --binary --stat | grep sustained | awk -F"|" '{print $2}')
    else
        sustained=$(jq '.Sustained[].inodes | length' dump.json)
    fi
    echo "sustained file count: $sustained"
    ./juicefs.exe umount z:
    prepare_win_test
    ./juicefs.exe load $META_URL dump.json $(get_load_option)
    ./juicefs.exe mount -d $META_URL z:
}

test_dump_load_with_copy_file_range(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z:
    rm -rf /tmp/test
    dd if=/dev/zero of=/tmp/test bs=1M count=1024
    cp /tmp/test /z/test
    node .github/scripts/copyFile.js /z/test /z/test1
    ./juicefs.exe dump $META_URL dump.json $(get_dump_option)
    ./juicefs.exe umount z:
    redis-cli -h 127.0.0.1 -p 6379 -n 1 FLUSHDB
    ./juicefs.exe load $META_URL dump.json $(get_load_option)
    ./juicefs.exe mount -d $META_URL z:
    compare_md5sum /tmp/test /z/test1
}

test_dump_load_with_quota(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs 
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /z/d
    ./juicefs.exe quota set $META_URL --path //d --inodes 1000 --capacity 1
    ./juicefs.exe dump --log-level error $META_URL $(get_dump_option) > dump.json
    ./juicefs.exe umount z:
    redis-cli -h 127.0.0.1 -p 6379 -n 1 FLUSHDB
    ./juicefs.exe load $META_URL dump.json $(get_load_option)
    ./juicefs.exe mount $META_URL z: -d --heartbeat $HEARTBEAT_INTERVAL
    ./juicefs.exe quota get $META_URL --path //d
    dd if=/dev/zero of=/z/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    dd if=/dev/zero of=/z/d/test2 bs=1G count=1 2>error.log && echo "write should fail on out of space" && exit 1 || true
}

get_dump_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    elif [[ "$FAST" == "true" ]]; then
        option="--fast"
    else
        option=""
    fi
    echo $option
}

get_load_option(){
    if [[ "$BINARY" == "true" ]]; then 
        option="--binary"
    else
        option=""
    fi
    echo $option
}

prepare_test(){
    umount_jfs /jfs $META_URL
    umount_jfs /jfs2 sqlite3://test2.db
    python3 .github/scripts/flush_meta.py $META_URL
    rm test2.db -rf 
    rm -rf /var/jfs/myjfs || true
    mc rm --force --recursive myminio/test || true
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/fsck.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common_win.sh


[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1


test_sync_dir_stat()
{
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z:
    ./juicefs.exe mdtest $META_URL //d --depth 15 --dirs 2 --files 100 --threads 10 & 
    pid=$!
    sleep 15s
    kill -9 $pid
    ./juicefs.exe info -r /z/d
    ./juicefs.exe info -r /z/d --strict 
    ./juicefs.exe fsck $META_URL --path //d --sync-dir-stat --repair -r
    ./juicefs.exe info -r /z/d | tee info1.log
    ./juicefs.exe info -r /z/d --strict | tee info2.log
    diff info1.log info2.log
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/gateway.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common_win.sh

[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1


wget https://dl.min.io/client/mc/release/windows-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z -O mc.exe
chmod +x mc.exe
export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=admin123
export MINIO_REFRESH_IAM_INTERVAL=3s

prepare_test()
{
    kill_gateway 9001 || true
    kill_gateway 9002 || true
    prepare_win_test
}

kill_gateway() {
    port=$1
    for pid in $(netstat -ano | findstr ":$port" | findstr "LISTENING" | awk '{print $5}'); do
        taskkill //F //PID $pid
    done
}

trap 'kill_gateway 9001; kill_gateway 9002' EXIT

start_two_gateway()
{
    prepare_test
    ./juicefs.exe format $META_URL myjfs  --trash-days 0
    ./juicefs.exe mount -d $META_URL z:
    export MINIO_ROOT_USER=admin
    export MINIO_ROOT_PASSWORD=admin123
    nohup ./juicefs.exe gateway $META_URL 127.0.0.1:9001 --multi-buckets --keep-etag --object-tag --log=gateway1.log &
    sleep 1
    nohup ./juicefs.exe gateway $META_URL 127.0.0.1:9002 --multi-buckets --keep-etag --object-tag --log=gateway2.log &
    sleep 2
    ./mc.exe alias set gateway1 http://127.0.0.1:9001 admin admin123
    ./mc.exe alias set gateway2 http://127.0.0.1:9002 admin admin123
}

test_user_management()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    sleep 5
    user=$(./mc.exe admin user list gateway2 | grep user1) || true
    if [ -z "$user" ]
    then
      echo "user synchronization error"
      exit 1
    fi
    ./mc.exe mb gateway1/test1
    ./mc.exe alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    if ./mc.exe cp mc.exe gateway1_user1/test1/file1
    then
      echo "By default, the user has no read and write permission"
      exit 1
    fi
    ./mc.exe admin policy set gateway1 readwrite user=user1
    if ./mc.exe cp mc.exe gateway1_user1/test1/file1
    then 
      echo "readwrite policy can read and write objects" 
    else
      echo "set readwrite policy fail"
      exit 1
    fi
    ./mc.exe cp gateway2/test1/file1 .
    compare_md5sum file1 mc.exe
    ./mc.exe admin user disable gateway1 user1
    ./mc.exe admin user remove gateway2 user1
    sleep 5
    user=$(./mc.exe admin user list gateway1 | grep user1) || true
    if [ ! -z "$user" ]
    then
      echo "remove user user1 fail"
      echo $user
      exit 1
    fi
}

test_group_management()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    ./mc.exe admin user add gateway1 user2 admin123
    ./mc.exe admin user add gateway1 user3 admin123
    ./mc.exe admin group add gateway1 testcents user1 user2 user3
    result=$(./mc.exe admin group info gateway1 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
    ./mc.exe admin policy set gateway1 readwrite group=testcents
    sleep 5
    ./mc.exe alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    ./mc.exe mb gateway1/test1
    if ./mc.exe cp mc.exe gateway1_user1/test1/file1
    then
      echo "readwrite policy can read write"
    else
      echo "the readwrite group has no read and write permission"
      exit 1
    fi
    ./mc.exe admin policy set gateway1 readonly group=testcents
    sleep 5
    if ./mc.exe cp mc.exe gateway1_user1/test1/file1
    then
      echo "readonly group policy can not write"
      exit 1
    else
      echo "the readonly group has no write permission"
    fi

    ./mc.exe admin group remove gateway1 testcents user1 user2 user3 
    ./mc.exe admin group remove gateway1 testcents
}

test_mult_gateways_set_group()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    ./mc.exe admin user add gateway1 user2 admin123
    ./mc.exe admin user add gateway1 user3 admin123
    ./mc.exe admin group add gateway1 testcents user1 user2 user3
    ./mc.exe admin group disable gateway2 testcents
    sleep 5
    result=$(./mc.exe admin group info gateway2 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
    ./mc.exe admin group enable gateway1 testcents
    ./mc.exe admin user add gateway1 user4 admin123
    ./mc.exe admin group add gateway1 testcents user4
    sleep 1
    ./mc.exe admin group disable gateway2 testcents
    sleep 5
    result=$(./mc.exe admin group info gateway2 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3,user4" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
}

test_user_svcacct_add()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    ./mc.exe admin policy set gateway1 consoleAdmin user=user1
    ./mc.exe alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    ./mc.exe admin user svcacct add gateway1_user1 user1 --access-key 12345678 --secret-key 12345678
    ./mc.exe admin user svcacct info gateway1_user1 12345678
    ./mc.exe admin user svcacct set gateway1_user1 12345678 --secret-key 123456789
    ./mc.exe alias set svcacct1 http://127.0.0.1:9001 12345678 123456789
    ./mc.exe mb svcacct1/test1
    if ./mc.exe cp mc.exe svcacct1/test1/file1
    then
      echo "svcacct user consoleAdmin policy can read write"
    else
      echo "the svcacct user has no read and write permission"
      exit 1
    fi
    ./mc.exe admin user svcacct disable gateway1_user1 12345678
    ./mc.exe admin user svcacct rm gateway1_user1 12345678
}

test_user_admin_svcacct_add()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    ./mc.exe admin policy set gateway1 readwrite user=user1
    ./mc.exe admin user svcacct add gateway1 user1 --access-key 12345678 --secret-key 12345678
    ./mc.exe admin user svcacct info gateway1 12345678
    ./mc.exe admin user svcacct set gateway1 12345678 --secret-key 12345678910
    ./mc.exe alias set svcacct1 http://127.0.0.1:9001 12345678 12345678910
    ./mc.exe mb svcacct1/test1
    if ./mc.exe cp mc.exe svcacct1/test1/file1
    then
      echo "amdin user can do svcacct "
    else
      echo "the svcacct user has no read and write permission"
      exit 1
    fi
    ./mc.exe admin user svcacct disable gateway1 12345678
    ./mc.exe admin user svcacct rm gateway1 12345678
}

test_user_sts()
{
    prepare_test
    start_two_gateway
    ./mc.exe admin user add gateway1 user1 admin123
    ./mc.exe admin policy set gateway1 consoleAdmin user=user1
    ./mc.exe alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    git clone https://github.com/juicedata/minio.git -b gateway-1.1
    ./mc.exe mb gateway1_user1/test1
    ./mc.exe cp mc.exe gateway1_user1/test1/mc
    cd minio
    go run docs/sts/assume-role.go -sts-ep http://127.0.0.1:9001 -u user1 -p admin123 -b test1 -d
    go run docs/sts/assume-role.go -sts-ep http://127.0.0.1:9001 -u user1 -p admin123 -b test1
    cd -
    ./mc.exe admin user remove gateway1 user1     
}


test_change_credentials()
{
    prepare_test
    start_two_gateway
    ./mc.exe mb gateway1/test1
    ./mc.exe cp mc.exe gateway1/test1/file1
    kill_gateway 9001 || true
    kill_gateway 9002 || true
    export MINIO_ROOT_USER=newadmin
    export MINIO_ROOT_PASSWORD=newadmin123
    export MINIO_ROOT_USER_OLD=admin
    export MINIO_ROOT_PASSWORD_OLD=admin123
    nohup ./juicefs.exe gateway $META_URL 127.0.0.1:9001 --multi-buckets --keep-etag --object-tag --log=gateway1.log &
    nohup ./juicefs.exe gateway $META_URL 127.0.0.1:9002 --multi-buckets --keep-etag --object-tag --log=gateway2.log &
    sleep 5
    ./mc.exe alias set gateway1 http://127.0.0.1:9001 newadmin newadmin123
    ./mc.exe alias set gateway2 http://127.0.0.1:9002 newadmin newadmin123
    ./mc.exe cp gateway1/test1/file1 file1
    ./mc.exe cp gateway2/test1/file1 file2
    compare_md5sum file1 mc.exe
    compare_md5sum file2 mc.exe  
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/gc.sh
================================================
#!/bin/bash -e

source .github/scripts/common/common_win.sh
[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1


test_delay_delete_slice_after_compaction(){
    if [[ "$META_URL" != redis* ]]; then
        echo "this test only runs for redis meta engine"
        return
    fi
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs --trash-days 1
    ./juicefs.exe mount -d $META_URL z: --no-usage-report
    redis-cli save
    # don't skip files when gc compact
    export JFS_SKIPPED_TIME=1
    ./juicefs.exe gc --compact --delete $META_URL
    ./juicefs.exe fsck $META_URL
}

test_gc_trash_slices(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z: --no-usage-report
    PATH1=test PATH2=z:\\test python3 .github/scripts/random_read_write.py 
    ./juicefs.exe status --more $META_URL
    ./juicefs.exe config $META_URL --trash-days 0 --yes
    ./juicefs.exe gc $META_URL 
    ./juicefs.exe gc $META_URL --delete
    ./juicefs.exe status --more $META_URL
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/profile.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common_win.sh


[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1


test_profile()
{
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z:
    ./juicefs.exe mdtest $META_URL //d --depth 3 --dirs 3 --files 10 --threads 5 
    timeout 5s ./juicefs profile /z/.accesslog || EXIT_CODE=$?
    if [ "$EXIT_CODE" = "124" ]; then
        echo "juicefs profile success"
    else
        echo "juicefs profile failed"
        exit 1
    fi
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/command-win/quota.sh
================================================
#!/bin/bash -e

[[ -z "$META_URL" ]] && META_URL=redis://127.0.0.1:6379/1

HEARTBEAT_INTERVAL=3
HEARTBEAT_SLEEP=3
DIR_QUOTA_FLUSH_INTERVAL=4
VOLUME_QUOTA_FLUSH_INTERVAL=2
source .github/scripts/common/common_win.sh

test_total_capacity()
{
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs --capacity 1
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL --debug
    dd if=/dev/zero of=/z/test1 bs=1G count=1
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    dd if=/dev/zero of=/z/test2 bs=1G count=1  && echo "dd should fail on out of space" && exit 1 || true
    rm /z/test1 -rf
    ./juicefs.exe rmr /z/.trash
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    dd if=/dev/zero of=/z/test2 bs=104857600 count=1
}

test_total_inodes(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs --inodes 1000
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL
    set +x
    for i in {1..1000}; do
        echo $i | tee /z/test$i > /dev/null
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee /z/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
 #   grep "No space left on device" error.log
    ./juicefs.exe config $META_URL --inodes 2000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1001..2000}; do
        echo $i | tee /z/test$i > /dev/null || (df -i /z && ls /z/ -l | wc -l  && exit 1)
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee /z/test2001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
}

test_remove_and_restore(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /z/d
    ./juicefs.exe quota set $META_URL --path //d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/z/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs.exe quota get $META_URL --path //d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    dd if=/dev/zero of=/z/d/test2 bs=1G count=1 && echo "write should fail on out of space" && exit 1 || true
    echo "remove test1" && rm /z/d/test* -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs.exe quota get $META_URL --path //d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "0%" ]] && echo "used should be 0%" && exit 1 || true

    trash_dir=$(ls /z/.trash)
    ./juicefs.exe restore $META_URL $trash_dir --put-back
    ./juicefs.exe quota get $META_URL --path //d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/z/d/test2 bs=1G count=1 && echo "write should fail on out of space" && exit 1 || true
    echo "remove test1" && rm /z/d/test1 -rf
}

test_dir_capacity(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /z/d
    ./juicefs.exe quota set $META_URL --path //d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/z/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs.exe quota get $META_URL --path //d
    used=$(./juicefs.exe quota get $META_URL --path //d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    dd if=/dev/zero of=/z/d/test2 bs=1G count=1 && echo "echo should fail on out of space" && exit 1 || true

    ./juicefs.exe quota set $META_URL --path //d --capacity 2
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=/z/d/test2 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    dd if=/dev/zero of=/z/d/test3 bs=1G count=1 && echo "echo should fail on out of space" && exit 1 || true
    rm -rf /z/d/test1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    used=$(./juicefs.exe quota get $META_URL --path //d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "50%" ]] && echo "used should be 50%" && exit 1 || true
    dd if=/dev/zero of=/z/d/test3 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs.exe quota check $META_URL --path //d --strict
}

test_dir_inodes(){
    prepare_win_test
    ./juicefs.exe format $META_URL myjfs 
    ./juicefs.exe mount -d $META_URL z: --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p /z/d
    ./juicefs.exe quota set $META_URL --path //d --inodes 1000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1..1000}; do
        echo $i > /z/d/test$i > /dev/null
    done
    set -x
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee /jfs/d/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    #grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm -rf error.log
    ./juicefs.exe quota set $META_URL --path //d --inodes 2000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1001..2000}; do
        echo $i | tee  /z/d/test$i > /dev/null
    done
    set -x
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee  /z/d/test2001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    #grep "Disk quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm /z/d/test1 -rf
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee  /z/d/test2001
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs.exe quota check $META_URL --path //d --strict
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/common/common.sh
================================================
#!/bin/bash -e

# Common variables and initialization
init_platform() {
    case "$(uname -s)" in
        Darwin*)    PLATFORM="mac";;
        Linux*)     PLATFORM="linux";;
        *)          PLATFORM="unknown"
    esac

    # Install jq if missing
    if ! command -v jq &> /dev/null; then
        case "$PLATFORM" in
            mac)    brew install jq;;
            linux)  .github/scripts/apt_install.sh jq;;
            *)      echo "Unsupported platform"; exit 1
        esac
    fi
}

# Platform-agnostic functions with internal branching
prepare_test() {
    case "$PLATFORM" in
        mac)
            ./juicefs umount ~/jfs || true
            umount_jfs ~/jfs "$META_URL"
            sleep 1
            python3 .github/scripts/flush_meta.py "$META_URL"
            rm -rf ~/.juicefs/local/myjfs/ || true
            rm -rf ~/.juicefs/cache || true
            ;;
        linux)
            umount_jfs /jfs "$META_URL"
            python3 .github/scripts/flush_meta.py "$META_URL"
            rm -rf /var/jfs/myjfs || true
            rm -rf /var/jfsCache/myjfs || true
            ;;
    esac
}

umount_jfs() {
    local mp=$1
    local meta_url=$2
    [[ -z "$mp" ]] && echo "mount point is empty" && exit 1
    [[ -z "$meta_url" ]] && echo "meta url is empty" && exit 1
    
    echo "umount_jfs $mp $meta_url"
    [[ ! -f "$mp/.config" ]] && return
    
    ls -l "$mp/.config"
    local status_log="status.log"
    ./juicefs status --log-level error "$meta_url" 2>/dev/null | tee "$status_log"
    
    local pids
    pids=$(jq --arg mp "$mp" '.Sessions[] | select(.MountPoint == $mp) | .ProcessID' "$status_log")
    [[ -z "$pids" ]] && cat "$status_log" && echo "pid is empty" && return
    
    echo "umount is $mp, pids are $pids"
    
    for pid in $pids; do
        case "$PLATFORM" in
            mac)
                if mount | grep -q "$mp"; then
                    diskutil unmount "$mp" || umount "$mp"
                fi
                ;;
            linux)
                umount -l "$mp"
                ;;
        esac
    done
    
    for pid in $pids; do
        wait_mount_process_killed "$pid" 60
    done
}

wait_mount_process_killed() {
    local pid=$1
    local wait_seconds=$2
    [[ -z "$pid" ]] && echo "pid is empty" && exit 1
    [[ -z "$wait_seconds" ]] && echo "wait_seconds is empty" && exit 1
    
    echo "waiting for mount process $pid to exit within $wait_seconds seconds"
    for i in $(seq 1 "$wait_seconds"); do
        case "$PLATFORM" in
            mac)
                if ! ps -p "$pid" > /dev/null; then
                    echo "mount process is killed"
                    break
                fi
                ;;
            linux)
                count=$(ps -ef | grep "juicefs mount" | awk '{print $2}' | grep "^$pid$" | wc -l)
                if [ "$count" -eq 0 ]; then
                    echo "mount process is killed"
                    break
                fi
                ;;
        esac
        
        if [ "$i" -eq "$wait_seconds" ]; then
            case "$PLATFORM" in
                mac)    ps -p "$pid";;
                linux)  ps -ef | grep "juicefs mount" | grep -v "grep";;
            esac
            echo "<FATAL>: mount process is not killed after $wait_seconds"
            exit 1
        fi
        sleep 1
    done
}

compare_md5sum() {
    local file1=$1
    local file2=$2
    
    case "$PLATFORM" in
        mac)
            md51=$(md5 -q "$file1")
            md52=$(md5 -q "$file2")
            ;;
        linux)
            md51=$(md5sum "$file1" | awk '{print $1}')
            md52=$(md5sum "$file2" | awk '{print $1}')
            ;;
    esac
    
    if [ "$md51" != "$md52" ]; then
        echo "md5 are different: $file1 ($md51) vs $file2 ($md52)"
        exit 1
    fi
}

wait_command_success() {
    local command=$1
    local expected=$2
    local timeout=${3:-30}
    
    echo "waiting for command success: cmd='$command', expected='$expected', timeout=$timeout"
    for i in $(seq 1 "$timeout"); do
        result=$(eval "$command" 2>/dev/null | tr -d ' ')
        echo "attempt $i: result=$result"
        
        if [[ "$result" == "$expected" ]]; then
            echo "command succeeded"
            return 0
        fi
        
        if [ "$i" -eq "$timeout" ]; then
            eval "$command"
            echo "command failed after $timeout attempts: $command"
            exit 1
        fi
        sleep 1
    done
}

# macOS specific helper (only defined but used when needed)
ensure_directory() {
    [[ "$PLATFORM" != "mac" ]] && return
    local dir=$1
    if [[ ! -d "$dir" ]]; then
        echo "Creating directory: $dir"
        mkdir -p "$dir"
    fi
}

# Initialize platform detection
init_platform

# Make functions available to subprocesses
export -f prepare_test umount_jfs wait_mount_process_killed compare_md5sum wait_command_success ensure_directory
export PLATFORM META_URL

================================================
FILE: .github/scripts/common/common_win.sh
================================================
#!/bin/bash -e
prepare_win_test()
{
     net start redisredis || true
     ./juicefs.exe umount z: || true
     rm -rf C:\jfs\local/myjfs/  || true
     rm -rf C:\jfsCache\local/myjfs/ || true
     uuid=$(./juicefs.exe status $META_URL | grep UUID | cut -d '"' -f 4) || true
     ./juicefs.exe destroy --force $META_URL $uuid  || true
     redis-cli -h 127.0.0.1 -p 6379 -n 1 FLUSHDB
}

compare_md5sum(){
    file1=$1
    file2=$2
    md51=$(md5sum $file1 | awk '{print $1}')
    md52=$(md5sum $file2 | awk '{print $1}')
    # echo md51 is $md51, md52 is $md52
    if [ "$md51" != "$md52" ] ; then
        echo "md5 are different: md51 is $md51, md52 is $md52"
        exit 1
    fi
}

================================================
FILE: .github/scripts/common/run_test.sh
================================================
#!/bin/bash -e
run_one_test()
{
    test=$1
    test=${test%%(*}
    echo -e "\033[0;34mStart Test: $test\033[0m"
    START_TIME=$(date +%s)    
    set +e 
    ( set -e; "${test}" )
    EXIT_STATUS=$?
    set -e
    echo $test exit with $EXIT_STATUS
    END_TIME=$(date +%s)
    ELAPSED_TIME=$((END_TIME - START_TIME))
    if [[ $EXIT_STATUS -eq 0 ]]; then
        echo -e "\033[0;34mFinish Test: $test in $ELAPSED_TIME seconds\033[0m"
    else
        echo -e "\033[0;31mTest Failed: $0 $test in $ELAPSED_TIME seconds\033[0m"
        exit 1
    fi
}

run_test(){
    START_TIME_ALL=$(date +%s) 
    if [[ ! -z "$@" ]]; then
        # run test functions passed by arguments
        for test in "$@"; do
            if declare -F "$test" > /dev/null; then
                run_one_test $test
            else
                echo -e "\033[0;31mTest $test was not found in $0\033[0m"
                exit 1
            fi
        done
    else
        # Find and run all test functions
        if [[ "$(uname)" == "Darwin" ]]; then
            tests=$(grep -E '^[[:space:]]*test_[[:alnum:]_]+[[:space:]]*\([[:space:]]*\)' "$0")
        else
            tests=$(grep -oP '^\s*test_\w+\s*\(\s*\)' "$0")
        fi
        if [[ -z "$tests" ]]; then
            echo -e "\033[0;31mNo test function found in $0\033[0m"
        else
            for test in ${tests}; do
                run_one_test $test
            done
        fi
    fi
    END_TIME_ALL=$(date +%s)
    ELAPSED_TIME_ALL=$((END_TIME_ALL - START_TIME_ALL))
    echo -e "\033[0;34mAll tests passed in $ELAPSED_TIME_ALL seconds\033[0m"
}

================================================
FILE: .github/scripts/compare_results.sh
================================================
#!/bin/bash
set -e

CURRENT_RESULTS=$1
OLD_RESULTS=$2

extract_metrics() {
    awk '{
        op_description=$1; 
        op_type=$2;
        for(i=3;i<=NF;i++) if($i == ":") break;
        max=$(i+1); min=$(i+2); mean=$(i+3); stddev=$(i+4);
        print op_description, op_type, max, min, mean, stddev
    }' <<< "$1"
}

compare_with_tolerance() {
    local current=$1
    local old=$2

    tolerance=$(echo "$old * 0.1" | bc -l)
    lower_bound=$(echo "$old - $tolerance" | bc -l)
    upper_bound=$(echo "$old + $tolerance" | bc -l)

    if (( $(echo "$current <= $upper_bound && $current >= $lower_bound" | bc -l) )); then
        echo "same"
    elif (( $(echo "$current > $old" | bc -l) )); then
        echo "better"
    else
        echo "worse"
    fi
}

compare_scenario() {
    local scenario=$1
    local current_file="${CURRENT_RESULTS}.${scenario}.summary"
    local old_file="${OLD_RESULTS}.${scenario}.summary"

    echo ""
    echo "===================================================================="
    echo "Detailed Comparison for $scenario (with 10% tolerance)"
    echo "===================================================================="
    printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" "Operation" "Current Max" "Old Max" "Diff" "Status" "Variance"
    echo "--------------------------------------------------------------------"

    while IFS= read -r current_line && IFS= read -r old_line <&3; do
        if [ -z "$current_line" ] || [ -z "$old_line" ]; then
            continue
        fi

        current_metrics=($(extract_metrics "$current_line"))
        old_metrics=($(extract_metrics "$old_line"))

        current_op="${current_metrics[0]} ${current_metrics[1]}"
        old_op="${old_metrics[0]} ${old_metrics[1]}"

        if [ "$current_op" != "$old_op" ]; then
            echo "Warning: Operation mismatch ('$current_op' vs '$old_op'), skipping..."
            continue
        fi

        current_max=${current_metrics[2]}
        old_max=${old_metrics[2]}

        if [[ "$current_max" =~ ^[0-9.]+$ ]] && [[ "$old_max" =~ ^[0-9.]+$ ]]; then
            diff=$(echo "$current_max - $old_max" | bc -l)
            variance=$(echo "scale=2; ($current_max - $old_max)*100/$old_max" | bc -l)

            comparison=$(compare_with_tolerance $current_max $old_max)

            case $comparison in
                "worse") status="❌ Worse" ;;
                "better") status="✅ Better" ;;
                "same") status="⚖️ Same" ;;
                *) status="⚠️ Unknown" ;;
            esac

            printf "%-30s %-12.2f %-12.2f %-12.2f %-12s %-12s%%\n" \
                   "$current_op" "$current_max" "$old_max" "$diff" "$status" "$variance"
        else
            printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" \
                   "$current_op" "N/A" "N/A" "N/A" "⚠️ Invalid" "N/A"
        fi
    done < "$current_file" 3< "$old_file"
}

compare_scenario "scenario1"
compare_scenario "scenario2"

# Check if any scenario has "worse" results
check_regression() {
    local scenario=$1
    local current_file="${CURRENT_RESULTS}.${scenario}.summary"
    local old_file="${OLD_RESULTS}.${scenario}.summary"
    local regression_detected=0

    while IFS= read -r current_line && IFS= read -r old_line <&3; do
        # Skip empty lines
        if [ -z "$current_line" ] || [ -z "$old_line" ]; then
            continue
        fi

        current_metrics=($(extract_metrics "$current_line"))
        old_metrics=($(extract_metrics "$old_line"))

        current_op="${current_metrics[0]} ${current_metrics[1]}"
        old_op="${old_metrics[0]} ${old_metrics[1]}"

        if [ "$current_op" != "$old_op" ]; then
            continue
        fi

        current_max=${current_metrics[2]}
        old_max=${old_metrics[2]}

        if [[ "$current_max" =~ ^[0-9.]+$ ]] && [[ "$old_max" =~ ^[0-9.]+$ ]]; then
            comparison=$(compare_with_tolerance $current_max $old_max)
            if [ "$comparison" == "worse" ]; then
                variance=$(echo "scale=2; ($current_max - $old_max)*100/$old_max" | bc -l)
                echo "Regression detected in $scenario for $current_op: Current $current_max vs Old $old_max (Variance: ${variance}%)"
                regression_detected=1
            fi
        fi
    done < "$current_file" 3< "$old_file"

    return $regression_detected
}

echo ""
echo "===================================================================="
echo "Regression Check Summary (with 10% tolerance)"
echo "===================================================================="

regression_found=0
if ! check_regression "scenario1"; then
    regression_found=1
fi
if ! check_regression "scenario2"; then
    regression_found=1
fi

if [ $regression_found -eq 1 ]; then
    echo ""
    echo "ERROR: Performance regression detected compared to old version!"
    exit 1
else
    echo ""
    echo "SUCCESS: No performance regression detected."
    exit 0
fi

================================================
FILE: .github/scripts/copyFile.js
================================================
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');

if (process.argv.length !== 4) {
  console.error('Usage: node copyFile.js <sourceFile> <destinationFile>');
  process.exit(1);
}

const sourceFile = path.resolve(process.argv[2]);
const destinationFile = path.resolve(process.argv[3]);

fs.copyFile(sourceFile, destinationFile, async (err) => {
  if (err) {
    console.error('Error copying file:', err);
    process.exit(1);
  }
  console.log('File copied successfully.');
});

================================================
FILE: .github/scripts/fio.sh
================================================
#/bin/bash -e 
get_fio_job_options(){
    fio_job_name=$1
    case "$fio_job_name" in
        "big-file-sequential-read") fio_job="big-file-sequential-read:  --rw=read --refill_buffers --bs=256k --size=1G"
        ;;
        "big-file-sequential-write") fio_job="big-file-sequential-write:  --rw=write --refill_buffers --bs=256k  --size=1G"
        ;;
        "big-file-multi-read-1") fio_job="big-file-multi-read-1:  --rw=read --refill_buffers --bs=256k --size=1G --numjobs=1"
        ;;
        "big-file-multi-read-4") fio_job="big-file-multi-read-4:  --rw=read --refill_buffers --bs=256k --size=1G --numjobs=4"
        ;;
        "big-file-multi-read-16") fio_job="big-file-multi-read-16:  --rw=read --refill_buffers --bs=256k --size=1G --numjobs=16"
        ;;
        "big-file-multi-write-1") fio_job="big-file-multi-write-1:       --rw=write --refill_buffers --bs=256k --size=1G --numjobs=1"
        ;;
        "big-file-multi-write-4") fio_job="big-file-multi-write-4:       --rw=write --refill_buffers --bs=256k --size=1G --numjobs=4"
        ;;
        "big-file-multi-write-16") fio_job="big-file-multi-write-16:       --rw=write --refill_buffers --bs=256k --size=1G --numjobs=16"
        ;;
        "big-file-rand-read-4k") fio_job="big-file-rand-read-4k:       --rw=randread --refill_buffers --size=1G --filename=randread.bin --bs=4k"
        ;;
        "big-file-rand-read-256k") fio_job="big-file-rand-read-256k:       --rw=randread --refill_buffers --size=1G --filename=randread.bin --bs=256k"
        ;;
        "big-file-random-write-16k") fio_job="big-file-random-write-16k:    --rw=randwrite --refill_buffers --size=1G --bs=16k"
        ;;
        "big-file-random-write-256k") fio_job="big-file-random-write-256k:    --rw=randwrite --refill_buffers --size=1G --bs=256k"
        ;;
        "small-file-seq-read-4k") fio_job="small-file-seq-read-4k:      --rw=read --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 :--cache-size=0"
        ;;
        "small-file-seq-read-256k") fio_job="small-file-seq-read-256k:      --rw=read --file_service_type=sequential --bs=256k --filesize=256k --nrfiles=10000 :--cache-size=0"
        ;;
        "small-file-seq-write-4k") fio_job="small-file-seq-write-4k:     --rw=write --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 :--writeback"
        ;;
        "small-file-seq-write-256k") fio_job="small-file-seq-write-256k:     --rw=write --file_service_type=sequential --bs=256k --filesize=256k --nrfiles=10000 :--writeback"
        ;;
        "small-file-multi-read-1") fio_job="small-file-multi-read-1:      --rw=read --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=1"
        ;;
        "small-file-multi-read-4") fio_job="small-file-multi-read-4:      --rw=read --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=4"
        ;;
        "small-file-multi-read-16") fio_job="small-file-multi-read-16:      --rw=read --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=16"
        ;;
        "small-file-multi-write-1") fio_job="small-file-multi-write-1:     --rw=write --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=1"
        ;;
        "small-file-multi-write-4") fio_job="small-file-multi-write-4:     --rw=write --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=4"
        ;;
        "small-file-multi-write-16") fio_job="small-file-multi-write-16:     --rw=write --file_service_type=sequential --bs=4k --filesize=4k --nrfiles=10000 --numjobs=16"
        ;;
    esac
    echo $fio_job
}
parse_bandwidth(){
    echo "parse bandwidth"  >&2
    cat fio.log 1>&2
    bw_str=$(tail -1 fio.log | awk '{print $2}' | awk -F '=' '{print $2}' )
    echo bw_str is $bw_str  >&2
    bw=$(echo $bw_str | sed 's/.iB.*//g') 
    if [[ $bw_str == *KiB* ]]; then
        bw=$(echo "scale=2; $bw/1024.0" | bc -l)
    elif [[ $bw_str == *GiB* ]]; then
        bw=$(echo "scale=2; $bw*1024.0" | bc -l)
    fi
    echo bw is $bw  >&2
    echo $bw 
}
          
fio_test()
{
    meta_url=$1
    fio_job_name=$2
    echo "Fio Benchmark"
    fio_job_options=$(get_fio_job_options $fio_job_name)
    echo fio_job_options is $fio_job_options
    name=$(echo $fio_job_options | awk -F: '{print $1}' | xargs)
    fio_arg=$(echo $fio_job_options | awk -F: '{print $2}' | xargs)
    mount_arg=$(echo $fio_job_options | awk -F: '{print $3}' | xargs)
    ./juicefs format --trash-days 0 --storage minio --bucket http://localhost:9000/fio --access-key minioadmin --secret-key minioadmin $meta_url fio
    ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report $mount_arg
    if [[ "$name" =~ ^big-file-rand-read.* ]]; then
        block_size=$(echo $name | awk -F- '{print $NF}' | xargs)
        echo block_size is $block_size
        fio --name=big-file-rand-read-preload --directory=/tmp/jfs --rw=randread --refill_buffers --size=1G --filename=randread.bin --bs=$block_size --pre_read=1
        sudo sync 
        sudo bash -c  "echo 3 > /proc/sys/vm/drop_caches"
    fi
    echo "start fio"
    fio --name=$name --directory=/tmp/jfs $fio_arg | tee "fio.log"
    echo "finish fio"
    ./juicefs umount -f /tmp/jfs
    uuid=$(./juicefs status $meta_url | grep UUID | cut -d '"' -f 4)
    if [ -n "$uuid" ]; then
        sudo ./juicefs destroy --yes $meta_url $uuid
    fi
}
meta_url=$1
name=$2
fio_test $meta_url $name
bandwidth=$(parse_bandwidth)
echo bandwidth is $bandwidth
[[ -z "$bandwidth" ]] && echo "bandwidth is empty" && exit 1
meta=$(echo $meta_url | awk -F: '{print $1}')
echo meta is $meta
[[ -z "$meta" ]] && echo "meta is empty" && exit 1
.github/scripts/save_benchmark.sh --name $name --result $bandwidth --meta $meta --storage minio

================================================
FILE: .github/scripts/flush_meta.py
================================================
import argparse
import os
from posixpath import expanduser
from utils import *

if __name__ == "__main__":
    p = argparse.ArgumentParser()
    p.add_argument("meta_url")
    args = p.parse_args(sys.argv[1:])
    flush_meta(args.meta_url)

================================================
FILE: .github/scripts/fsrand.py
================================================
#!/usr/bin/env python

# Copyright (c) 2015, Bill Zissimopoulos. All rights reserved.
#
# Redistribution  and use  in source  and  binary forms,  with or  without
# modification, are  permitted provided that the  following conditions are
# met:
#
# 1.  Redistributions  of source  code  must  retain the  above  copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions  in binary  form must  reproduce the  above copyright
# notice,  this list  of conditions  and the  following disclaimer  in the
# documentation and/or other materials provided with the distribution.
#
# 3.  Neither the  name  of the  copyright  holder nor  the  names of  its
# contributors may  be used  to endorse or  promote products  derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY  THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND  ANY EXPRESS OR  IMPLIED WARRANTIES, INCLUDING, BUT  NOT LIMITED
# TO,  THE  IMPLIED  WARRANTIES  OF  MERCHANTABILITY  AND  FITNESS  FOR  A
# PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT  SHALL THE  COPYRIGHT
# HOLDER OR CONTRIBUTORS  BE LIABLE FOR ANY  DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL   DAMAGES  (INCLUDING,  BUT  NOT
# LIMITED TO,  PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS  OF USE,
# DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION) HOWEVER CAUSED AND  ON ANY
# THEORY  OF LIABILITY,  WHETHER IN  CONTRACT, STRICT  LIABILITY, OR  TORT
# (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT OF  THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import subprocess
try:
    __import__("xattr")
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import os, random
import platform
import unicodedata
from xmlrpc.client import boolean
import xattr 
class Devnull(object):
    def write(self, *args):
        pass
devnull = Devnull()
class FsRandomizer(object):
    def __init__(self, path, count, seed):
        self.stdout = devnull
        self.stderr = devnull
        self.verbose = 0
        self.maxofs = 192*1024
        self.maxlen =  64*1024
        self.path = os.path.realpath(path)
        self.count = count
        self.random = random.Random(seed)
        self.dictionary = None
    def __stdout(self, s):
        self.stdout.write(str(s) + "\n")
    def __stderr(self, s):
        self.stderr.write(str(s) + "\n")
    def __getdir_recurse(self, path):
        try:
            n = self.random.choice(sorted(os.listdir(path)))
        except:
            return path
        p = os.path.join(path, n)
        if os.path.isdir(p):
            return self.__getdir_recurse(p)
        else:
            return path
    def __getdir(self):
        path = self.__getdir_recurse(self.path)
        parts = path[len(self.path):].split(os.sep)
        parts = parts[0:self.random.randint(1, len(parts))]
        return os.path.join(self.path, *parts)
    def __getsubpath(self, path):
        try:
            # print("\t".join(sorted(os.listdir(path))))
            n = self.random.choice(sorted(os.listdir(path)))
        except:
            return path
        return os.path.join(path, n)

    def __gen_unicode_name(self, lower_limit=1, upper_limit=64):
        unicodes = ''.join(
            chr(char)
            for char in range(1000)
            # use the unicode categories that don't include control codes
            # if unicodedata.category(chr(char))[0] in ('LMNPSZ') and chr(char) != '/'
            if unicodedata.category(chr(char))[0] in  ('LMNPSZ') and chr(char) != '/'
            )
        assert('/' not in unicodes)
        rand_length = self.random.randint(lower_limit, upper_limit)
        # generate it
        utf_string = ''.join([self.random.choice(unicodes) for i in range(rand_length)])
        if utf_string == '.' or utf_string == '..':
            utf_string = 'ABC'
        assert('/' not in utf_string)
        # print(''.join([unicodedata.category(c) for c in utf_string]))
        return utf_string

    def __gen_ascii_name(self, lower_limit=1, upper_limit=64):
        l = self.random.randint(lower_limit, upper_limit)
        n = [self.random.choice("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") for i in range(l)]
        return "".join(n)

    def __newname(self):
        if self.dictionary:
            return self.random.choice(self.dictionary)
        else:
            if self.ascii:
                return self.__gen_ascii_name()
            else:
                return self.__gen_unicode_name()

    def __newsubpath(self, path):
        while True:
            p = os.path.join(path, self.__newname())
            if not os.path.lexists(p):
                return p
    def __newmode(self, mode):
        return mode | self.random.randint(0, 0o077)
    def __random_write(self, file):
        o = self.random.randint(0, self.maxofs)
        l = self.random.randint(0, self.maxlen)
        # b = bytearray(self.random.getrandbits(8) for _ in range(l))
        # b = self.random.randbytes(l)
        b = bytes('abc', "utf-8")
        file.seek(o)
        file.write(b)
    def __create(self, path):
        assert not os.path.exists(path)
        with open(path, "wb") as f:
            self.__random_write(f)
    def __update(self, path):
        assert os.path.exists(path)
        with open(path, "r+b") as f:
            self.__random_write(f)
    def randomize(self):
        for i in range(self.count):
            op = self.random.choice("CCRUUSL")
            if op == "C":
                path = self.__newsubpath(self.__getdir())
                if self.verbose:
                    self.__stderr("%s, CREATE %s" % (str(i), path))
                if self.random.randint(0, 1):
                    self.__create(path)
                    os.chmod(path, self.__newmode(0o0600))
                else:
                    os.mkdir(path)
                    os.chmod(path, self.__newmode(0o0700))
            elif op == "S":
                src = self.__getsubpath(self.__getdir())
                if not os.path.exists(src):
                    continue
                if os.path.isdir(src):
                    continue
                dest = self.__newsubpath(self.__getdir())
                assert(not os.path.exists(dest))
                if self.verbose:
                    self.__stderr("%s, CREATE SYMLINK from %s to %s" % (str(i), src, dest))
                try:
                    os.symlink(src, dest)
                except: 
                    print("".join([str(ord(c)) for c in src]))
                    print("".join([str(ord(c)) for c in dest]))
                    raise Exception("OS error: {0}".format(err))
            elif op == "L":
                src = self.__getsubpath(self.__getdir())
                if not os.path.exists(src):
                    continue
                if os.path.isdir(src):
                    continue
                dest = self.__newsubpath(self.__getdir())
                assert(not os.path.exists(dest))
                if self.verbose:
                    self.__stderr("%s, CREATE LINK from %s to %s" % (str(i), src, dest))
                try:
                    os.link(src, dest)
                except OSError as err :
                    print("".join([str(ord(c)) for c in src]))
                    print("".join([str(ord(c)) for c in dest]))
                    print("OS error: {0}".format(err))
                    raise Exception("OS error: {0}".format(err))
            elif op == "R":
                path = self.__getsubpath(self.__getdir())
                if os.path.realpath(path) == self.path:
                    continue
                if self.verbose:
                    self.__stderr("%s, REMOVE %s" % (str(i), path))
                if not os.path.isdir(path):
                    os.unlink(path)
                else:
                    try:
                        os.rmdir(path)
                    except:
                        pass

            elif op == "X":
                path = self.__getsubpath(self.__getdir())
                if not os.path.exists(path):
                    continue
                if self.verbose:
                    self.__stderr("%s, SETXATTR %s" % (str(i), path))
                key = self.__gen_unicode_name()
                value = self.__gen_unicode_name()
                if platform.system() == 'Linux':
                    os.system(f'setfattr -n {key} -v {value} {path}')
                else:
                    xattr.setxattr(path, key, bytes(value, "utf-8"))
    
                value_set = xattr.getxattr(path, key)
                assert( bytes(value, 'utf-8') == value_set)

            elif op == "U":
                path = self.__getsubpath(self.__getdir())
                if os.path.realpath(path) == self.path:
                    continue
                if not os.path.exists(path):
                    continue
                if self.verbose:
                    self.__stderr("%s, UPDATE %s" % (str(i), path))
                u = self.random.randint(0, 2)
                if u == 0:
                    if not os.path.isdir(path):
                        os.chmod(path, self.__newmode(0o0600))
                    else:
                        os.chmod(path, self.__newmode(0o0700))
                elif u == 1:
                    if not os.path.isdir(path):
                        self.__update(path)
                else:
                    if not os.path.isdir(path):
                        self.__update(path)
                        os.chmod(path, self.__newmode(0o0600))
                    else:
                        os.chmod(path, self.__newmode(0o0700))
            
if "__main__" == __name__:
    import argparse, sys, time
    def info(s):
        print ("%s: %s" % (os.path.basename(sys.argv[0]), s))
    def warn(s):
        print ("%s: %s" % (os.path.basename(sys.argv[0]), s))
    def fail(s, exitcode = 1):
        warn(s)
        sys.exit(exitcode)
    def main():
        p = argparse.ArgumentParser()
        p.add_argument("-v", "--verbose", action="count", default=0)
        p.add_argument("-c", "--count", type=int, default=100)
        p.add_argument("-s", "--seed", type=int, default=0)
        p.add_argument("-a", "--ascii", action="count", default=0)
        p.add_argument("-d", "--dictionary")
        p.add_argument("path")
        args = p.parse_args(sys.argv[1:])
        if args.seed == 0:
            args.seed = int(time.time())
        if not os.path.isdir(args.path):
            os.mkdir(args.path)
            # fail("path must exist and be a directory")
        if args.dictionary:
            with open(args.dictionary) as f:
                args.dictionary = [l.strip() for l in f]
        info("count=%s seed=%s " % (args.count, args.seed))
        os.umask(0)
        fsrand = FsRandomizer(args.path, args.count, args.seed)
        fsrand.dictionary = args.dictionary
        fsrand.stdout = sys.stdout
        fsrand.stderr = sys.stderr
        fsrand.verbose = args.verbose
        fsrand.ascii = args.ascii
        fsrand.randomize()
        info("create files succeed")
    def __entry():
        try:
            main()
        except EnvironmentError as ex:
            fail(ex)
        except KeyboardInterrupt:
            fail("interrupted", 130)
    __entry()

================================================
FILE: .github/scripts/hypo/command.py
================================================
from difflib import Differ
import json
import os
import re
import subprocess
try: 
    __import__('jsondiff')
except ImportError:
    subprocess.check_call(["pip", "install", "jsondiff"])
from jsondiff import diff
try: 
    __import__('psutil')
except ImportError:
    subprocess.check_call(["pip", "install", "psutil"])
import psutil
try: 
    __import__('fallocate')
except ImportError:
    subprocess.check_call(["pip", "install", "fallocate"])
try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import HealthCheck, assume, strategies as st, settings, Verbosity
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple
from hypothesis import Phase, seed
from hypothesis.database import DirectoryBasedExampleDatabase
import random
from common import run_cmd
from strategy import *
from fs_op import FsOperation
from command_op import CommandOperation
from fs import JuicefsMachine
import common

SEED=int(os.environ.get('SEED', random.randint(0, 1000000000)))

SUDO_USERS = ['root', 'user1']
st_sudo_user = st.sampled_from(SUDO_USERS)

@seed(SEED)
class JuicefsCommandMachine(JuicefsMachine):
    Files = Bundle('files')
    Folders = Bundle('folders')
    Entries = Files | Folders
    MP1 = '/tmp/jfs1'
    MP2 = '/tmp/jfs2'
    ROOT_DIR1=os.path.join(MP1, 'fsrand')
    ROOT_DIR2=os.path.join(MP2, 'fsrand')
    EXCLUDE_RULES = ['rebalance_dir', 'rebalance_file', 'config']
    # EXCLUDE_RULES = []
    INCLUDE_RULES = ['dump_load_dump', 'mkdir', 'create_file', 'set_xattr', 'dump']
    cmd1 = CommandOperation('cmd1', MP1, ROOT_DIR1)
    cmd2 = CommandOperation('cmd2', MP2, ROOT_DIR2)
    fsop1 = FsOperation('fs1', ROOT_DIR1)
    fsop2 = FsOperation('fs2', ROOT_DIR2)
    def __init__(self):
        super().__init__()
        
    def get_default_rootdir1(self):
        return os.path.join(self.MP1, 'fsrand')
    
    def get_default_rootdir2(self):
        return os.path.join(self.MP2, 'fsrand')

    def equal(self, result1, result2):
        if type(result1) != type(result2):
            return False
        if isinstance(result1, Exception):
            if 'panic:' in str(result1) or 'panic:' in str(result2):
                return False
            result1 = str(result1)
            result2 = str(result2)
        result1 = common.replace(result1, self.MP1, '***')
        result2 = common.replace(result2, self.MP2, '***')
        # print(f'result1 is {result1}\nresult2 is {result2}')
        return result1 == result2

    def get_client_version(self, mount):
        output = run_cmd(f'{mount} version')
        return output.split()[2]

    def should_run(self, rule):
        if len(self.EXCLUDE_RULES) > 0:
            return rule not in self.EXCLUDE_RULES
        else:
            return rule in self.INCLUDE_RULES

    @rule(
          entry = Entries.filter(lambda x: x != multiple()),
          raw = st.just(True),
          recuisive = st.booleans(),
          strict = st.just(True),
          user = st_sudo_user
          )
    @precondition(lambda self: self.should_run('info'))
    def info(self, entry, raw=True, recuisive=False, strict=True, user='root'):
        result1 = self.cmd1.do_info(entry=entry, user=user, strict=strict, raw=raw, recuisive=recuisive) 
        result2 = self.cmd2.do_info(entry=entry, user=user, strict=strict, raw=raw, recuisive=recuisive)
        assert self.equal(result1, result2), f'\033[31minfo:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(entry = Entries.filter(lambda x: x != multiple()),
          user = st_sudo_user
        )
    @precondition(lambda self: self.should_run('rmr'))
    def rmr(self, entry, user='root'):
        assume(entry != '')
        result1 = self.cmd1.do_rmr(entry=entry, user=user)
        result2 = self.cmd2.do_rmr(entry=entry, user=user)
        assert self.equal(result1, result2), f'\033[31mrmr:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule()
    @precondition(lambda self: self.should_run('status'))
    def status(self):
        result1 = self.cmd1.do_status()
        result2 = self.cmd2.do_status()
        assert result1 == result2, f'\033[31mresult1 is {result1}\nresult2 is {result2}, {diff(result1, result2)}\033[0m'

    @rule(entry = Entries.filter(lambda x: x != multiple()),
        user = st_sudo_user
    )
    @precondition(lambda self: self.should_run('warmup'))
    def warmup(self, entry, user='root'):
        result1 = self.cmd1.do_warmup(entry=entry, user=user)
        result2 = self.cmd2.do_warmup(entry=entry, user=user)
        assert self.equal(result1, result2), f'\033[31mwarmup:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        compact = st.booleans(),
        delete = st.booleans(),
        user = st.just('root'),
    )
    @precondition(lambda self: self.should_run('gc'))
    def gc(self, compact=False, delete=False, user='root'):
        result1 = self.cmd1.do_gc(compact=compact, delete=delete, user=user)
        result2 = self.cmd2.do_gc(compact=compact, delete=delete, user=user)
        assert self.equal(result1, result2), f'\033[31mgc:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        entry = Entries.filter(lambda x: x != multiple()),
        repair = st.booleans(),
        recuisive = st.booleans(),
        user = st_sudo_user, 
    )
    @precondition(lambda self: self.should_run('fsck'))
    def fsck(self, entry, repair=False, recuisive=False, user='root'):
        result1 = self.cmd1.do_fsck(entry=entry, repair=repair, recuisive=recuisive, user=user)
        result2 = self.cmd2.do_fsck(entry=entry, repair=repair, recuisive=recuisive, user=user)
        assert self.equal(result1, result2), f'\033[31mfsck:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        entry = Entries.filter(lambda x: x != multiple()),
        parent = Folders.filter(lambda x: x != multiple()),
        new_entry_name = st_file_name,
        user = st_sudo_user,
        preserve = st.booleans()
    )
    @precondition(lambda self: self.should_run('clone'))
    def clone(self, entry, parent, new_entry_name, preserve=False, user='root'):
        result1 = self.cmd1.do_clone(entry=entry, parent=parent, new_entry_name=new_entry_name, preserve=preserve, user=user)
        result2 = self.cmd2.do_clone(entry=entry, parent=parent, new_entry_name=new_entry_name, preserve=preserve, user=user)
        assert self.equal(result1, result2), f'\033[31mclone:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(folder = Folders.filter(lambda x: x != multiple()),
        fast = st.booleans(),
        skip_trash = st.booleans(),
        threads = st.integers(min_value=1, max_value=10),
        keep_secret_key = st.booleans(),
        user = st.just('root')
    )
    @precondition(lambda self: self.should_run('dump'))
    def dump(self, folder, fast, skip_trash, threads, keep_secret_key, user='root'):
        result1 = self.cmd1.do_dump(folder=folder, fast=fast, skip_trash=skip_trash, threads=threads, keep_secret_key=keep_secret_key, user=user)
        result2 = self.cmd2.do_dump(folder=folder, fast=fast, skip_trash=skip_trash, threads=threads, keep_secret_key=keep_secret_key, user=user)
        d=''
        if isinstance(result1, str) and isinstance(result2, str):
            d=self.diff(result1, result2)
        assert self.equal(result1, result2), f'\033[31mdump:\nresult1 is {result1}\nresult2 is {result2}\ndiff is {d}\033[0m'

    @rule(folder = st.just(''),
        fast = st.booleans(),
        skip_trash = st.booleans(),
        threads = st.integers(min_value=1, max_value=10),
        keep_secret_key = st.booleans(),
        user = st.just('root')
    )
    @precondition(lambda self: self.should_run('dump_load_dump'))
    def dump_load_dump(self, folder, fast=False, skip_trash=False, threads=10, keep_secret_key=False, user='root'):
        result1 = self.cmd1.do_dump_load_dump(folder=folder, fast=fast, skip_trash=skip_trash, threads=threads, keep_secret_key=keep_secret_key, user=user)
        result2 = self.cmd2.do_dump_load_dump(folder=folder, fast=fast, skip_trash=skip_trash, threads=threads, keep_secret_key=keep_secret_key, user=user)
        d=''
        if isinstance(result1, str) and isinstance(result2, str):
            d=self.diff(result1, result2)
        assert self.equal(result1, result2), f'\033[31mdump:\nresult1 is {result1}\nresult2 is {result2}\ndiff is {d}\033[0m'

    def diff(self, str1:str, str2:str):
        differ = Differ()
        diff = differ.compare(str1.splitlines(), str2.splitlines())
        return '\n'.join([line for line in diff])
    
    @rule(
        user = st_sudo_user
    )
    @precondition(lambda self: self.should_run('trash_list') and False)
    def trash_list(self, user='root'):
        result1 = self.cmd1.do_trash_list(user=user)
        result2 = self.cmd2.do_trash_list(user=user)
        assert self.equal(result1, result2), f'\033[31mtrash_list:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        put_back = st.booleans(),
        threads = st.integers(min_value=1, max_value=10),
        user=st_sudo_user
    )
    @precondition(lambda self: self.should_run('restore') and False)
    def restore(self, put_back, threads, user='root'):
        result1 = self.cmd1.do_restore(put_back=put_back, threads=threads, user=user)
        result2 = self.cmd2.do_restore(put_back=put_back, threads=threads, user=user)
        assert self.equal(result1, result2), f'\033[31mrestore:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        entry = Entries.filter(lambda x: x != multiple()),
        threads = st.integers(min_value=1, max_value=10),
        user = st_sudo_user
    )
    @precondition(lambda self: self.should_run('compact'))
    def compact(self, entry, threads, user='root'):
        result1 = self.cmd1.do_compact(entry=entry, threads=threads, user=user)
        result2 = self.cmd2.do_compact(entry=entry, threads=threads, user=user)
        assert self.equal(result1, result2), f'\033[31mcompact:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        capacity = st.integers(min_value=1, max_value=2),
        inodes = st.one_of(st.just(0), st.integers(min_value=50, max_value=100)),
        trash_days = st.integers(min_value=0, max_value=1),
        enable_acl = st.booleans(),
        encrypt_secret = st.booleans(),
        force = st.booleans(),
        yes = st.just(True),
        user = st_sudo_user
    )
    @precondition(lambda self: self.should_run('config'))
    def config(self, capacity, inodes, trash_days, enable_acl, encrypt_secret, force, yes, user='root'):
        result1 = self.cmd1.do_config(capacity=capacity, inodes=inodes, trash_days=trash_days, enable_acl=enable_acl, encrypt_secret=encrypt_secret, force=force, yes=yes, user=user)
        result2 = self.cmd2.do_config(capacity=capacity, inodes=inodes, trash_days=trash_days, enable_acl=enable_acl, encrypt_secret=encrypt_secret, force=force, yes=yes, user=user)
        assert self.equal(result1, result2), f'\033[31mconfig:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    def teardown(self):
        pass

if __name__ == '__main__':
    MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '100'))
    STEP_COUNT=int(os.environ.get('STEP_COUNT', '50'))
    ci_db = DirectoryBasedExampleDatabase(".hypothesis/examples")    
    settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
    settings.register_profile("schedule", max_examples=500, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=200, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    settings.register_profile("pull_request", max_examples=100, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=50, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    
    if os.environ.get('CI'):
        event_name = os.environ.get('GITHUB_EVENT_NAME')
        if event_name == 'schedule':
            profile = 'schedule'
        else:
            profile = 'pull_request'
    else:
        profile = os.environ.get('PROFILE', 'dev')
    print(f'profile is {profile}')
    settings.load_profile(profile)
    
    juicefs_machine = JuicefsCommandMachine.TestCase()
    juicefs_machine.runTest()
    print(json.dumps(FsOperation.stats.get(), sort_keys=True, indent=4))
    
    
================================================
FILE: .github/scripts/hypo/command_op.py
================================================
import json
import os
import pwd
import re
import shlex
import subprocess
try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
try: 
    __import__('psutil')
except ImportError:
    subprocess.check_call(["pip", "install", "psutil"])
import psutil
from stats import Statistics
import common


class CommandOperation:
    JFS_CONTROL_FILES=['.accesslog', '.config', '.stats']
    stats = Statistics()
    def __init__(self, name, mp, root_dir):
        self.logger = common.setup_logger(f'./{name}.log', name, os.environ.get('LOG_LEVEL', 'INFO'))
        self.name = name
        self.mp = mp
        self.root_dir = root_dir
        self.meta_url = self.get_meta_url(mp)
                
    def guess_password(self, meta_url):
        if '****' not in meta_url:
            return meta_url
        if meta_url.startswith('postgres://'):
            return meta_url.replace('****', 'postgres')
        else:
            return meta_url.replace('****', 'root')

    def get_meta_url(self, mp):
        with open(os.path.join(mp, '.config')) as f:
            config = json.loads(f.read())
            pid = config['Pid']
            process = psutil.Process(pid)
            cmdline = process.cmdline()
            for item in cmdline:
                if ' ' in item:
                    for subitem in item.split(' '):
                        if '://' in subitem:
                            return self.guess_password(subitem)
                elif '://' in item:
                    return self.guess_password(item)
            raise Exception(f'get_meta_url: {cmdline} does not contain meta url')
        
    def run_cmd(self, command:str, stderr=subprocess.STDOUT) -> str:
        self.logger.info(f'run_cmd: {command}')
        if '|' in command or '>' in command or '&' in command:
            ret=os.system(command)
            if ret == 0:
                return ret
            else: 
                raise Exception(f"run command {command} failed with {ret}")
        try:
            output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=stderr)
        except subprocess.CalledProcessError as e:
            raise e
        return output.stdout.decode()

    def seteuid(self, user):
        os.seteuid(pwd.getpwnam(user).pw_uid)
        os.setegid(pwd.getpwnam(user).pw_gid)
    
    def handleException(self, e, action, path, **kwargs):
        if isinstance(e, subprocess.CalledProcessError):
            err = e.output.decode()
        else:
            err = str(e)
        err = '\n'.join([elem.split('<FATAL>:')[-1].split('<ERROR>:')[-1] for elem in err.split('\n')])
        err = re.sub(r'\[\w+\.go:\d+\]', '', err)
        if err.find('setfacl') != -1 and err.find('\n') != -1:
            err = '\n'.join(sorted(err.split('\n')))
        self.stats.failure(action)
        self.logger.info(f'{action} {path} {kwargs} failed: {err}')
        return Exception(err)

    def get_raw(self, size:str):
        # get bytes count from '4.00 KiB (4096 Bytes)' or '3 Bytes'
        if size.find('(') > -1:
            return size.split('(')[1].split(' ')[0]
        else:
            return size.split(' ')[0]

    def parse_info(self, info: str):
        li = info.split('\n')
        if "GOCOVERDIR" in li[0]:
            li = li[1:]
        filename = li[0].split(':')[0].strip()
        # assert li[0].strip().startswith('inode:'), f'parse_info: {li[0]} should start with inode:'
        # inode = li[0].split(':')[1].strip()
        assert li[2].strip().startswith('files:'), f'parse_info: {li[2]} should start with files:'
        files = li[2].split(':')[1].strip()   
        assert li[3].strip().startswith('dirs:'), f'parse_info: {li[3]} should start with dirs:'  
        dirs = li[3].split(':')[1].strip()
        assert li[4].strip().startswith('length:'), f'parse_info: {li[4]} should start with length:'
        length = li[4].split(':')[1].strip()
        length = self.get_raw(length)
        assert li[5].strip().startswith('size:'), f'parse_info: {li[5]} should start with size:'
        size = li[5].split(':')[1].strip()
        size = self.get_raw(size)
        assert li[6].strip().startswith('path'), f'parse_info: {li[6]} should start with path:'
        paths = []
        if li[6].strip().startswith('path:'):
            paths.append(li[6].split(':')[1].strip())
        elif li[6].strip().startswith('paths:'):
            for i in range(7, len(li)):
                if li[i].strip().startswith('/'):
                    paths.append(li[i].strip())
                else:
                    break
        paths = ','.join(sorted(paths))
        return filename, files, dirs, length, size, paths

    def do_info(self, entry, strict=True, user='root', raw=True, recuisive=False):
        abs_path = os.path.join(self.root_dir, entry)
        try:
            cmd = f'sudo -u {user} ./juicefs info --log-level error {abs_path}'
            if raw:
                cmd += ' --raw'
            if recuisive:
                cmd += ' --recursive'
            if strict:
                cmd += ' --strict'
            result = self.run_cmd(cmd)
            if '<ERROR>:' in result or "permission denied" in result:
                return self.handleException(Exception(result), 'do_info', abs_path)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_info', abs_path)
        result = self.parse_info(result)
        self.stats.success('do_info')
        self.logger.info(f'do_info {abs_path} succeed')
        return result 
    
    def do_rmr(self, entry, user='root'):
        abspath = os.path.join(self.root_dir, entry)
        try:
            result = self.run_cmd(f'sudo -u {user} ./juicefs rmr --log-level error {abspath}')
            if '<ERROR>:' in result:
                return self.handleException(Exception(result), 'do_rmr', abspath)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_rmr', abspath)
        assert not os.path.exists(abspath), f'do_rmr: {abspath} should not exist'
        self.stats.success('do_rmr')
        self.logger.info(f'do_rmr {abspath} succeed')
        return True
    
    def do_status(self):
        try:
            result = self.run_cmd(f'./juicefs status {self.meta_url} --log-level error', stderr=subprocess.DEVNULL)
            result = json.loads(result)['Setting']
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_status', '')
        self.stats.success('do_status')
        self.logger.info(f'do_status succeed')
        return result['Storage'], result['Bucket'], result['BlockSize'], result['Compression'], \
            result['EncryptAlgo'], result['TrashDays'], result['MetaVersion'], \
            result['MinClientVersion'], result['DirStats'], result['EnableACL']
    
    def do_dump(self, folder, fast=False, skip_trash=False, threads=1, keep_secret_key=False, user='root'):
        abspath = os.path.join(self.root_dir, folder)
        subdir = os.path.relpath(abspath, self.mp)
        try:
            # compact before dump to avoid slice difference
            self.do_compact(folder)
            cmd=self.get_dump_cmd(self.meta_url, subdir, fast, skip_trash, keep_secret_key, threads, user)
            result = self.run_cmd(cmd, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            return self.handleException(e,  'do_dump', abspath)
        self.stats.success('do_dump')
        self.logger.info(f'do_dump {abspath} succeed')
        # with open(f'dump_{self.name}.json', 'w') as f:
        #     f.write(self.clean_dump(result))
        return self.clean_dump(result)

    def get_dump_cmd(self, meta_url, subdir, fast, skip_trash, keep_secret_key, threads, user='root'):
        cmd = f'sudo -u {user} ./juicefs dump --log-level error {meta_url} '
        cmd += f' --subdir /{subdir}' if subdir != '' else ''
        cmd += f' --fast' if fast else ''
        cmd += f' --skip-trash' if skip_trash else ''
        cmd += f' --keep-secret-key' if keep_secret_key else ''
        cmd += f' --threads {threads}'
        cmd += f' --log-level error'
        return cmd

    def do_dump_load_dump(self, folder, fast=False, skip_trash=False, threads=1, keep_secret_key=False, user='root'):
        abspath = os.path.join(self.root_dir, folder)
        subdir = os.path.relpath(abspath, self.mp)
        try:
            print(f'meta_url is {self.meta_url}')
            cmd = self.get_dump_cmd(self.meta_url, subdir, fast, skip_trash, keep_secret_key, threads, user)
            result = self.run_cmd(cmd, stderr=subprocess.DEVNULL)
            with open('dump.json', 'w') as f:
                f.write(result)
            if os.path.exists('load.db'):
                os.remove('load.db')
            self.run_cmd(f'sudo -u {user} ./juicefs load sqlite3://load.db dump.json')
            cmd = self.get_dump_cmd('sqlite3://load.db', '', fast, skip_trash, keep_secret_key, threads, user)
            result = self.run_cmd(cmd, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_dump', abspath)
        self.stats.success('do_dump')
        self.logger.info(f'do_dump {abspath} succeed')
        return self.clean_dump(result)

    def clean_dump(self, dump):
        lines = dump.split('\n')
        new_lines = []
        exclude_keys = ['Name', 'UUID', 'usedSpace', 'usedInodes', 'nextInodes', 'nextChunk', 'nextTrash', 'nextSession']
        reset_keys = ['id', 'inode', 'atimensec', 'mtimensec', 'ctimensec', 'atime', 'ctime', 'mtime']
        for line in lines:
            should_delete = False
            for key in exclude_keys:
                if f'"{key}"' in line:
                    should_delete = True
                    break
            if should_delete:
                continue
            for key in reset_keys:
                if f'"{key}"' in line:
                    pattern = rf'"{key}":(\d+)'
                    line = re.sub(pattern, f'"{key}":0', line)
            new_lines.append(line)
        return '\n'.join(new_lines)

    def do_warmup(self, entry, user='root'):
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.run_cmd(f'sudo -u {user} ./juicefs warmup --log-level error {abspath}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_warmup', abspath)
        self.stats.success('do_warmup')
        self.logger.info(f'do_warmup {abspath} succeed')
        return True

    def do_gc(self, compact:bool,  delete:bool, user:str='root'):
        try:
            cmd = f'sudo -u {user} ./juicefs gc --log-level error {self.meta_url}'
            if compact:
                cmd += ' --compact'
            if delete:
                cmd += ' --delete'
            self.run_cmd(cmd)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_gc', '')
        self.stats.success('do_gc')
        self.logger.info(f'do_gc succeed')
        return True
    
    def do_clone(self, entry, parent, new_entry_name, preserve:bool, user:str='root'):
        abspath = os.path.join(self.root_dir, entry)
        dest_abspath = os.path.join(self.root_dir, parent, new_entry_name)
        try:
            cmd = f'sudo -u {user} ./juicefs clone --log-level error {abspath} {dest_abspath}'
            if preserve:
                cmd += ' --preserve'
            self.run_cmd(cmd)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_clone', '')
        self.stats.success('do_clone')
        self.logger.info(f'do_clone succeed')
        return True    
    
    def do_fsck(self, entry, repair=False, recuisive=False, user='root'):
        abspath = os.path.join(self.root_dir, entry)
        try:
            cmd = f'sudo -u {user} ./juicefs fsck --log-level error {self.meta_url} --path {abspath}'
            if repair:
                cmd += ' --repair'
            if recuisive:
                cmd += ' --recursive'
            self.run_cmd(cmd, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_fsck', '')
        self.stats.success('do_fsck')
        self.logger.info(f'do_fsck succeed')
        return True
    
    def do_trash_list(self, user='root'):
        abspath = os.path.join(self.mp, '.trash')
        try:
            self.seteuid(user)
            li = os.listdir(abspath) 
            li = sorted(li)
        except Exception as e:
            return self.handleException(e, 'do_trash_list', abspath, user=user)
        finally:
            os.seteuid(0)
            os.setegid(0)
        self.stats.success('do_trash_list')
        self.logger.info(f'do_trash_list succeed')
        return tuple(li)
    
    def do_restore(self, put_back, threads, user='root'):
        abspath = os.path.join(self.mp, '.trash')
        try:
            li = os.listdir(abspath)
            for trash_dir in li:
                cmd = f'sudo -u {user} ./juicefs restore {trash_dir} --threads {threads}'
                if put_back:
                    cmd += ' --put-back'
                self.run_cmd(cmd)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_restore', abspath, user=user)
        self.stats.success('do_restore')
        self.logger.info(f'do_restore succeed')
        return True

    def do_trash_restore(self, index, user='root'):
        trash_list = self.do_trash_list()
        if len(trash_list) == 0:
            return ''
        index = index % len(trash_list)
        trash_file:str = trash_list[index]
        abspath = os.path.join(self.mp, '.trash', shlex.quote(trash_file))
        try:
            self.run_cmd(f'sudo -u {user} mv {abspath} {self.mp}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_trash_restore', abspath, user=user)
        restored_path = os.path.join(self.mp, '/'.join(trash_file.split('|')[1:]))
        restored_path = os.path.relpath(restored_path, self.root_dir)
        self.stats.success('do_trash_restore')
        self.logger.info(f'do_trash_restore succeed')
        return restored_path
    
    def do_compact(self, entry, threads=5, user='root'):
        path = os.path.join(self.root_dir, entry)
        try:
            self.run_cmd(f'sudo -u {user} ./juicefs compact --log-level error {path} --threads {threads}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_compact', path, user=user)
        self.stats.success('do_compact')
        self.logger.info(f'do_compact succeed')
        return True
    
    def do_config(self, capacity, inodes, trash_days, enable_acl, encrypt_secret, force, yes, user):
        try:
            cmd = f'sudo -u {user} ./juicefs config {self.meta_url} --capacity {capacity} --inodes {inodes} --trash-days {trash_days} --enable-acl {enable_acl} --encrypt-secret {encrypt_secret}'
            if force:
                cmd += ' --force'
            if yes:
                cmd += ' --yes'
            self.run_cmd(cmd)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_config', '')
        self.stats.success('do_config')
        self.logger.info(f'do_config succeed')
        return True

================================================
FILE: .github/scripts/hypo/command_test.py
================================================
import unittest
from command import JuicefsCommandMachine

class TestCommand(unittest.TestCase):
    def test_dump(self):
        state = JuicefsCommandMachine()
        folders_0 = state.init_folders()
        files_0 = state.create_file(content='', file_name='aazz', mode='w', parent=folders_0, umask=312, user='root')
        value = ''.join([chr(i) for i in range(256)])
        value = value.encode('latin-1')
        value = b'\x2580q\x2589'
        value = b'M\x25DB'
        state.set_xattr(file=files_0, flag=1, name='\x9d', user='root', value=value)
        state.dump_load_dump(folders_0)
        state.teardown()

    def skip_test_info(self):
        state = JuicefsCommandMachine()
        folders_0 = state.init_folders()
        files_2 = state.create_file(content='0', file_name='mvvd', mode='a', parent=folders_0, umask=293, user='root')
        state.info(entry=folders_0, raw=True, recuisive=True, user='user1')
        state.teardown()

    def test_clone(self):
        state = JuicefsCommandMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content='\x9bcR\xba', file_name='ygbl', mode='x', parent=v1, umask=466, user='root')
        state.chmod(entry=v1, mode=715, user='root')
        state.clone(entry=v2, new_entry_name='drqj', parent=v1, preserve=False, user='user1')
        state.teardown()

    def test_config(self):
        state = JuicefsCommandMachine()
        folders_0 = state.init_folders()
        state.config(capacity=1, enable_acl=True, encrypt_secret=True, force=False, inodes=81, trash_days=0, user='root', yes=True)
        state.teardown()

    def test_clone_4834(self):
        #SEE https://github.com/juicedata/juicefs/issues/4834
        state = JuicefsCommandMachine()
        folders_0 = state.init_folders()
        state.chmod(entry=folders_0, mode=2427, user='root')
        folders_1 = state.mkdir(mode=2931, parent=folders_0, subdir='vhjp', umask=369, user='root')
        state.chmod(entry=folders_1, mode=1263, user='root')
        state.clone(entry=folders_1, new_entry_name='tbim', parent=folders_0, preserve=False, user='user1')
        state.teardown()

if __name__ == '__main__':
    unittest.main()

================================================
FILE: .github/scripts/hypo/common.py
================================================

import grp
import json
import logging
import os
import pwd
import subprocess
import sys
import stat
def red(s):
    return f'\033[31m{s}\033[0m'

def replace(src, old, new):
    if isinstance(src, str):
        return src.replace(old, new)
    elif isinstance(src, list) or isinstance(src, tuple):
        return [replace(x, old, new) for x in src]
    elif isinstance(src, dict):
        return {k: replace(v, old, new) for k, v in src.items()}
    else:
        return src
def run_cmd(command: str) -> str:
    print('run_cmd:'+command)
    if '|' in command or '>' in command:
        ret=os.system(command)
        if ret == 0:
            return ret
        else: 
            raise Exception(f"run command {command} failed with {ret}")
    try:
        output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # print(f'<FATAL>: subprocess run error: {e.output.decode()}')
        raise e
    # print(output.stdout.decode())
    # print('run_cmd succeed')
    return output.stdout.decode()


def setup_logger(log_file_path, logger_name, log_level='INFO'):
    if log_level == 'DEBUG':
        log_level = logging.DEBUG
    elif log_level == 'INFO':
        log_level = logging.INFO
    elif log_level == 'WARNING':
        log_level = logging.WARNING
    elif log_level == 'ERROR':
        log_level = logging.ERROR
    # Create a logger object
    assert os.path.exists(os.path.dirname(log_file_path)), red(f'setup_logger: {log_file_path} should exist')
    print(f'setup_logger {log_file_path}')
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)
    # Create a file handler for the logger
    file_handler = logging.FileHandler(log_file_path)
    file_handler.setLevel(logging.DEBUG)
    # Create a stream handler for the logger
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(log_level)
    # Create a formatter for the log messages
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    stream_handler.setFormatter(formatter)
    # Add the file and stream handlers to the logger
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger

def is_jfs(path):
    root = get_root(path)
    file = os.path.join(root, '.jfsconfig')
    return os.path.isfile( file )

def get_root(path):
    path = os.path.abspath(path)
    d = path if os.path.isdir(path) else os.path.dirname(path)
    while d != '/':
        try:
            st = os.stat(d)
            if st.st_ino == 1:
                return d
        except:
            pass
        d = os.path.dirname(d)
    return d

def get_volume_name(path):
    root = get_root(path)
    file = os.path.join(root, '.config')
    if os.path.isfile(file):
        with open(file, 'r') as f:
            config = json.load(f)
            try :
                return config['Meta']['Volume']
            except KeyError:
                return config['Format']['Name']

def get_zones(dir):
    zones = []
    root = get_root(dir)
    for i in range(0, 8):
        try:
            zone = os.path.join(root, f'.jfszone{i}')
            os.stat(zone)
            zones.append(f'.jfszone{i}')
        except Exception as e:
            # print(f'zone {zone} not exist, {str(e)}')
            pass
    if len(zones) > 0:
        return zones
    else:
        return ['']   
    
def get_acl(abspath: str):
    s = run_cmd(f'getfacl {abspath}')
    lines = s.split('\n')
    # s = s.replace("# file: ", "# file: /")
    lines = [line for line in lines if not line.startswith("# file: ")]
    s = '\n'.join(lines)
    return s

def support_acl(path):
    root = get_root(path)
    file = os.path.join(root, '.config')
    if os.path.isfile(file):
        with open(file, 'r') as f:
            config = json.load(f)
            if config['Meta'].get('Args', '').find('--enable-acl') != -1:
                return True
            elif config['Format'].get('EnableACL', False):
                return True
            else:
                return False
    else:
        mount_point = subprocess.check_output(["df", root]).decode("utf-8").splitlines()[-1].split()[0]
        mount_options = subprocess.check_output(["sudo", "tune2fs", "-l", mount_point]).decode("utf-8")
        if "acl" not in mount_options:
            return False
        else:
            return True

def get_stat_field(st: os.stat_result):
    if stat.S_ISREG(st.st_mode):
        return st.st_gid, st.st_uid,  st.st_size, oct(st.st_mode), st.st_nlink
    elif stat.S_ISDIR(st.st_mode):
        return st.st_gid, st.st_uid, oct(st.st_mode)
    elif stat.S_ISLNK(st.st_mode):
        return st.st_gid, st.st_uid, oct(st.st_mode)
    else:
        return ()
    
    
def create_group(groupname):
    try:
        grp.getgrnam(groupname)
    except KeyError:
        subprocess.run(['groupadd', groupname], check=True)
        print(f"create Group {groupname}")

def create_user(user):
    try:
        pwd.getpwnam(user)
        subprocess.run(['usermod', '-g', user, '-G', '', user], check=True)
    except KeyError:
        subprocess.run(['useradd', '-g', user, '-G', '', user], check=True)
        print(f"create User {user} with group {user}")

def clean_dir(dir):
    try:
        subprocess.check_call(f'rm -rf {dir}'.split())
        assert not os.path.exists(dir), red(f'clean_dir: {dir} should not exist')
        subprocess.check_call(f'mkdir -p {dir}'.split())
        assert os.path.isdir(dir), red(f'clean_dir: {dir} should be dir')
    except subprocess.CalledProcessError as e:
        print(f'clean_dir {dir} failed:{e}, {e.returncode}, {e.output}')
        sys.exit(1)


def compare_content(dir1, dir2):
    os.system('find /tmp/fsrand  -type l ! -exec test -e {} \; -print > broken_symlink.log ')
    exclude_files = []
    with open('broken_symlink.log', 'r') as f:
        lines = f.readlines()
        for line in lines:
            filename = os.path.basename(line.strip())
            exclude_files.append(filename)
    exclude_options = [f'--exclude="{item}"' for item in exclude_files ]
    exclude_options = ' '.join(exclude_options)
    diff_command = f'diff -ur --no-dereference {dir1} {dir2} {exclude_options} 2>&1 |tee diff.log'
    print(diff_command)
    os.system(diff_command)
    with open('diff.log', 'r') as f:
        lines = f.readlines()
        filtered_lines = [line for line in lines if "recursive directory loop" not in line]
        assert len(filtered_lines) == 0, red(f'found diff: \n' + '\n'.join(filtered_lines))

def compare_stat(dir1, dir2):
    for root, dirs, files in os.walk(dir1):
        for file in files:
            path1 = os.path.join(root, file)
            path2 = os.path.join(dir2, os.path.relpath(path1, dir1))
            stat1 = get_stat_field(os.stat(path1))
            stat2 = get_stat_field(os.stat(path2))
            assert stat1 == stat2, red(f"{path1}: {stat1} and {path2}: {stat2} have different stats")
        for dir in dirs:
            path1 = os.path.join(root, dir)
            path2 = os.path.join(dir2, os.path.relpath(path1, dir1))
            stat1 = get_stat_field(os.stat(path1))
            stat2 = get_stat_field(os.stat(path2))
            assert stat1 == stat2, red(f"{path1}: {stat1} and {path2}: {stat2} have different stats")

def compare_acl(dir1, dir2):
    for root, dirs, files in os.walk(dir1):
        for file in files:
            path1 = os.path.join(root, file)
            path2 = os.path.join(dir2, os.path.relpath(path1, dir1))
            if os.path.exists(path2):
                acl1 = get_acl(path1)
                acl2 = get_acl(path2)
                assert acl1 == acl2, red(f"{path1}: {acl1} and {path2}: {acl2} have different acl")
        for dir in dirs:
            path1 = os.path.join(root, dir)
            path2 = os.path.join(dir2, os.path.relpath(path1, dir1))
            if os.path.exists(path2):
                acl1 = get_acl(path1)
                acl2 = get_acl(path2)
                assert acl1 == acl2, red(f"{path1}: {acl1} and {path2}: {acl2} have different acl")


================================================
FILE: .github/scripts/hypo/context.py
================================================

class Context:
    def __init__(self, root_dir:str, mp:str) -> None:
        self.root_dir = root_dir
        self.mp = mp
        self.meta_url = ''
        

================================================
FILE: .github/scripts/hypo/file.py
================================================
import os
import pwd
import re
import subprocess
import json
import common
from common import red
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import assume, strategies as st, settings, Verbosity
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple, consumes
from hypothesis import Phase, seed
from hypothesis.database import DirectoryBasedExampleDatabase
from strategy import *
from file_op import FileOperation
import random
import time

SEED=int(os.environ.get('SEED', random.randint(0, 1000000000)))

@seed(SEED)
class JuicefsDataMachine(RuleBasedStateMachine):
    FILE_NAME = 'a'
    fds = Bundle('fd')
    mms = Bundle('mm')
    use_sdk = os.environ.get('USE_SDK', 'false').lower() == 'true'
    meta_url = os.environ.get('META_URL')
    INCLUDE_RULES = []
    EXCLUDE_RULES = ['seek']
    if os.environ.get('EXCLUDE_RULES'):
        EXCLUDE_RULES = os.environ.get('EXCLUDE_RULES').split(',')
    # EXCLUDE_RULES = ['readline', 'readlines', 'truncate', 'seek', 'flush']
    ROOT_DIR1=os.environ.get('ROOT_DIR1', '/tmp/fsrand')
    ROOT_DIR2=os.environ.get('ROOT_DIR2', '/tmp/jfs/fsrand')
    if use_sdk:
        fsop1 = FileOperation(name='fs1', root_dir=ROOT_DIR1, use_sdk=use_sdk, is_jfs=False, volume_name=None)
        fsop2 = FileOperation(name='fs2', root_dir=ROOT_DIR2, use_sdk=use_sdk, is_jfs=True, volume_name='test-volume', meta_url=meta_url)
    else:
        fsop1 = FileOperation(name='fs1', root_dir=ROOT_DIR1)
        fsop2 = FileOperation(name='fs2', root_dir=ROOT_DIR2)

    def __init__(self):
        super(JuicefsDataMachine, self).__init__()
        print(f'__init__')

    def equal(self, result1, result2):
        if type(result1) != type(result2):
            return False
        if isinstance(result1, Exception):
            if 'panic:' in str(result1) or 'panic:' in str(result2):
                return False
            result1 = str(result1)
            result2 = str(result2)
            if self.use_sdk:
                result1 = self.parse_error_message(result1)
                result2 = self.parse_error_message(result2)
        result1 = common.replace(result1, self.fsop1.root_dir, '***')
        result2 = common.replace(result2, self.fsop2.root_dir, '***')
        return result1 == result2

    def parse_error_message(self, err):
        # extract "[Errno 22] Invalid argument" from the following error message
        # [Errno 22] Invalid argument: '/tmp/fsrand/' -> '/tmp/fsrand/izsn/rfnn'
        # [Errno 22] Invalid argument: (b'/fsrand', b'/fsrand/izsn/rfnn', c_uint(0))
        match = re.search(r"\[Errno \d+\] [^:]+", err)
        if match:
            return match.group(0)
        else:
            return err

    def should_run(self, rule):
        if len(self.EXCLUDE_RULES) > 0:
            return rule not in self.EXCLUDE_RULES
        else:
            return rule in self.INCLUDE_RULES

    @initialize(target = fds)
    def init_folders(self):
        self.fsop1.init_rootdir()
        self.fsop2.init_rootdir()
        f1, _ = self.fsop1.do_open(file=self.FILE_NAME, mode='w+', encoding='utf8', errors='strict')
        f2, _ = self.fsop2.do_open(file=self.FILE_NAME, mode='w+', encoding='utf8', errors='strict')
        assert f1 is not None and f2 is not None, red(f'init_folders:\nf1 is {f1}\nf2 is {f2}')
        return (self.FILE_NAME, f1, f2)

    
    @rule( fd = fds.filter(lambda x: x != multiple()), 
          length = st.integers(min_value=0, max_value=MAX_FILE_SIZE))
    @precondition(lambda self: self.should_run('read'))
    def read(self, fd, length):
        result1 = self.fsop1.do_read(fd=fd[1], file=fd[0], length=length)
        result2 = self.fsop2.do_read(fd=fd[2], file=fd[0], length=length)
        assert self.equal(result1, result2), red(f'read:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
        fd = fds.filter(lambda x: x != multiple()), 
        content = st_content,
    )
    @precondition(lambda self: self.should_run('write'))
    def write(self, fd, content):
        result1 = self.fsop1.do_write(fd=fd[1], file=fd[0], content=content)
        result2 = self.fsop2.do_write(fd=fd[2], file=fd[0], content=content)
        assert self.equal(result1, result2), red(f'write:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(fd = fds.filter(lambda x: x != multiple()), 
        lines = st_lines,
    )
    @precondition(lambda self: self.should_run('writelines'))
    def writelines(self, fd, lines):
        result1 = self.fsop1.do_writelines(fd=fd[1], file=fd[0], lines=lines)
        result2 = self.fsop2.do_writelines(fd=fd[2], file=fd[0], lines=lines)
        assert self.equal(result1, result2), red(f'write:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(fd = fds.filter(lambda x: x != multiple()), 
        offset = st_offset, 
        whence = st_whence
    )
    @precondition(lambda self: self.should_run('seek'))
    def seek(self, fd, offset, whence):
        result1 = self.fsop1.do_seek(fd=fd[1], file=fd[0], offset=offset, whence=whence)
        result2 = self.fsop2.do_seek(fd=fd[2], file=fd[0], offset=offset, whence=whence)
        assert self.equal(result1, result2), red(f'seek:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(fd = fds.filter(lambda x: x != multiple()))    
    @precondition(lambda self: self.should_run('tell'))
    def tell(self, fd):
        result1 = self.fsop1.do_tell(fd=fd[1], file=fd[0])
        result2 = self.fsop2.do_tell(fd=fd[2], file=fd[0])
        assert self.equal(result1, result2), red(f'tell:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(
        target = fds,    
        fd = consumes(fds).filter(lambda x: x != multiple()))
    @precondition(lambda self: self.should_run('close'))
    def close(self, fd):
        result1 = self.fsop1.do_close(fd=fd[1], file=fd[0])
        result2 = self.fsop2.do_close(fd=fd[2], file=fd[0])
        assert self.equal(result1, result2), red(f'close:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return fd
        else:
            return multiple()
    @rule(fd = fds.filter(lambda x: x != multiple()))
    @precondition(lambda self: self.should_run('flush_and_fsync'))
    def flush_and_fsync(self, fd):
        result1 = self.fsop1.do_flush_and_fsync(fd=fd[1], file=fd[0])
        result2 = self.fsop2.do_flush_and_fsync(fd=fd[2], file=fd[0])
        assert self.equal(result1, result2), red(f'flush:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(fd = fds.filter(lambda x: x != multiple()),
          offset = st_offset,
          length = st_fallocate_length,
          )
    @precondition(lambda self: self.should_run('fallocate') and not self.use_sdk)
    def fallocate(self, fd, offset, length):
        result1 = self.fsop1.do_fallocate(fd=fd[1], file=fd[0], offset=offset, length=length)
        result2 = self.fsop2.do_fallocate(fd=fd[2], file=fd[0], offset=offset, length=length)
        assert self.equal(result1, result2), red(f'fallocate:\nresult1 is {result1}\nresult2 is {result2}')

    @rule( fd = fds.filter(lambda x: x != multiple()))
    @precondition(lambda self: self.should_run('readlines'))
    def readlines(self, fd):
        result1 = self.fsop1.do_readlines(fd=fd[1], file=fd[0])
        result2 = self.fsop2.do_readlines(fd=fd[2], file=fd[0])
        assert self.equal(result1, result2), red(f'readlines:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule( fd = fds.filter(lambda x: x != multiple()))
    @precondition(lambda self: self.should_run('readline'))
    def readline(self, fd):
        result1 = self.fsop1.do_readline(fd=fd[1], file=fd[0])
        result2 = self.fsop2.do_readline(fd=fd[2], file=fd[0])
        assert self.equal(result1, result2), red(f'readline:\nresult1 is {result1}\nresult2 is {result2}')
    

    @rule(fd=fds.filter(lambda x: x != multiple()), 
          size=st_truncate_length, 
          )
    @precondition(lambda self: self.should_run('truncate'))
    def truncate(self, fd, size):
        result1 = self.fsop1.do_truncate(fd=fd[1], file=fd[0], size=size)
        result2 = self.fsop2.do_truncate(fd=fd[2], file=fd[0], size=size)
        assert self.equal(result1, result2), red(f'truncate:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
        src=fds.filter(lambda x: x != multiple()),
        dst=fds.filter(lambda x: x != multiple()),
        src_offset = st_offset,
        dst_offset = st_offset,
        length = st_length,
        )
    @precondition(lambda self: self.should_run('copy_file_range') and not self.use_sdk)
    def copy_file_range(self, src, dst, src_offset, dst_offset, length):
        result1 = self.fsop1.do_copy_file_range(src_file=src[0], dst_file=dst[0], src_fd=src[1], dst_fd=dst[1], src_offset=src_offset, dst_offset=dst_offset, length=length)
        result2 = self.fsop2.do_copy_file_range(src_file=src[0], dst_file=dst[0], src_fd=src[2], dst_fd=dst[2], src_offset=src_offset, dst_offset=dst_offset, length=length)
        assert self.equal(result1, result2), red(f'copy_file_range:\nresult1 is {result1}\nresult2 is {result2}')

    def teardown(self):
        pass
        
if __name__ == '__main__':
    MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '100'))
    STEP_COUNT=int(os.environ.get('STEP_COUNT', '50'))
    ci_db = DirectoryBasedExampleDatabase(".hypothesis/examples")    
    settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
    settings.register_profile("schedule", max_examples=1000, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=200, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    settings.register_profile("pull_request", max_examples=100, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=50, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    settings.register_profile("generate", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, \
        phases=[Phase.generate, Phase.target])
    
    if os.environ.get('CI'):
        event_name = os.environ.get('GITHUB_EVENT_NAME')
        if event_name == 'schedule':
            profile = 'schedule'
        else:
            profile = 'pull_request'
    else:
        profile = os.environ.get('PROFILE', 'dev')
    print(f'profile is {profile}')
    settings.load_profile(profile)
    juicefs_machine = JuicefsDataMachine.TestCase()
    juicefs_machine.runTest()
    print(json.dumps(FileOperation.stats.get(), sort_keys=True, indent=4))

================================================
FILE: .github/scripts/hypo/file_op.py
================================================
import hashlib
import io
import mmap
import os
import pwd
import re
import shutil
import stat
import subprocess

try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import xattr
from common import get_acl, get_root, red
from typing import Dict
try: 
    __import__('fallocate')
except ImportError:
    subprocess.check_call(["pip", "install", "fallocate"])
import fallocate
from stats import Statistics
import common
from os.path import dirname
import sys
sys.path.append('.')
from sdk.python.juicefs.juicefs import juicefs

class FileOperation:
    JFS_CONTROL_FILES=['.accesslog', '.config', '.stats']
    stats = Statistics()
    Files = {}
    
    def __init__(self, name, root_dir:str, mount_point=None, use_sdk:bool=False, is_jfs=False, volume_name=None, meta_url=None):
        self.logger =common.setup_logger(f'./{name}.log', name, os.environ.get('LOG_LEVEL', 'INFO'))
        self.root_dir = root_dir.rstrip('/')
        self.use_sdk = use_sdk
        self.is_jfs = is_jfs
        if mount_point:
            self.mount_point = mount_point
        else:
            self.mount_point = common.get_root(self.root_dir)
        self.client = None
        if use_sdk and self.is_jfs:
            if meta_url:
                self.client = juicefs.Client(volume_name, meta=meta_url, access_log="/tmp/jfs.log")
            else:
                self.client = juicefs.Client(volume_name, conf_dir='deploy/docker', access_log="/tmp/jfs.log")

    def run_cmd(self, command:str) -> str:
        self.logger.info(f'run_cmd: {command}')
        if '|' in command or '>' in command or '&' in command:
            ret=os.system(command)
            if ret == 0:
                return ret
            else: 
                raise Exception(f"run command {command} failed with {ret}")
        try:
            output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise e
        return output.stdout.decode()

    def get_zones(self):
        return common.get_zones(self.root_dir)

    def init_rootdir(self):
        if self.client:
            self.logger.debug(f'init_rootdir {self.root_dir} with use_sdk={self.use_sdk}')
            sdk_root_dir = self.get_sdk_path(self.root_dir)
            if self.client.exists(sdk_root_dir):
                self.client.rmr(sdk_root_dir)
                assert not self.client.exists(sdk_root_dir), red(f'{self.root_dir} should not exist')
            self.client.makedirs(sdk_root_dir)
            assert self.client.exists(sdk_root_dir), red(f'{self.root_dir} should exist')
        else:
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)
            if os.environ.get('PROFILE', 'dev') != 'generate':
                common.clean_dir(self.root_dir)
        
    def handleException(self, e, action, path, **kwargs):
        if isinstance(e, subprocess.CalledProcessError):
            err = e.output.decode()
        else:
            err = type(e).__name__ + ":" + str(e)
        err = '\n'.join([elem.split('<FATAL>:')[-1].split('<ERROR>:')[-1] for elem in err.split('\n')])
        err = re.sub(r'\[\w+\.go:\d+\]', '', err)
        if err.find('setfacl') != -1 and err.find('\n') != -1:
            err = '\n'.join(sorted(err.split('\n')))
        err = self.parse_pysdk_error(err)
        self.stats.failure(action)
        self.logger.info(f'{action} {path} {kwargs} failed: {err}')
        return Exception(err)
    
    def parse_pysdk_error(self, err:str):
        # error message : call jfs_rename failed: [Errno 22] Invalid argument: (b'/fsrand', b'/fsrand/izsn/rfnn', c_uint(0))
        if not err.startswith("call jfs_"):
            return err
        return re.sub(r'call jfs_\w+ failed: ', '', err)
    
    def get_sdk_path(self, abspath):
        return '/'+os.path.relpath(abspath, self.mount_point)

    def do_stat(self, entry):
        self.logger.debug(f'do_stat {self.root_dir} {entry}')
        abspath = os.path.join(self.root_dir, entry)
        try:
            if self.client:
                st = self.client.stat(self.get_sdk_path(abspath))
            else:
                st = os.stat(abspath)
        except Exception as e :
            return self.handleException(e, 'do_stat', abspath, entry=entry)
        finally:
            pass
        self.stats.success('do_stat')
        self.logger.info(f'do_stat {abspath} with succeed')
        self.logger.debug(f'do_stat st is {st}')
        return common.get_stat_field(st)
    
    def do_create_file(self, file, content, mode, encoding, errors):
        self.logger.debug(f'do_create_file {self.root_dir} {file} {mode}')
        abspath = os.path.join(self.root_dir, file)
        f = None
        try:
            if self.client:
                f = self.client.open(self.get_sdk_path(abspath), mode=mode, encoding=encoding, errors=errors)           
            else:
                f = open(abspath, mode=mode, encoding=encoding, errors=errors)
            f.write(content)
            f.flush()
        except Exception as e :
            return f, self.handleException(e, 'do_create_file', abspath, mode=mode)
        finally:
            pass
        self.stats.success('do_create_file')
        self.logger.info(f'do_create_file {abspath} {mode} succeed')
        return f, 'succeed'

    def do_open(self, file, mode, encoding, errors):
        self.logger.debug(f'do_open {self.root_dir} {file} {mode}')
        abspath = os.path.join(self.root_dir, file)
        f = None
        try:
            if self.client:
                f = self.client.open(self.get_sdk_path(abspath), mode=mode, encoding=encoding, errors=errors)           
            else:
                f = open(abspath, mode=mode, encoding=encoding, errors=errors)
        except Exception as e :
            return f, self.handleException(e, 'do_open', abspath, mode=mode)
        finally:
            pass
        self.stats.success('do_open')
        self.logger.info(f'do_open {abspath} {mode} succeed')
        return f, 'succeed'

    def do_write(self, fd, file, content):
        self.logger.debug(f'do_write {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                fd.write(content)
            else:
                fd.write(content)
        except (io.UnsupportedOperation) as e:
            e = Exception(f'io.UnsupportedOperation: write')
            return self.handleException(e, 'do_write', abspath)
        except Exception as e :
            return self.handleException(e, 'do_write', abspath)
        finally:
            pass
        self.stats.success('do_write')
        self.logger.info(f'do_write {abspath} succeed')
        return 'succeed'

    def do_writelines(self, fd, file, lines):
        self.logger.debug(f'do_writelines {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                fd.writelines(lines)
            else:
                fd.writelines(lines)
        except (TypeError,io.UnsupportedOperation) as e:
            self.logger.debug(f'writelines: {str(e)}')
            e = Exception(f'writelines')
            return self.handleException(e, 'do_writelines', abspath)
        except Exception as e :
            return self.handleException(e, 'do_writelines', abspath)
        finally:
            pass
        self.stats.success('do_writelines')
        self.logger.info(f'do_writelines {abspath} succeed')
        return 'succeed'


    def do_seek(self, fd, file, offset, whence):
        self.logger.debug(f'do_seek {self.root_dir} file={file} offset={offset} whence={whence}')
        abspath = os.path.join(self.root_dir, file)
        try:
            pos = fd.seek(offset, whence)
        except Exception as e :
            return self.handleException(e, 'do_seek', abspath, offset=offset, whence=whence)
        finally:
            pass
        self.stats.success('do_seek')
        self.logger.info(f'do_seek {abspath} offset={offset} whence={whence} succeed, pos={pos}')
        return pos

    def do_tell(self, fd, file):
        self.logger.debug(f'do_tell {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            offset = fd.tell()
        except Exception as e :
            return self.handleException(e, 'do_tell', abspath)
        finally:
            pass
        self.stats.success('do_tell')
        self.logger.info(f'do_tell {abspath} succeed, offset={offset}')
        return offset
    
    def do_close(self, fd, file):
        self.logger.debug(f'do_close {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            fd.close()
        except Exception as e :
            return self.handleException(e, 'do_close', abspath)
        finally:
            pass
        self.stats.success('do_close')
        self.logger.info(f'do_close {abspath} succeed')
        return self.do_stat(file)
    
    def do_flush_and_fsync(self, fd, file):
        self.logger.debug(f'do_flush {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                fd.flush()
                fd.fsync()
            else:
                fd.flush()
                os.fsync(fd.fileno())
        except Exception as e :
            return self.handleException(e, 'do_flush', abspath)
        finally:
            pass
        self.stats.success('do_flush')
        self.logger.info(f'do_flush {abspath} succeed')
        return self.do_stat(file)

    def do_fallocate(self, fd, file, offset, length):
        self.logger.debug(f'do_fallocate {self.root_dir} {file} {offset} {length}')
        abspath = os.path.join(self.root_dir, file)
        try:
            file_size = os.stat(abspath).st_size
            if file_size == 0:
                offset = 0
            else:
                offset = offset % file_size
            fallocate.fallocate(fd.fileno(), offset, length)
        except Exception as e :
            return self.handleException(e, 'do_fallocate', abspath, offset=offset, length=length)
        finally:
            pass
        self.stats.success('do_fallocate')
        self.logger.info(f'do_fallocate {abspath} offset={offset} length={length} succeed')
        return self.do_stat(file)
    

    def do_read(self, fd, file, length):
        self.logger.debug(f'do_read {self.root_dir} {file} {length}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                result = fd.read(length)    
            else:
                result = fd.read(length)
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            self.logger.debug(f'do_read result is {result}')
            result = hashlib.md5(result).hexdigest()
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: read')
            return self.handleException(e, 'do_read', abspath, length=length)
        except Exception as e :
            return self.handleException(e, 'do_read', abspath, length=length)
        finally:
            pass
        self.stats.success('do_read')
        self.logger.info(f'do_read {abspath} length={length} succeed')
        return (result, )

    def do_readlines(self, fd, file):
        self.logger.debug(f'do_readlines {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                result = ''.join(fd.readlines())
            else:
                result = ''.join(fd.readlines())
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            self.logger.debug(f'do_readlines result is {result}')
            result = hashlib.md5(result).hexdigest()
        except UnicodeDecodeError as e:
            # SEE: https://github.com/juicedata/jfs/issues/1450#issuecomment-2213518638
            self.logger.debug(f'UnicodeDecodeError: {e.encoding} {e.object} {e.start} {e.end} {e.reason}')
            e = UnicodeDecodeError(e.encoding, e.object, 0, 0, e.reason)
            return self.handleException(e, 'do_readlines', abspath)
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: readlines')
            return self.handleException(e, 'do_readlines', abspath)
        except Exception as e :
            return self.handleException(e, 'do_readlines', abspath)
        finally:
            pass
        self.stats.success('do_readlines')
        self.logger.info(f'do_readlines {abspath} succeed')
        return (result, )

    def do_readline(self, fd, file):
        self.logger.debug(f'do_readline {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                result = fd.readline()
            else:
                result = fd.readline()
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            self.logger.debug(f'do_readline result is {result}')
            result = hashlib.md5(result).hexdigest()
        except UnicodeDecodeError as e:
            # SEE: https://github.com/juicedata/jfs/issues/1450#issuecomment-2213518638
            self.logger.debug(f'UnicodeDecodeError: {e.encoding} {e.object} {e.start} {e.end} {e.reason}')
            e = UnicodeDecodeError(e.encoding, e.object, 0, 0, e.reason)
            return self.handleException(e, 'do_readline', abspath)
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: readline')
            return self.handleException(e, 'do_readline', abspath)
        except Exception as e :
            return self.handleException(e, 'do_readline', abspath)
        finally:
            pass

        self.stats.success('do_readline')
        self.logger.info(f'do_readline {abspath} succeed')
        return (result, )

    def do_truncate(self, fd, file, size):
        self.logger.debug(f'do_truncate {self.root_dir} {file} {size}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                fd.flush()
                fd.truncate(size)
                st = self.client.stat(self.get_sdk_path(abspath))
            else:
                fd.flush()
                os.ftruncate(fd.fileno(), size)
                st = os.stat(abspath)
        except Exception as e :
            return self.handleException(e, 'do_truncate', abspath, size=size)
        finally:
            pass
        assert st.st_size == size, red(f'do_truncate: {abspath} size should be {size} but {st.st_size}')
        self.stats.success('do_truncate')
        self.logger.info(f'do_truncate {abspath} size={size} succeed')
        return 'succeed'

    def do_copy_file_range(self, src_file, dst_file, src_fd, dst_fd, src_offset, dst_offset, length):
        self.logger.debug(f'do_copy_file_range from {self.root_dir}/{src_file} to {self.root_dir}/{dst_file} {src_offset} {dst_offset} {length}')
        src_abspath = os.path.join(self.root_dir, src_file)
        dst_abspath = os.path.join(self.root_dir, dst_file)
        try:
            os.copy_file_range(src_fd, dst_fd, length, src_offset, dst_offset)
        except Exception as e :
            return self.handleException(e, 'do_copy_file_range', src_abspath, src_offset=src_offset, dst_offset=dst_offset, length=length)
        finally:
            pass
        self.stats.success('do_copy_file_range')
        self.logger.info(f'do_copy_file_range {src_abspath} to {dst_abspath} {src_offset} {dst_offset} {length} succeed')
        return os.stat(dst_abspath).st_size

    def do_mmap_create(self, file, fd):
        self.logger.debug(f'do_mmap_create {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            mm = mmap.mmap(fd.fileno(), 0)
        except Exception as e :
            return None, self.handleException(e, 'do_mmap_create', abspath)
        finally:
            pass
        self.stats.success('do_mmap_create')
        self.logger.info(f'do_mmap_create {abspath} succeed')
        return mm, len(mm)

    def do_mmap_read(self, file, mm: mmap.mmap, length):
        self.logger.debug(f'do_mmap_read {self.root_dir} {file} {length}')
        abspath = os.path.join(self.root_dir, file)
        try:
            length = length % mm.size()
            result = mm.read(length)
        except Exception as e :
            return self.handleException(e, 'do_mmap_read', abspath, length=length)
        finally:
            pass
        self.stats.success('do_mmap_read')
        self.logger.info(f'do_mmap_read {abspath} {length} succeed')
        return result

    def do_mmap_read_byte(self, file, mm:mmap.mmap):
        self.logger.debug(f'do_mmap_read_byte {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            result = mm.read_byte()
        except Exception as e :
            return self.handleException(e, 'do_mmap_read_byte', abspath)
        finally:
            pass
        self.stats.success('do_mmap_read_byte')
        self.logger.info(f'do_mmap_read_byte {abspath} succeed')
        return result
    
    def do_mmap_read_line(self, file, mm:mmap.mmap):
        self.logger.debug(f'do_mmap_read_line {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            result = mm.readline()
        except Exception as e :
            return self.handleException(e, 'do_mmap_read_line', abspath)
        finally:
            pass
        self.stats.success('do_mmap_read_line')
        self.logger.info(f'do_mmap_read_line {abspath} succeed')
        return result

    def do_mmap_write(self, file, mm:mmap.mmap, content):
        self.logger.debug(f'do_mmap_write {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            mm.write(content)
        except Exception as e :
            return self.handleException(e, 'do_mmap_write', abspath)
        finally:
            pass
        self.stats.success('do_mmap_write')
        self.logger.info(f'do_mmap_write {abspath} succeed')
        return mm.size(), mm.tell()

    def do_mmap_write_byte(self, file, mm: mmap.mmap, byte):
        self.logger.debug(f'do_mmap_write_byte {self.root_dir}')
        abspath = os.path.join(self.root_dir, file)
        try:
            mm.write_byte(byte)
        except Exception as e :
            return self.handleException(e, 'do_mmap_write_byte', abspath)
        finally:
            pass
        self.stats.success('do_mmap_write_byte')
        self.logger.info(f'do_mmap_write_byte {abspath} succeed')
        return 'succeed'

    def do_mmap_move(self, file, mm: mmap.mmap, dest, src, count):
        self.logger.debug(f'do_mmap_move {self.root_dir} {file} {dest} {src} {count}')
        abspath = os.path.join(self.root_dir, file)
        try:
            dest = dest % mm.size()
            src = src % mm.size()
            count = count % mm.size()
            mm.move(dest, src, count)
        except Exception as e :
            return self.handleException(e, 'do_mmap_move', abspath, dest=dest, src=src, count=count)
        finally:
            pass
        self.stats.success('do_mmap_move')
        self.logger.info(f'do_mmap_move {abspath} {dest} {src} {count} succeed')
        return mm.size(), mm.tell()

    def do_mmap_resize(self, file, mm: mmap.mmap):
        self.logger.debug(f'do_mmap_resize {self.root_dir}')
        abspath = os.path.join(self.root_dir, file)
        try:
            if self.client:
                newsize = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                newsize = os.stat(abspath).st_size
            mm.resize(newsize)
        except Exception as e :
            return self.handleException(e, 'do_mmap_resize', self.root_dir)
        finally:
            pass
        self.stats.success('do_mmap_resize')
        self.logger.info(f'do_mmap_resize succeed')
        return mm.size()

    def do_mmap_seek(self, file, mm: mmap.mmap, offset, whence):
        self.logger.debug(f'do_mmap_seek {self.root_dir} {file} {offset} {whence}')
        abspath = os.path.join(self.root_dir, file)
        try:
            assert mm.size() != 0, red(f'do_mmap_seek size should not be 0')
            offset = offset % mm.size()
            mm.seek(offset, whence)
            pos = mm.tell()
        except Exception as e :
            return self.handleException(e, 'do_mmap_seek', abspath, offset=offset, whence=whence)
        finally:
            pass
        self.stats.success('do_mmap_seek')
        self.logger.info(f'do_mmap_seek {abspath} {offset} {whence} succeed')
        return pos
    
    def do_mmap_size(self, file, mm: mmap.mmap):
        self.logger.debug(f'do_mmap_size {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            size = mm.size()
        except Exception as e :
            return self.handleException(e, 'do_mmap_size', abspath)
        finally:
            pass
        self.stats.success('do_mmap_size')
        self.logger.info(f'do_mmap_size {abspath} succeed')
        return size

    def do_mmap_tell(self, file, mm: mmap.mmap):
        self.logger.debug(f'do_mmap_tell {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            pos = mm.tell()
        except Exception as e :
            return self.handleException(e, 'do_mmap_tell', abspath)
        finally:
            pass
        self.stats.success('do_mmap_tell')
        self.logger.info(f'do_mmap_tell {abspath} succeed')
        return pos

    def do_mmap_flush(self, file, mm: mmap.mmap):
        self.logger.debug(f'do_mmap_flush {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            mm.flush()
        except Exception as e :
            return self.handleException(e, 'do_mmap_flush', abspath)
        finally:
            pass
        self.stats.success('do_mmap_flush')
        self.logger.info(f'do_mmap_flush {abspath} succeed')
        return 'succeed'
    
    def do_mmap_close(self, file, mm: mmap.mmap):
        self.logger.debug(f'do_mmap_close {self.root_dir} {file}')
        abspath = os.path.join(self.root_dir, file)
        try:
            mm.close()
        except Exception as e :
            return self.handleException(e, 'do_mmap_close', abspath)
        finally:
            pass
        self.stats.success('do_mmap_close')
        self.logger.info(f'do_mmap_close {abspath} succeed')
        return 'succeed'

================================================
FILE: .github/scripts/hypo/file_test.py
================================================
import unittest
from file import JuicefsDataMachine

class TestPySdk(unittest.TestCase):
    def test_issue_1522_1(self):
        # SEE https://github.com/juicedata/jfs/issues/1522
        state = JuicefsDataMachine()
        v1 = state.init_folders()
        state.write(fd=v1, content='abc')
        state.seek(fd=v1, offset=0, whence=1)
        state.teardown()

    def test_issue_1522_2(self):
        # SEE https://github.com/juicedata/jfs/issues/1522
        state = JuicefsDataMachine()
        v1 = state.init_folders()
        state.seek(fd=v1, offset=1, whence=0)
        state.write(fd=v1, content='')
        state.seek(fd=v1, offset=0, whence=2)
        state.teardown()

    def test_issue_1523(self):
        # SEE https://github.com/juicedata/jfs/issues/1523
        state = JuicefsDataMachine()
        v1 = state.init_folders()
        state.truncate(fd=v1, size=1)
        state.readline(fd=v1)
        state.teardown()

    def skip_test_issue_1533(self):
        # SEE https://github.com/juicedata/jfs/issues/1533
        state = JuicefsDataMachine()
        v1 = state.init_folders()
        state.write(fd=v1, content='ab')
        state.seek(fd=v1, offset=0, whence=0)
        state.read(fd=v1, length=1)
        state.write(content='', fd=v1)
        state.read(fd=v1, length=1)
        state.teardown()

    def skip_test_issue_1548(self):
        # SEE https://github.com/juicedata/jfs/issues/1548
        state = JuicefsDataMachine()
        fd_0 = state.init_folders()
        state.write(fd=fd_0, content='a')
        state.seek(fd=fd_0, offset=0, whence=0)
        state.write(fd=fd_0, content='b')
        state.read(fd=fd_0, length=1)
        state.teardown()

    def skip_test_issue_1548_2(self):
        # SEE https://github.com/juicedata/jfs/issues/1548
        state = JuicefsDataMachine()
        fd_0 = state.init_folders()
        state.truncate(fd=fd_0, size=3)
        state.write(content='a', fd=fd_0)
        state.readline(fd=fd_0)
        state.teardown()

if __name__ == '__main__':
    unittest.main()


================================================
FILE: .github/scripts/hypo/fs.py
================================================
import os
import pwd
import re
import subprocess
import json
import common
from common import red
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import assume, strategies as st, settings, Verbosity
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple, consumes
from hypothesis import Phase, seed
from hypothesis.database import DirectoryBasedExampleDatabase
from strategy import *
from fs_op import FsOperation
import random
import time

SEED=int(os.environ.get('SEED', random.randint(0, 1000000000)))

@seed(SEED)
class JuicefsMachine(RuleBasedStateMachine):
    Files = Bundle('files')
    Folders = Bundle('folders')
    Entries = Files | Folders
    EntryWithACL = Bundle('entry_with_acl')
    Xattrs = Bundle('xattrs')
    start = time.time()
    use_sdk = os.environ.get('USE_SDK', 'false').lower() == 'true'
    meta_url = os.environ.get('META_URL')
    SUDO_USERS = ['root']
    if use_sdk:
        SUDO_USERS = ['root']
    if os.uname().sysname == 'Darwin':
        USERS=['root']
        GROUPS = ['root']
    else:
        USERS=['root', 'user1', 'user2','user3']
        GROUPS = USERS+['group1', 'group2', 'group3', 'group4']
    group_created = False
    INCLUDE_RULES = []
    if os.getenv('EXCLUDE_RULES'):
        EXCLUDE_RULES = os.getenv('EXCLUDE_RULES').split(',')
    else:
        EXCLUDE_RULES = ['readlines', 'readline']
        # EXCLUDE_RULES = ['rebalance_dir', 'rebalance_file', 'clone_cp_file', 'clone_cp_dir', 'loop_symlink', 'hardlink', 'rename_dir', 'chown']
    ROOT_DIR1=os.environ.get('ROOT_DIR1', '/tmp/fsrand')
    ROOT_DIR2=os.environ.get('ROOT_DIR2', '/tmp/jfs/fsrand')
    if use_sdk:
        fsop1 = FsOperation(name='fs1', root_dir=ROOT_DIR1, use_sdk=use_sdk, is_jfs=False, volume_name=None)
        fsop2 = FsOperation(name='fs2', root_dir=ROOT_DIR2, mount_point='/tmp/jfs', use_sdk=use_sdk, is_jfs=True, volume_name='test-volume', meta_url=meta_url)
    else:
        fsop1 = FsOperation(name='fs1', root_dir=ROOT_DIR1, is_jfs=common.is_jfs(ROOT_DIR1))
        fsop2 = FsOperation(name='fs2', root_dir=ROOT_DIR2, is_jfs=common.is_jfs(ROOT_DIR2))
    check_dangling = os.environ.get('CHECK_DANGLING', 'false').lower() == 'true'
    @initialize(target=Folders)
    def init_folders(self):
        self.fsop1.init_rootdir()
        self.fsop2.init_rootdir()
        return ''
    
    def create_users(self, users):
        for user in users:
            if user != 'root':
                common.create_user(user)

    def get_default_rootdir1(self):
        return '/tmp/fsrand'
    
    def get_default_rootdir2(self):
        return '/tmp/jfs/fsrand'

    def __init__(self):
        super(JuicefsMachine, self).__init__()
        print(f'__init__')
        MAX_RUNTIME=int(os.environ.get('MAX_RUNTIME', '36000'))
        duration = time.time() - self.start
        print(f'duration is {duration}')
        if duration > MAX_RUNTIME:
            raise Exception(f'run out of time: {duration}')
        
        if not self.group_created:
            for group in self.GROUPS:
                if group != 'root':
                    common.create_group(group)
            self.group_created = True
        self.create_users(self.USERS)
        self.remove_dangling_files()

    def remove_dangling_files(self):
        if self.check_dangling:
            self.fsop1.do_remove_dangling_files()
            self.fsop2.do_remove_dangling_files()

    def equal(self, result1, result2):
        if os.getenv('PROFILE', 'dev') == 'generate':
            return True
        if type(result1) != type(result2):
            return False
        # TODO: ignore the diff temp, we should check the difference of result1 and result2 in the future.
        # Ref: https://github.com/juicedata/juicefs/issues/5982
        ignore_diff_errors = os.environ.get('IGNORE_DIFF_ERRORS', 'false').lower() == 'true'
        if ignore_diff_errors and isinstance(result1, Exception) and isinstance(result2, Exception):
            return True
        if isinstance(result1, Exception):
            if 'panic:' in str(result1) or 'panic:' in str(result2):
                return False
            result1 = str(result1)
            result2 = str(result2)
            if self.use_sdk:
                result1 = self.parse_error_message(result1)
                result2 = self.parse_error_message(result2)
        result1 = common.replace(result1, self.fsop1.root_dir, '***')
        result2 = common.replace(result2, self.fsop2.root_dir, '***')
        return result1 == result2

    def parse_error_message(self, err):
        # extract "[Errno 22] Invalid argument" from the following error message
        # [Errno 22] Invalid argument: '/tmp/fsrand/' -> '/tmp/fsrand/izsn/rfnn'
        # [Errno 22] Invalid argument: (b'/fsrand', b'/fsrand/izsn/rfnn', c_uint(0))
        match = re.search(r"\[Errno \d+\] [^:]+", err)
        if match:
            return match.group(0)
        else:
            return err

    def seteuid(self, user):
        os.seteuid(pwd.getpwnam(user).pw_uid)
        # os.setegid(pwd.getpwnam(user).pw_gid)

    def should_run(self, rule):
        if len(self.EXCLUDE_RULES) > 0:
            return rule not in self.EXCLUDE_RULES
        else:
            return rule in self.INCLUDE_RULES

    @rule(
        entry = Entries,
        user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('stat'))
    def stat(self, entry, user = 'root'):
        result1 = self.fsop1.do_stat(entry=entry, user=user)
        result2 = self.fsop2.do_stat(entry=entry, user=user)
        assert self.equal(result1, result2), red(f'stat:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(
        entry = Entries,
        user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('lstat'))
    def lstat(self, entry, user = 'root'):
        result1 = self.fsop1.do_lstat(entry=entry, user=user)
        result2 = self.fsop2.do_lstat(entry=entry, user=user)
        assert self.equal(result1, result2), red(f'lstat:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
        entry = Entries,
        user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('exists'))
    def exists(self, entry, user = 'root'):
        result1 = self.fsop1.do_exists(entry=entry, user=user)
        result2 = self.fsop2.do_exists(entry=entry, user=user)
        assert result1 == result2, red(f'exists:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(file = Files.filter(lambda x: x != multiple()), 
          flags = st_open_flags, 
          umask = st_umask,
          mode = st_entry_mode,
          user = st.sampled_from(SUDO_USERS), 
          )
    @precondition(lambda self: self.should_run('open') and not self.use_sdk)
    def open(self, file, flags, mode, user='root', umask=0o022):
        result1 = self.fsop1.do_open(file, flags, umask, mode, user)
        result2 = self.fsop2.do_open(file, flags, umask, mode, user)
        assert self.equal(result1, result2), red(f'open:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(file = Files.filter(lambda x: x != multiple()), 
        mode = st_open_mode, 
        user = st.sampled_from(SUDO_USERS)
        )
    @precondition(lambda self: self.should_run('open') and not self.use_sdk)
    def open2(self, file, mode, user='root'):
        result1 = self.fsop1.do_open2(file=file, mode=mode, user=user)
        result2 = self.fsop2.do_open2(file=file, mode=mode, user=user)
        assert self.equal(result1, result2), red(f'open:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(file = Files.filter(lambda x: x != multiple()), 
          offset = st_offset, 
          content = st_content,
          mode = st_open_mode,
          encoding = st_open_encoding, 
          errors = st_open_errors,
          whence = st_whence,
          user = st.sampled_from(SUDO_USERS)
          )
    @precondition(lambda self: self.should_run('write'))
    def write(self, file, offset, content, mode, whence, encoding=None, errors=None, user='root'):
        result1 = self.fsop1.do_write(file=file, offset=offset, content=content, mode=mode, encoding=encoding, errors=errors, whence=whence, user=user)
        result2 = self.fsop2.do_write(file=file, offset=offset, content=content, mode=mode, encoding=encoding, errors=errors, whence=whence, user=user)
        assert self.equal(result1, result2), red(f'write:\nresult1 is {result1}\nresult2 is {result2}')
    
    # TODO: fix hardcode mode
    @rule(file = Files.filter(lambda x: x != multiple()), 
        offset = st_offset, 
        lines = st_lines,
        mode = st_open_mode,
        whence = st_whence,
        user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('writelines'))
    def writelines(self, file, offset, lines, mode, whence, user='root'):
        result1 = self.fsop1.do_writelines(file=file, offset=offset, lines=lines, mode=mode, whence=whence, user=user)
        result2 = self.fsop2.do_writelines(file=file, offset=offset, lines=lines, mode=mode, whence=whence, user=user)
        assert self.equal(result1, result2), red(f'write:\nresult1 is {result1}\nresult2 is {result2}')
    

    @rule(file = Files.filter(lambda x: x != multiple()),
          offset = st.integers(min_value=0, max_value=MAX_FILE_SIZE),
          length = st.integers(min_value=0, max_value=MAX_FALLOCATE_LENGTH),
          mode = st.just(0), 
          user = st.sampled_from(SUDO_USERS)
          )
    @precondition(lambda self: self.should_run('fallocate') and not self.use_sdk)
    def fallocate(self, file, offset, length, mode, user='root'):
        result1 = self.fsop1.do_fallocate(file, offset, length, mode, user)
        result2 = self.fsop2.do_fallocate(file, offset, length, mode, user)
        assert self.equal(result1, result2), red(f'fallocate:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(src = Files.filter(lambda x: x != multiple()),
        dst = Files.filter(lambda x: x != multiple()),
        src_offset = st_offset,
        dst_offset = st_offset,
        count = st_length,
        user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('copy_file_range') and not self.use_sdk)
    def copy_file_range(self, src, dst, src_offset, dst_offset, count, user):
        result1 = self.fsop1.do_copy_file_range(src=src, dst=dst, src_offset=src_offset, dst_offset=dst_offset, count=count, user=user)
        result2 = self.fsop2.do_copy_file_range(src=src, dst=dst, src_offset=src_offset, dst_offset=dst_offset, count=count, user=user)
        assert self.equal(result1, result2), red(f'copy_file_range:\nresult1 is {result1}\nresult2 is {result2}')

    @rule( file = Files.filter(lambda x: x != multiple()), 
          mode = st_open_mode,
          encoding = st_open_encoding,
          errors = st_open_errors,
          offset = st_offset, 
          length = st.integers(min_value=0, max_value=MAX_FILE_SIZE), 
          whence = st_whence,
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('read'))
    def read(self, file, mode, offset, length, whence=os.SEEK_CUR, encoding=None, errors=None, user='root'):
        result1 = self.fsop1.do_read(file=file, mode=mode, length=length, offset=offset, whence=whence, user=user, encoding=encoding, errors=errors)
        result2 = self.fsop2.do_read(file=file, mode=mode, length=length, offset=offset, whence=whence, user=user, encoding=encoding, errors=errors)
        assert self.equal(result1, result2), red(f'read:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule( file = Files.filter(lambda x: x != multiple()), 
          mode = st_open_mode,
          offset = st_offset, 
          whence = st_whence,
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('readlines'))
    def readlines(self, file, mode, offset, whence=os.SEEK_CUR, user='root'):
        result1 = self.fsop1.do_readlines(file=file, mode=mode, offset=offset, whence=whence, user=user)
        result2 = self.fsop2.do_readlines(file=file, mode=mode, offset=offset, whence=whence, user=user)
        assert self.equal(result1, result2), red(f'readlines:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule( file = Files.filter(lambda x: x != multiple()), 
          mode = st_open_mode,
          offset = st_offset, 
          whence = st_whence,
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('readline'))
    def readline(self, file, mode, offset, whence=os.SEEK_CUR, user='root'):
        result1 = self.fsop1.do_readline(file=file, mode=mode, offset=offset, whence=whence, user=user)
        result2 = self.fsop2.do_readline(file=file, mode=mode, offset=offset, whence=whence, user=user)
        assert self.equal(result1, result2), red(f'readline:\nresult1 is {result1}\nresult2 is {result2}')
    

    @rule(file=Files.filter(lambda x: x != multiple()), 
          size=st.integers(min_value=0, max_value=MAX_TRUNCATE_LENGTH), 
          user=st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('truncate'))
    def truncate(self, file, size, user='root'):
        result1 = self.fsop1.do_truncate(file=file, size=size, user=user)
        result2 = self.fsop2.do_truncate(file=file, size=size, user=user)
        assert self.equal(result1, result2), red(f'truncate:\nresult1 is {result1}\nresult2 is {result2}')
    
    @rule(target=Files, 
          parent = Folders.filter(lambda x: x != multiple()), 
          file_name = st_file_name, 
          content = st_content,
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask)
    @precondition(lambda self: self.should_run('create_file'))
    def create_file(self, parent, file_name, content, mode='xb', buffering=-1, user='root', umask=0o022):
        result1 = self.fsop1.do_create_file(parent=parent, file_name=file_name, mode=mode, buffering=buffering, content=content, user=user, umask=umask)
        result2 = self.fsop2.do_create_file(parent=parent, file_name=file_name, mode=mode, buffering=buffering, content=content, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'create_file:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, file_name)

    @rule(dir = Folders.filter(lambda x: x != multiple()), 
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('listdir'))
    def listdir(self, dir, user='root'):
        result1 = self.fsop1.do_listdir(dir=dir, user=user)
        result2 = self.fsop2.do_listdir(dir=dir, user=user)
        assert self.equal(result1, result2), red(f'listdir:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
          target = Files,
          file = consumes(Files).filter(lambda x: x != multiple()),
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('unlink'))
    def unlink(self, file, user='root'):
        result1 = self.fsop1.do_unlink(file=file, user=user)
        result2 = self.fsop2.do_unlink(file=file, user=user)
        assert self.equal(result1, result2), red(f'unlink:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return file
        else:
            return multiple()
            
    @rule( target=Files, 
          entry = consumes(Files).filter(lambda x: x != multiple()),
          parent = Folders, 
          new_entry_name = st_file_name, 
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask)
    @precondition(lambda self: self.should_run('rename_file'))
    def rename_file(self, entry, parent, new_entry_name, user='root', umask=0o022):
        result1 = self.fsop1.do_rename(entry=entry, parent=parent, new_entry_name=new_entry_name, user=user, umask=umask)
        result2 = self.fsop2.do_rename(entry=entry, parent=parent, new_entry_name=new_entry_name, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'rename_file:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return entry
        else:
            return os.path.join(parent, new_entry_name)
        
    @rule( target=Folders, 
          entry = consumes(Folders).filter(lambda x: x != multiple()), 
          parent = Folders, 
          new_entry_name = valid_dir_name(),
          user = st.sampled_from(SUDO_USERS),
          umask = st_umask)
    @precondition(lambda self: self.should_run('rename_dir'))
    def rename_dir(self, entry, parent, new_entry_name, user='root', umask=0o022):
        result1 = self.fsop1.do_rename(entry=entry, parent=parent, new_entry_name=new_entry_name, user=user, umask=umask)
        result2 = self.fsop2.do_rename(entry=entry, parent=parent, new_entry_name=new_entry_name, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'rename_dir:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return entry
        else:
            return os.path.join(parent, new_entry_name)
        

    @rule( target=Files, entry = Files.filter(lambda x: x != multiple()),
          parent = Folders.filter(lambda x: x != multiple()),
          new_entry_name = st_file_name, 
          follow_symlinks = st.booleans(),
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask )
    @precondition(lambda self: self.should_run('copy_file') and not self.use_sdk)
    def copy_file(self, entry, parent, new_entry_name, follow_symlinks, user='root',  umask=0o022):
        result1 = self.fsop1.do_copy_file(entry, parent, new_entry_name, follow_symlinks, user, umask)
        result2 = self.fsop2.do_copy_file(entry, parent, new_entry_name, follow_symlinks, user, umask)
        assert self.equal(result1, result2), red(f'copy_file:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, new_entry_name)
    
    @rule( target=Files, entry = Files.filter(lambda x: x != multiple()),
          parent = Folders.filter(lambda x: x != multiple()),
          new_entry_name = st_file_name, 
          preserve = st.just(False),
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask )
    @precondition(lambda self: self.should_run('clone_cp_file') \
                  and (self.fsop1.singlezone or self.fsop2.singlezone))
    def clone_cp_file(self, entry, parent, new_entry_name, preserve, user='root', umask=0o022):
        result1 = self.fsop1.do_clone_entry(entry, parent, new_entry_name, preserve, user, umask)
        result2 = self.fsop2.do_clone_entry(entry, parent, new_entry_name, preserve, user, umask)
        assert type(result1) == type(result2), red(f'clone_cp_file:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            assert result1 == result2, red(f'clone_cp_file:\nresult1 is {result1}\nresult2 is {result2}')
            return os.path.join(parent, new_entry_name)
        
    @rule( target=Folders, 
          entry = Folders.filter(lambda x: x != multiple()),
          parent = Folders.filter(lambda x: x != multiple()),
          new_entry_name = valid_dir_name(), 
          preserve = st.just(False),
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask,
    )
    @precondition(lambda self: self.should_run('clone_cp_dir') \
                  and (self.fsop1.singlezone or self.fsop2.singlezone))
    def clone_cp_dir(self, entry, parent, new_entry_name, preserve, user, umask):
        result1 = self.fsop1.do_clone_entry(entry, parent, new_entry_name, preserve, user, umask)
        result2 = self.fsop2.do_clone_entry(entry, parent, new_entry_name, preserve, user, umask)
        assert self.equal(result1, result2), red(f'clone_cp_dir:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            assert result1 == result2, red(f'clone_cp_dir:\nresult1 is {result1}\nresult2 is {result2}')
            return os.path.join(parent, new_entry_name)

    @rule( target = Folders, 
          parent = Folders.filter(lambda x: x != multiple()),
          subdir = valid_dir_name(),
          mode = st_entry_mode,
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask)
    @precondition(lambda self: self.should_run('mkdir'))
    def mkdir(self, parent, subdir, mode, user='root', umask=0o022):
        result1 = self.fsop1.do_mkdir(parent=parent, subdir=subdir, mode=mode, user=user, umask=umask)
        result2 = self.fsop2.do_mkdir(parent=parent, subdir=subdir, mode=mode, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'mkdir:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, subdir)

    @rule( target = Folders,
          dir = consumes(Folders).filter(lambda x: x != multiple()),
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('rmdir'))
    def rmdir(self, dir, user='root'):
        assume(dir != '')
        result1 = self.fsop1.do_rmdir(dir=dir, user=user)
        result2 = self.fsop2.do_rmdir(dir=dir, user=user)
        assert self.equal(result1, result2), red(f'rmdir:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return dir
        else:
            return multiple()

    @rule(target = Files, 
          src_file = Files.filter(lambda x: x != multiple()), 
          parent = Folders.filter(lambda x: x != multiple()), 
          link_file_name = st_file_name, 
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask)
    @precondition(lambda self: self.should_run('hardlink'))
    def hardlink(self, src_file, parent, link_file_name, user='root', umask=0o022):
        result1 = self.fsop1.do_hardlink(src_file=src_file, parent=parent, link_file_name=link_file_name, user=user, umask=umask)
        result2 = self.fsop2.do_hardlink(src_file=src_file, parent=parent, link_file_name=link_file_name, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'hardlink:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, link_file_name)
    
    @rule(target = Files , 
          src_file = Files.filter(lambda x: x != multiple()), 
          parent = Folders.filter(lambda x: x != multiple()),
          link_file_name = st_file_name, 
          user = st.sampled_from(SUDO_USERS), 
          umask = st_umask)
    @precondition(lambda self: self.should_run('symlink'))
    def symlink(self, src_file, parent, link_file_name, user='root', umask=0o022):
        result1 = self.fsop1.do_symlink(src_file=src_file, parent=parent, link_file_name=link_file_name, user=user, umask=umask)
        result2 = self.fsop2.do_symlink(src_file=src_file, parent=parent, link_file_name=link_file_name, user=user, umask=umask)
        assert self.equal(result1, result2), red(f'symlink:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, link_file_name)

    @rule(target = Files , 
          parent = Folders.filter(lambda x: x != multiple()),
          link_file_name = st_file_name, 
          user = st.sampled_from(SUDO_USERS)
          )
    @precondition(lambda self: self.should_run('loop_symlink'))
    def loop_symlink(self, parent, link_file_name, user='root'):
        result1 = self.fsop1.do_loop_symlink(parent=parent, link_file_name=link_file_name, user=user)
        result2 = self.fsop2.do_loop_symlink(parent=parent, link_file_name=link_file_name, user=user)
        assert self.equal(result1, result2), red(f'loop_symlink:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, link_file_name)

    @rule(file = Files.filter(lambda x: x != multiple()),
          user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('readlink'))
    def readlink(self, file, user='root'):
        result1 = self.fsop1.do_readlink(file=file, user=user)
        result2 = self.fsop2.do_readlink(file=file, user=user)
        assert self.equal(result1, result2), red(f'read_link:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(target=Xattrs, 
          file = Files.filter(lambda x: x != multiple()), 
          name = st_xattr_name,
          value = st_xattr_value, 
          flag = st_xattr_flag,
          user = st.sampled_from(SUDO_USERS)
        )
    @precondition(lambda self: self.should_run('set_xattr'))
    def set_xattr(self, file, name, value, flag, user='root'):
        result1 = self.fsop1.do_set_xattr(file=file, name=name, value=value, flag=flag, user=user)
        result2 = self.fsop2.do_set_xattr(file=file, name=name, value=value, flag=flag, user=user)
        assert self.equal(result1, result2), red(f'set_xattr:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return (file, name)

    @rule(xattr = Xattrs.filter(lambda x: x != multiple()),
          user = st.sampled_from(SUDO_USERS)
    )
    @precondition(lambda self: self.should_run('get_xattr'))
    def get_xattr(self, xattr, user):
        result1 = self.fsop1.do_get_xattr(file=xattr[0], name=xattr[1], user=user)
        result2 = self.fsop2.do_get_xattr(file=xattr[0], name=xattr[1], user=user)
        assert self.equal(result1, result2), red(f'get_xattr:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(file=Files.filter(lambda x: x != multiple()), 
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('list_xattr'))
    def list_xattr(self, file, user='root'):
        result1 = self.fsop1.do_list_xattr(file=file, user=user)
        result2 = self.fsop2.do_list_xattr(file=file, user=user)
        assert self.equal(result1, result2), red(f'list_xattr:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
        target = Xattrs,
        xattr = consumes(Xattrs).filter(lambda x: x != multiple()), 
        user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('remove_xattr'))
    def remove_xattr(self, xattr, user='root'):
        result1 = self.fsop1.do_remove_xattr(file=xattr[0], name=xattr[1], user=user)
        result2 = self.fsop2.do_remove_xattr(file=xattr[0], name=xattr[1], user=user)
        assert self.equal(result1, result2), red(f'remove_xattr:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return xattr
        else:
            return multiple()
        
    @rule(user = st.sampled_from(USERS).filter(lambda x: x != 'root'), 
          group = st.sampled_from(GROUPS),
          groups = st.lists(st.sampled_from(GROUPS), unique=True))
    @precondition(lambda self: self.should_run('change_groups') and not self.use_sdk)
    def change_groups(self, user, group, groups):
        self.fsop1.do_change_groups(user, group, groups)
        self.fsop2.do_change_groups(user, group, groups)

    @rule(entry = Entries.filter(lambda x: x != multiple()), 
          mode = st_entry_mode, 
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('chmod'))
    def chmod(self, entry, mode, user='root'):
        result1 = self.fsop1.do_chmod(entry=entry, mode=mode, user=user)
        result2 = self.fsop2.do_chmod(entry=entry, mode=mode, user=user)
        assert self.equal(result1, result2), red(f'chmod:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(entry = Entries.filter(lambda x: x != multiple()))
    @precondition(lambda self: self.should_run('get_acl') and not self.use_sdk)
    def get_acl(self, entry):
        result1 = self.fsop1.do_get_acl(entry)
        result2 = self.fsop2.do_get_acl(entry)
        assert self.equal(result1, result2), red(f'get_acl:\nresult1 is {result1}\nresult2 is {result2}')

    
    @rule(entry = EntryWithACL.filter(lambda x: x != multiple()), 
          option = st.sampled_from(['--remove-all', '--remove-default']),
          user = st.sampled_from(SUDO_USERS)
          )
    @precondition(lambda self: self.should_run('remove_acl') and not self.use_sdk)
    def remove_acl(self, entry: str, option: str, user='root'):
        result1 = self.fsop1.do_remove_acl(entry, option, user)
        result2 = self.fsop2.do_remove_acl(entry, option, user)
        assert self.equal(result1, result2), red(f'remove_acl:\nresult1 is {result1}\nresult2 is {result2}')

    @rule(
          target=EntryWithACL,
          sudo_user = st.sampled_from(SUDO_USERS),
          entry = Entries.filter(lambda x: x != multiple()), 
          user=st.sampled_from(USERS+['']),
          user_perm = st.sets(st.sampled_from(['r', 'w', 'x'])),
          group=st.sampled_from(GROUPS+['']),
          group_perm = st.sets(st.sampled_from(['r', 'w', 'x'])),
          other_perm = st.sets(st.sampled_from(['r', 'w', 'x'])),
          set_mask = st.booleans(),
          mask = st.sets(st.sampled_from(['r', 'w', 'x'])),
          default = st.booleans(),
          recursive = st.booleans(),
          recalc_mask = st.booleans(),
          not_recalc_mask = st.booleans(),
          logical = st.booleans(),
          physical = st.booleans(),
          )
    @precondition(lambda self: self.should_run('set_acl') and not self.use_sdk)
    def set_acl(self, sudo_user, entry, user, user_perm, group, group_perm, other_perm, set_mask, mask, default, recursive, recalc_mask, not_recalc_mask, logical, physical):
        result1 = self.fsop1.do_set_acl(sudo_user, entry, user, user_perm, group, group_perm, other_perm, set_mask, mask, default, recursive, recalc_mask, not_recalc_mask, logical, physical)
        result2 = self.fsop2.do_set_acl(sudo_user, entry, user, user_perm, group, group_perm, other_perm, set_mask, mask, default, recursive, recalc_mask, not_recalc_mask, logical, physical)
        assert self.equal(result1, result2), red(f'set_acl:\nresult1 is {result1}\nresult2 is {result2}')
        if isinstance(result1, Exception):
            return multiple()
        else:
            return entry

    @rule(entry = Entries.filter(lambda x: x != multiple()),
          access_time=st_time, 
          modify_time=st_time, 
          follow_symlinks=st.booleans(), 
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('utime') and False)
    def utime(self, entry, access_time, modify_time, follow_symlinks, user='root'):
        result1 = self.fsop1.do_utime(entry=entry, access_time=access_time, modify_time=modify_time, follow_symlinks=follow_symlinks, user=user)
        result2 = self.fsop2.do_utime(entry=entry, access_time=access_time, modify_time=modify_time, follow_symlinks=follow_symlinks, user=user)
        assert self.equal(result1, result2), red(f'utime:\nresult1 is {result1}\nresult2 is {result2}')


    @rule(entry = Entries.filter(lambda x: x != multiple()), 
          owner= st.sampled_from(USERS), 
          user = st.sampled_from(SUDO_USERS))
    @precondition(lambda self: self.should_run('chown'))
    def chown(self, entry, owner, user='root'):
        result1 = self.fsop1.do_chown(entry=entry, owner=owner, user=user)
        result2 = self.fsop2.do_chown(entry=entry, owner=owner, user=user)
        assert self.equal(result1, result2), red(f'chown:\nresult1 is {result1}\nresult2 is {result2}')
     
    @rule( dir =Folders, vdirs = st.integers(min_value=2, max_value=31) )
    @precondition(lambda self: self.should_run('split_dir') \
                  and (self.fsop1.is_jfs or self.fsop2.is_jfs) \
                  and not self.use_sdk
    )
    def split_dir(self, dir, vdirs):
        self.fsop1.do_split_dir(dir, vdirs)
        self.fsop2.do_split_dir(dir, vdirs)

    @rule(dir = Folders)
    @precondition(lambda self: self.should_run('merge_dir') \
                 and (self.fsop1.is_jfs or self.fsop2.is_jfs) \
                 and not self.use_sdk
    )
    def merge_dir(self, dir):
        self.fsop1.do_merge_dir(dir)
        self.fsop2.do_merge_dir(dir)
    
    @rule(dir = Folders,
          zone1=st.sampled_from(common.get_zones(ROOT_DIR1)),
          zone2=st.sampled_from(common.get_zones(ROOT_DIR2)),
          is_vdir=st.booleans())
    @precondition(lambda self: self.should_run('rebalance_dir') \
                   and (self.fsop1.is_jfs or self.fsop2.is_jfs) \
                   and not self.use_sdk \
                   and os.getenv('PROFILE', 'dev') != 'generate'
    )
    def rebalance_dir(self, dir, zone1, zone2, is_vdir, pysdk=True):
        self.fsop1.do_rebalance(entry=dir, zone=zone1, is_vdir=is_vdir, pysdk=pysdk)
        self.fsop2.do_rebalance(entry=dir, zone=zone2, is_vdir=is_vdir, pysdk=pysdk)

    @rule(file = Files, 
          zone1=st.sampled_from(common.get_zones(ROOT_DIR1)),
          zone2=st.sampled_from(common.get_zones(ROOT_DIR2)),
          )
    @precondition(lambda self: self.should_run('rebalance_file') \
                   and (self.fsop1.is_jfs or self.fsop2.is_jfs) \
                   and not self.use_sdk \
                   and os.getenv('PROFILE', 'dev') != 'generate'
    )
    def rebalance_file(self, file, zone1, zone2, pysdk=True):
        self.fsop1.do_rebalance(entry=file, zone=zone1, is_vdir=False, pysdk=pysdk)
        self.fsop2.do_rebalance(entry=file, zone=zone2, is_vdir=False, pysdk=pysdk)

    def teardown(self):
        if self.check_dangling:
            self.fsop1.do_check_dangling_files()
            self.fsop2.do_check_dangling_files()

if __name__ == '__main__':
    MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '100'))
    STEP_COUNT=int(os.environ.get('STEP_COUNT', '50'))
    ci_db = DirectoryBasedExampleDatabase(".hypothesis/examples")    
    settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
    settings.register_profile("schedule", max_examples=1000, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=200, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain], 
        database=ci_db)
    settings.register_profile("pull_request", max_examples=100, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=50, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain], 
        database=ci_db)
    settings.register_profile("generate", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, \
        phases=[Phase.generate, Phase.target])
    
    if os.environ.get('CI'):
        event_name = os.environ.get('GITHUB_EVENT_NAME')
        if event_name == 'schedule':
            profile = 'schedule'
        else:
            profile = 'pull_request'
    else:
        profile = os.environ.get('PROFILE', 'dev')
    print(f'profile is {profile}')
    settings.load_profile(profile)
    juicefs_machine = JuicefsMachine.TestCase()
    juicefs_machine.runTest()
    print(json.dumps(FsOperation.stats.get(), sort_keys=True, indent=4))

================================================
FILE: .github/scripts/hypo/fs_acl_test.py
================================================
import unittest
from fs import JuicefsMachine

class TestFsrand2(unittest.TestCase):
    def test_acl_913(self):
        # See: https://github.com/juicedata/jfs/issues/913
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='aaaa', mode='w', parent=v1, user='root')
        v3 = state.set_acl(default=False, entry=v1, group='root', group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user='user1', user_perm=set())
        state.chmod(entry=v1, mode=4, user='root')
        state.set_acl(default=False, entry=v1, group='root', group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=True, set_mask=False, sudo_user='user1', user='root', user_perm=set())
        state.teardown()

    def test_acl_1004(self):
        # SEE https://github.com/juicedata/jfs/issues/1004
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.listdir(dir=v1, user='root')
        state.change_groups(group='root', groups=[], user='user1')
        v2 = state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user='root', user_perm=set())
        state.listdir(dir=v1, user='user1')
        state.teardown()

    def test_acl_1006(self):
        # SEE https://github.com/juicedata/jfs/issues/1006
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.create_file(content=b'', file_name='aaaa', mode='w', parent=v1, umask=0, user='root')
        state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=True, set_mask=False, sudo_user='root', user='root', user_perm=set())
        state.set_acl(default=False, entry=v1, group='user1', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user='root', user_perm=set())
        state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=True, set_mask=False, sudo_user='user1', user='root', user_perm=set())
        state.teardown()

    def test_acl_1011(self):
        # SEE https://github.com/juicedata/jfs/issues/1011
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.chmod(entry=v1, mode=0, user='root')
        state.split_dir(dir=v1, vdirs=2)
        state.change_groups(group='root', groups=[], user='user1')
        v2 = state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user='root', user_perm=set())
        v3 = state.create_file(content=b'', file_name='aaaa', mode='w', parent=v1, umask=0, user='root')
        state.listdir(dir=v1, user='user1')
        state.teardown()

    def test_acl_1015(self):
        # SEE: https://github.com/juicedata/jfs/issues/1015
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='aaaa', mode='w', parent=v1, umask=0, user='root')
        state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm={'r', 'w', 'x'}, physical=False, recalc_mask=False, recursive=True, set_mask=True, sudo_user='root', user='user1', user_perm={'r', 'w', 'x'})
        state.set_acl(default=False, entry=v1, group='root', group_perm={'r'}, logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=True, set_mask=False, sudo_user='user1', user='root', user_perm=set())
        state.teardown()

    def test_acl_1022(self):
        # SEE https://github.com/juicedata/jfs/issues/1022
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.create_file(content=b'\xda\x07', file_name='lbca', mode='w', parent=v1, umask=103, user='root')
        state.set_acl(default=False, entry=v1, group='user1', group_perm={'r', 'w'}, logical=False, mask={'r', 'w', 'x'}, not_recalc_mask=True, other_perm=set(), physical=True, recalc_mask=True, recursive=True, set_mask=True, sudo_user='root', user='root', user_perm={'r', 'w', 'x'})
        state.chmod(entry=v1, mode=0o4004, user='root')
        state.set_acl(default=True, entry=v1, group='group4', group_perm={'x'}, logical=False, mask={'w', 'x'}, not_recalc_mask=False, other_perm=set(), physical=True, recalc_mask=False, recursive=True, set_mask=True, sudo_user='user1', user='user2', user_perm=set())
        state.teardown()

    def test_acl_1044(self):
        # SEE: https://github.com/juicedata/jfs/issues/1044
        state = JuicefsMachine()
        v1 = state.init_folders()
        v3 = state.create_file(content=b'', file_name='aaca', mode='wb', parent=v1, umask=0, user='root')
        v4 = state.set_xattr(file=v3, flag=2, name='user.0', user='root', value=b"abc")
        v5 = state.set_acl(default=False, entry=v3, group='root', group_perm={'r'}, logical=False, mask={'r'}, not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user='root', user_perm={'r'})
        state.remove_acl(entry=v3, option='--remove-all', user='root')
        state.list_xattr(file=v3, user='root')
        state.teardown()

    def test_acl_4458(self):
        # SEE: https://github.com/juicedata/juicefs/issues/4458
        state = JuicefsMachine()
        v1 = state.init_folders()
        v3 = state.set_acl(default=True, entry=v1, group='root', group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=True, recursive=True, set_mask=True, sudo_user='root', user='user1', user_perm={v1, 'r', 'w', 'x'})
        state.create_file(content=b'', file_name='afds', mode='w', parent=v1, umask=295, user='root')
        state.teardown()

    def test_acl_4472(self):
        # SEE: https://github.com/juicedata/juicefs/issues/4472
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='stsn', mode='xb', parent=v1, umask=464, user='root')
        v3 = state.set_acl(default=True, entry=v1, group='group4', group_perm={'x'}, logical=False, mask={'w'}, not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=True, recursive=True, set_mask=True, sudo_user='root', user='root', user_perm={'r'})
        v8 = state.create_file(content=b'', file_name='qpyt', mode='wb', parent=v1, umask=233, user='root')
        v9 = state.copy_file(entry=v2, follow_symlinks=False, new_entry_name='knmh', parent=v1, umask=23, user='root')
        state.open(file=v8, flags=[512], mode=2579, umask=34, user='root')
        state.teardown()

    def test_acl_4483(self):
        # SEE https://github.com/juicedata/juicefs/issues/4483
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.set_acl(default=True, entry=v1, group='root', group_perm={'r'}, logical=True, mask={'r'}, not_recalc_mask=False, other_perm={'r', 'x'}, physical=False, recalc_mask=False, recursive=True, set_mask=True, sudo_user='user1', user='user2', user_perm={'r', 'w', 'x'})
        v4 = state.create_file(content=b'\xe65', file_name='abha', mode='ab', parent=v1, umask=3, user='root')
        v5 = state.set_acl(default=False, entry=v4, group='user3', group_perm={'x'}, logical=False, mask={'x'}, not_recalc_mask=True, other_perm=set(), physical=True, recalc_mask=True, recursive=False, set_mask=False, sudo_user='root', user='user1', user_perm=set())
        state.list_xattr(file=v4, user='root')
        state.teardown()

    def test_acl_4496(self):
        # SEE https://github.com/juicedata/juicefs/issues/4496
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.chmod(entry=v1, mode=3291, user='root')
        state.remove_acl(entry=v1, option='--remove-default', user='user1')
        v40 = state.mkdir(mode=1122, parent=v1, subdir='uopt', umask=367, user='root')
        state.chown(entry=v40, owner='user1', user='root')
        state.change_groups(group='group4', groups=['group2'], user='user1')
        state.set_acl(default=False, entry=v40, group='group2', group_perm={'r', 'w', 'x'}, logical=True, mask={'r', 'w', 'x'}, not_recalc_mask=True, other_perm={'x'}, physical=False, recalc_mask=True, recursive=False, set_mask=False, sudo_user='user1', user=v1, user_perm=set())
        state.teardown()

    def test_acl_4663(self):
        #SEE https://github.com/juicedata/juicefs/issues/4663
        state = JuicefsMachine()
        v1 = state.init_folders()
        v3 = state.set_acl(default=True, entry=v1, group=v1, group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=False, recursive=False, set_mask=False, sudo_user='root', user=v1, user_perm={'r'})
        state.mkdir(mode=0, parent=v1, subdir='aaaa', umask=0, user='root')
        state.teardown()

    def skip_test_acl_2044(self):
        #SEE https://github.com/juicedata/jfs/issues/2044
        for i in range(5):
            state = JuicefsMachine()
            folders_0 = state.init_folders()
            files_0 = state.create_file(content=b'$\xca<', file_name='f', parent=folders_0, umask=18, user='root')
            files_1 = state.rename_file(entry=files_0, new_entry_name='yedw', parent=folders_0, umask=18, user='root')
            state.set_acl(default=True, entry=files_1, group='user2', group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm=set(), physical=False, recalc_mask=True, recursive=False, set_mask=False, sudo_user='root', user='root', user_perm=set())
            state.open(file=files_1, flags=[0, 64, 2, 512, 4096, 1, 1052672, 1024, 128], mode=231, umask=18, user='root')
            state.rebalance_file(file=files_1, zone1=folders_0, zone2='.jfszone1')
            files_2 = state.hardlink(link_file_name='a', parent=folders_0, src_file=files_1, umask=18, user='root')
            folders_1 = state.mkdir(mode=76, parent=folders_0, subdir='j', umask=18, user='root')
            files_3 = state.hardlink(link_file_name='v', parent=folders_0, src_file=files_1, umask=18, user='root')
            files_4 = state.rename_file(entry=files_1, new_entry_name='ypzn', parent=folders_0, umask=18, user='root')
            files_5 = state.copy_file(entry=files_2, follow_symlinks=True, new_entry_name='iydv', parent=folders_1, umask=18, user='root')
            state.open(file=files_2, flags=[4096, 128], mode=250, umask=18, user='root')
            entry_with_acl_0 = state.set_acl(default=False, entry=files_4, group='group1', group_perm=set(), logical=False, mask=set(), not_recalc_mask=False, other_perm={'x'}, physical=True, recalc_mask=True, recursive=False, set_mask=True, sudo_user='root', user='user1', user_perm=set())
            state.unlink(file=files_2, user='root')
            state.fallocate(file=files_4, length=66667, mode=0, offset=6713, user='root')
            state.open(file=files_5, flags=[64], mode=441, umask=18, user='root')
            state.set_acl(default=True, entry=files_4, group='group3', group_perm={'x'}, logical=False, mask={'r', 'w', 'x'}, not_recalc_mask=False, other_perm={'r', 'w', 'x'}, physical=True, recalc_mask=True, recursive=False, set_mask=True, sudo_user='root', user='user3', user_perm=set())
            state.remove_acl(entry=entry_with_acl_0, option='--remove-all', user='root')
            files_7 = state.rename_file(entry=files_3, new_entry_name='fgq', parent=folders_0, umask=18, user='root')
            state.chmod(entry=files_7, mode=433, user='root')
            state.remove_acl(entry=entry_with_acl_0, option='--remove-default', user='root')
            state.teardown()

if __name__ == '__main__':
    unittest.main()

================================================
FILE: .github/scripts/hypo/fs_op.py
================================================
import io
import os
import pwd
import re
import shutil
import stat
import subprocess

try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import xattr
from common import get_acl, get_root, red
from typing import Dict
try: 
    __import__('fallocate')
except ImportError:
    subprocess.check_call(["pip", "install", "fallocate"])
import fallocate
from stats import Statistics
import common
from os.path import dirname
import sys
sys.path.append('.')
from sdk.python.juicefs.juicefs import juicefs

class FsOperation:
    JFS_CONTROL_FILES=['.accesslog', '.config', '.stats']
    stats = Statistics()
    
    def __init__(self, name, root_dir:str, mount_point=None, use_sdk:bool=False, is_jfs=False, volume_name=None, meta_url=None):
        self.logger =common.setup_logger(f'./{name}.log', name, os.environ.get('LOG_LEVEL', 'INFO'))
        self.root_dir = root_dir.rstrip('/')
        self.use_sdk = use_sdk
        self.is_jfs = is_jfs
        self.singlezone = False
        if is_jfs:
            self.singlezone = len(common.get_zones(root_dir)) == 1
        if mount_point:
            self.mount_point = mount_point
        else:
            self.mount_point = common.get_root(self.root_dir)
        self.client = None
        if use_sdk and self.is_jfs:
            if meta_url:
                self.client = juicefs.Client(volume_name, meta_url, access_log="/tmp/jfs.log")
            else:
                self.client = juicefs.Client(volume_name, conf_dir='deploy/docker', access_log="/tmp/jfs.log")
        self.client2 = None

    def get_client_for_rebalance(self):
        if self.client2 == None:
            self.client2 = juicefs.Client(common.get_volume_name(self.root_dir), 
                                          conf_dir='deploy/docker', 
                                          access_log="/tmp/rebalance.log", 
                                          attr_cache="0s",
                                          entry_cache="0s", 
                                          dir_entry_cache="0s",)
        return self.client2

    def run_cmd(self, command:str) -> str:
        self.logger.info(f'run_cmd: {command}')
        if '|' in command or '>' in command or '&' in command:
            ret=os.system(command)
            if ret == 0:
                return ret
            else: 
                raise Exception(f"run command {command} failed with {ret}")
        try:
            output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise e
        return output.stdout.decode()

    def get_zones(self):
        return common.get_zones(self.root_dir)

    def init_rootdir(self):
        if self.client:
            self.logger.debug(f'init_rootdir {self.root_dir} with use_sdk={self.use_sdk}')
            sdk_root_dir = self.get_sdk_path(self.root_dir)
            if self.client.exists(sdk_root_dir):
                self.client.rmr(sdk_root_dir)
                assert not self.client.exists(sdk_root_dir), red(f'{self.root_dir} should not exist')
            self.client.makedirs(sdk_root_dir)
            assert self.client.exists(sdk_root_dir), red(f'{self.root_dir} should exist')
        else:
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)
            if os.environ.get('PROFILE', 'dev') != 'generate':
                common.clean_dir(self.root_dir)

    def seteuid(self, user, action=''):
        if self.client:
            return
        uid = pwd.getpwnam(user).pw_uid
        gid = pwd.getpwnam(user).pw_gid
        os.setegid(gid)
        os.seteuid(uid)
        self.logger.debug(f'{action} seteuid uid={uid} gid={gid} succeed')

    def reset_euid(self, action=''):
        if self.client:
            return
        os.setegid(0) 
        os.seteuid(0)
        self.logger.debug(f'{action} reset euid and egid succeed')
        
    def handleException(self, e, action, path, **kwargs):
        if isinstance(e, subprocess.CalledProcessError):
            err = e.output.decode()
        else:
            err = type(e).__name__ + ":" + str(e)
        err = '\n'.join([elem.split('<FATAL>:')[-1].split('<ERROR>:')[-1] for elem in err.split('\n')])
        err = re.sub(r'\[\w+\.go:\d+\]', '', err)
        if err.find('setfacl') != -1 and err.find('\n') != -1:
            err = '\n'.join(sorted(err.split('\n')))
        err = self.parse_pysdk_error(err)
        self.stats.failure(action)
        self.logger.info(f'{action} {path} {kwargs} failed: {err}')
        return Exception(err)
    
    def parse_pysdk_error(self, err:str):
        # error message : call jfs_rename failed: [Errno 22] Invalid argument: (b'/fsrand', b'/fsrand/izsn/rfnn', c_uint(0))
        if not err.startswith("call jfs_"):
            return err
        return re.sub(r'call jfs_\w+ failed: ', '', err)
    
    def get_sdk_path(self, abspath):
        return '/'+os.path.relpath(abspath, self.mount_point)

    def do_remove_dangling_files(self):
        if not self.is_jfs or self.use_sdk:
            self.logger.debug(f'do_remove_dangling_files {self.mount_point} skip')
            return
        self.logger.debug(f'do_remove_dangling_files {self.mount_point}')
        zones = common.get_zones(self.mount_point)
        for zone in zones:
            zone_dir = os.path.join(self.mount_point, zone)
            entries = os.listdir(zone_dir)
            for entry in entries:
                if 'dangling' in entry:
                    abspath = os.path.join(zone_dir, entry)
                    if os.path.isdir(abspath):
                        shutil.rmtree(abspath)
                    elif os.path.isfile(abspath):
                        os.unlink(abspath)
            backup_dir = os.path.join(zone_dir, '.backup')
            if os.path.exists(backup_dir):
                entries = os.listdir(backup_dir)
                for entry in entries:
                    if 'dangling' in entry:
                        abspath = os.path.join(backup_dir, entry)
                        if os.path.isdir(abspath):
                            shutil.rmtree(abspath)
                        elif os.path.isfile(abspath):
                            os.unlink(abspath)

        self.logger.info(f'do_remove_dangling_files {self.mount_point} succeed')

    def do_check_dangling_files(self):
        if not self.is_jfs or self.use_sdk:
            self.logger.debug(f'do_check_dangling_files {self.mount_point} skip')
            return
        self.logger.debug(f'do_check_dangling_files {self.mount_point}')
        zones = common.get_zones(self.mount_point)
        for zone in zones:
            zone_dir = os.path.join(self.mount_point, zone)
            entries = os.listdir(zone_dir)
            for entry in entries:
                if 'dangling' in entry:
                    assert False, red(f'{entry} should not exist in {zone_dir}')
            backup_dir = os.path.join(zone_dir, '.backup')
            if os.path.exists(backup_dir):
                entries = os.listdir(backup_dir)
                for entry in entries:
                    if 'dangling' in entry:
                        assert False, red(f'{entry} should not exist in {backup_dir}')
        self.logger.info(f'do_check_dangling_files {self.mount_point} succeed')

    def do_stat(self, entry, user):
        self.logger.debug(f'do_stat {self.root_dir} {entry}')
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.seteuid(user, action='do_stat')
            if self.client:
                st = self.client.stat(self.get_sdk_path(abspath))
            else:
                st = os.stat(abspath)
        except Exception as e :
            return self.handleException(e, 'do_stat', abspath, entry=entry, user=user)
        finally:
            self.reset_euid(action='do_stat')
        self.stats.success('do_stat')
        self.logger.info(f'do_stat {abspath} with user={user} succeed')
        self.logger.debug(f'do_stat st is {st}')
        return common.get_stat_field(st)
   
    def do_lstat(self, entry, user):
        self.logger.debug(f'do_lstat {self.root_dir} {entry}')
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.seteuid(user)
            if self.client:
                st = self.client.lstat(self.get_sdk_path(abspath))
            else:
                st = os.lstat(abspath)
        except Exception as e :
            return self.handleException(e, 'do_lstat', abspath, entry=entry, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_lstat')
        self.logger.info(f'do_lstat {abspath} with user={user} succeed')
        return common.get_stat_field(st)

    def do_exists(self, entry, user):
        self.logger.debug(f'do_exists {self.root_dir} {entry}')
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.seteuid(user)
            if self.client:
                exists = self.client.exists(self.get_sdk_path(abspath))
            else:
                exists = os.path.exists(abspath)
        except Exception as e :
            return self.handleException(e, 'do_exists', abspath, entry=entry, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_exists')
        self.logger.info(f'do_exists {abspath} with user={user} succeed')
        return exists

    def do_open(self, file, flags, mask, mode, user):
        self.logger.debug(f'do_open {self.root_dir} {file} {flags} {mode} {user}')
        abspath = os.path.join(self.root_dir, file)
        flag = 0
        fd = -1
        for f in flags:
            flag |= f
        try:
            old_mask = os.umask(mask)
            self.seteuid(user)
            fd = os.open(abspath, flags=flag, mode=mode)
        except Exception as e :
            return self.handleException(e, 'do_open', abspath, flags=flags, mode=mode, user=user)
        finally:
            self.reset_euid()
            os.umask(old_mask)
            if fd > 0:
                os.close(fd)
        self.stats.success('do_open')
        self.logger.info(f'do_open {abspath} {flags} {mode} succeed')
        return self.do_stat(file, user)
    
    def do_open2(self, file, mode, user):
        self.logger.debug(f'do_open2 {self.root_dir} {file} {mode} {user}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode) as f:
                    pass
            else:
                with open(abspath, mode) as f:
                    pass
        except Exception as e :
            return self.handleException(e, 'do_open2', abspath, mode=mode, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_open2')
        self.logger.info(f'do_open2 {abspath} {mode} succeed')
        return self.do_stat(file, user)

    def do_write(self, file, content, mode:str, encoding, errors, offset, whence, user):
        self.logger.debug(f'do_write {self.root_dir} {file} {offset}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                size = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                size = os.stat(abspath).st_size
            if size == 0:
                offset = 0
            else:
                offset = offset % size
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode, encoding=encoding, errors=errors) as f:
                    f.seek(offset, whence)
                    count=f.write(content)
            else:
                with open(abspath, mode, encoding=encoding, errors=errors) as f:
                    f.seek(offset, whence)
                    count=f.write(content)
        except (io.UnsupportedOperation) as e:
            e = Exception(f'io.UnsupportedOperation: write')
            return self.handleException(e, 'do_write', abspath, offset=offset, whence=whence, mode=mode, user=user)
        except Exception as e :
            return self.handleException(e, 'do_write', abspath, offset=offset, whence=whence, mode=mode, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_write')
        self.logger.info(f'do_write {abspath} offset={offset} whence={whence} mode={mode} user={user} succeed')
        return count, self.do_stat(file, user)
        
    def do_writelines(self, file, lines, mode, offset, whence, user):
        self.logger.debug(f'do_writelines {self.root_dir} {file} {offset}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                size = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                size = os.stat(abspath).st_size
            if size == 0:
                offset = 0
            else:
                offset = offset % size
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode) as f:
                    f.seek(offset, whence)
                    f.writelines(lines)
            else:
                with open(abspath, mode) as f:
                    # f.seek(offset, whence)
                    f.seek(offset, whence)
                    f.writelines(lines)
        except (TypeError,io.UnsupportedOperation) as e:
            self.logger.debug(f'writelines: {str(e)}')
            e = Exception(f'writelines')
            return self.handleException(e, 'do_writelines', abspath, offset=offset, whence=whence, mode=mode, user=user)
        except Exception as e :
            return self.handleException(e, 'do_writelines', abspath, offset=offset, whence=whence, mode=mode, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_writelines')
        self.logger.info(f'do_writelines {abspath} offset={offset} whence={whence} mode={mode} user={user} succeed')
        return self.do_stat(file, user)

    def do_fallocate(self, file, offset, length, mode, user):
        self.logger.debug(f'do_fallocate {self.root_dir} {file} {offset} {length} {mode} {user}')
        abspath = os.path.join(self.root_dir, file)
        fd = -1
        try:
            self.seteuid(user)
            file_size = os.stat(abspath).st_size
            if file_size == 0:
                offset = 0
            else:
                offset = offset % file_size
            fd = os.open(abspath, os.O_RDWR)
            fallocate.fallocate(fd, offset, length, mode)
        except Exception as e :
            return self.handleException(e, 'do_fallocate', abspath, offset=offset, length=length, mode=mode, user=user)
        finally:
            if fd > 0:
                os.close(fd)
            self.reset_euid()
        self.stats.success('do_fallocate')
        self.logger.info(f'do_fallocate {abspath} offset={offset} length={length} mode={mode} user={user} succeed')
        return self.do_stat(file, user)

    def do_copy_file_range(self, src, dst, src_offset, dst_offset, count, user):
        self.logger.debug(f'do_copy_file_range {self.root_dir} {src} {dst} {src_offset} {dst_offset} {count} {user}')
        src_abspath = os.path.join(self.root_dir, src)
        dst_abspath = os.path.join(self.root_dir, dst)
        src_fd = -1
        dst_fd = -1
        try:
            self.seteuid(user)
            src_fd = os.open(src_abspath, os.O_RDONLY)
            dst_fd = os.open(dst_abspath, os.O_WRONLY)
            os.copy_file_range(src_fd, dst_fd, count, src_offset, dst_offset)
        except Exception as e :
            return self.handleException(e, 'do_copy_file_range', src_abspath, dst_abspath=dst_abspath, src_offset=src_offset, dst_offset=dst_offset, count=count, user=user)
        finally:
            if src_fd > 0:
                os.close(src_fd)
            if dst_fd > 0:
                os.close(dst_fd)
            self.reset_euid()
        self.stats.success('do_copy_file_range')
        self.logger.info(f'do_copy_file_range {src_abspath} {dst_abspath} src_offset={src_offset} dst_offset={dst_offset} count={count} user={user} succeed')
        return self.do_stat(dst, user)

    def do_read(self, file, length, mode, offset, whence, user, encoding, errors):
        self.logger.debug(f'do_read {self.root_dir} {file} {mode} {length} {offset} {whence}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                size = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                size = os.stat(abspath).st_size
            if size == 0:
                offset = 0
            else:
                offset = offset % size
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode, encoding=encoding, errors=errors) as f:
                    f.seek(offset, whence)
                    result = f.read(length)    
            else:
                with open(abspath, mode, encoding=encoding, errors=errors) as f: 
                    # f.seek(offset, whence)
                    f.seek(offset, whence)
                    result = f.read(length)
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            # result = binascii.hexlify(result)
        except UnicodeDecodeError as e:
            # SEE: https://github.com/juicedata/jfs/issues/1450#issuecomment-2213518638
            self.logger.debug(f'UnicodeDecodeError: {e.encoding} {e.object} {e.start} {e.end} {e.reason}')
            e = UnicodeDecodeError(e.encoding, e.object, 0, 0, e.reason)
            return self.handleException(e, 'do_read', abspath, offset=offset, length=length, whence=whence, user=user)
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: read')
            return self.handleException(e, 'do_read', abspath, offset=offset, length=length, whence=whence, user=user)
        except Exception as e :
            return self.handleException(e, 'do_read', abspath, offset=offset, length=length, whence=whence, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_read')
        self.logger.info(f'do_read {abspath} mode={mode} length={length} offset={offset} whence={whence} user={user} succeed')
        return (result, )

    def do_readlines(self, file, mode, offset, whence, user):
        self.logger.debug(f'do_readlines {self.root_dir} {file} {mode} {offset} {whence}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                size = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                size = os.stat(abspath).st_size
            if size == 0:
                offset = 0
            else:
                offset = offset % size
            self.logger.debug(f'do_readlines offset={offset} size={size}')
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode) as f:
                    f.seek(offset, whence)
                    result = ''.join(f.readlines())
            else:
                with open(abspath, mode) as f:
                    # f.seek(offset, whence)
                    f.seek(offset, whence)
                    result = ''.join(f.readlines())
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            # result = binascii.hexlify(result)
        except UnicodeDecodeError as e:
            # SEE: https://github.com/juicedata/jfs/issues/1450#issuecomment-2213518638
            self.logger.debug(f'UnicodeDecodeError: {e.encoding} {e.object} {e.start} {e.end} {e.reason}')
            e = UnicodeDecodeError(e.encoding, e.object, 0, 0, e.reason)
            return self.handleException(e, 'do_readlines', abspath, offset=offset, whence=whence, user=user)
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: readlines')
            return self.handleException(e, 'do_readlines', abspath, offset=offset, whence=whence, user=user)
        except Exception as e :
            return self.handleException(e, 'do_readlines', abspath, offset=offset, whence=whence, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_readlines')
        self.logger.info(f'do_readlines {abspath} mode={mode} offset={offset} whence={whence} user={user} succeed')
        return (result, )

    def do_readline(self, file, mode, offset, whence, user):
        self.logger.debug(f'do_readline {self.root_dir} {file} {mode} {offset} {whence}')
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                size = self.client.stat(self.get_sdk_path(abspath)).st_size
            else:
                size = os.stat(abspath).st_size
            if size == 0:
                offset = 0
            else:
                offset = offset % size
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode) as f:
                    f.seek(offset, whence)
                    result = f.readline()
            else:
                with open(abspath, mode) as f:
                    # f.seek(offset, whence)
                    f.seek(offset, whence)
                    result = f.readline()
            if isinstance(result, str):
                result = result.replace('\r', '\n') # SEE: https://github.com/juicedata/jfs/issues/1472
                result = result.encode()
            # result = binascii.hexlify(result)
        except UnicodeDecodeError as e:
            # SEE: https://github.com/juicedata/jfs/issues/1450#issuecomment-2213518638
            self.logger.debug(f'UnicodeDecodeError: {e.encoding} {e.object} {e.start} {e.end} {e.reason}')
            e = UnicodeDecodeError(e.encoding, e.object, 0, 0, e.reason)
            return self.handleException(e, 'do_readline', abspath, offset=offset, whence=whence, user=user)
        except io.UnsupportedOperation as e:
            e = Exception(f'io.UnsupportedOperation: readline')
            return self.handleException(e, 'do_readline', abspath, offset=offset, whence=whence, user=user)
        except Exception as e :
            return self.handleException(e, 'do_readline', abspath, offset=offset, whence=whence, user=user)
        finally:
            self.reset_euid()

        self.stats.success('do_readline')
        self.logger.info(f'do_readline {abspath} mode={mode} offset={offset} whence={whence} user={user} succeed')
        return (result, )

    def do_truncate(self, file, size, user):
        self.logger.debug(f'do_truncate {self.root_dir} {file} {size}')
        abspath = os.path.join(self.root_dir, file)
        fd = -1
        try:
            self.seteuid(user, action='do_truncate')
            if self.client:
                st = self.client.stat(self.get_sdk_path(abspath))
            else:
                st = os.stat(abspath)
            if st.st_size == 0:
                size = 0
            else:
                size = size % st.st_size
            if self.client:
                self.client.truncate(self.get_sdk_path(abspath), size)
                st = self.client.stat(self.get_sdk_path(abspath))
            else:
                os.truncate(abspath, size)
                st = os.stat(abspath)
        except Exception as e :
            return self.handleException(e, 'do_truncate', abspath, size=size, user=user)
        finally:
            if fd > 0:
                os.close(fd)
            self.reset_euid()
        assert st.st_size == size, red(f'do_truncate: {abspath} size should be {size} but {st.st_size}')
        self.stats.success('do_truncate')
        self.logger.info(f'do_truncate {abspath} size={size} user={user} succeed')
        return self.do_stat(file, user)

    def do_create_file(self, parent, file_name, content, mode='xb', user='root', umask=0o022, buffering=-1):
        relpath = os.path.join(parent, file_name)
        abspath = os.path.join(self.root_dir, relpath)
        try:
            old_umask = os.umask(umask)
            self.seteuid(user, action='do_create_file')
            if self.client:
                with self.client.open(self.get_sdk_path(abspath), mode, buffering=buffering) as f:
                    f.write(content)
                    count=f.write(content)
            else:
                with open(abspath, mode, buffering=buffering) as f:
                    f.write(content)
                    count=f.write(content)
        except Exception as e :
            return self.handleException(e, 'do_create_file', abspath, mode=mode, user=user)
        finally:
            self.reset_euid(action='do_create_file')
            os.umask(old_umask)
        self.stats.success('do_create_file')
        self.logger.info(f'do_create_file {abspath} with mode {mode} succeed')
        return count, self.do_stat(relpath, user)
    
    def do_listdir(self, dir, user):
        abspath = os.path.join(self.root_dir, dir)
        try:
            self.seteuid(user)
            if self.client:
                li = self.client.listdir(self.get_sdk_path(abspath))
            else:
                li = os.listdir(abspath) 
            li = sorted(list(filter(lambda x: x not in self.JFS_CONTROL_FILES, li)))
        except Exception as e:
            return self.handleException(e, 'do_listdir', abspath, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_listdir')
        self.logger.info(f'do_listdir {abspath} with user={user} succeed')
        return tuple(li)

    def do_unlink(self, file, user):
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                self.client.unlink(self.get_sdk_path(abspath))
            else:
                os.unlink(abspath)
        except Exception as e:
            return self.handleException(e, 'do_unlink', abspath, user=user)
        finally:
            self.reset_euid()
        assert not os.path.exists(abspath), red(f'do_unlink: {abspath} should not exist')
        self.stats.success('do_unlink')
        self.logger.info(f'do_unlink {abspath} with user={user} succeed')
        return True 

    def do_rename(self, entry, parent, new_entry_name, user, umask):
        abspath = os.path.join(self.root_dir, entry)
        new_relpath = os.path.join(parent, new_entry_name)
        new_abspath = os.path.join(self.root_dir, new_relpath)
        try:
            self.seteuid(user)
            old_umask = os.umask(umask)
            if self.client:
                path = self.get_sdk_path(abspath)
                new_path = self.get_sdk_path(new_abspath)
                self.client.rename(path, new_path)
            else:
                os.rename(abspath, new_abspath)
        except Exception as e:
            return self.handleException(e, 'do_rename', abspath, new_abspath=new_abspath, user=user)
        finally:
            self.reset_euid()
            os.umask(old_umask)
        if not self.use_sdk:
            assert os.path.lexists(new_abspath), red(f'do_rename: {new_abspath} should exist')
        self.stats.success('do_rename')
        self.logger.info(f'do_rename {abspath} {new_abspath} with user={user} succeed')
        return self.do_stat(new_relpath, user)

    def do_copy_file(self, entry, parent, new_entry_name, follow_symlinks, user, umask):
        abspath = os.path.join(self.root_dir, entry)
        new_relpath = os.path.join(parent, new_entry_name)
        new_abspath = os.path.join(self.root_dir, new_relpath)
        try:
            old_umask = os.umask(umask)
            self.seteuid(user)
            shutil.copy(abspath, new_abspath, follow_symlinks=follow_symlinks)
        except Exception as e:
            return self.handleException(e, 'do_copy_file', abspath, new_abspath=new_abspath, user=user, follow_symlinks=follow_symlinks, umask=umask)
        finally:
            self.reset_euid()
            os.umask(old_umask)
        assert os.path.lexists(new_abspath), red(f'do_copy_file: {new_abspath} should exist')
        self.stats.success('do_copy_file')
        self.logger.info(f'do_copy_file {abspath} {new_abspath} with follow_symlinks={follow_symlinks} user={user} umask={umask} succeed')
        return self.do_stat(new_relpath, user)

    def can_clone(self, src_dir, dst_dir):
        if os.path.commonpath([src_dir]) == os.path.commonpath([src_dir, dst_dir]) or \
              os.path.commonpath([dst_dir]) == os.path.commonpath([src_dir, dst_dir]):
            return False
        if os.path.exists(dst_dir):
            return False
        return True
        
    def do_clone_entry(self,  entry, parent, new_entry_name, preserve, user='root', umask=0o022, mount='./juicefs'):
        root_dir = self.root_dir
        abspath = os.path.join(root_dir, entry)
        new_relpath = os.path.join(parent, new_entry_name)
        new_abspath = os.path.join(root_dir, new_relpath)
        if not self.can_clone(abspath, new_abspath):
            return self.handleException(Exception(f'can not clone {abspath} to {new_abspath}'), 'do_clone_entry', abspath, new_abspath=new_abspath, user=user)
        try:
            old_umask = os.umask(umask)
            if self.is_jfs:
                if preserve:
                    self.run_cmd(f'sudo -u {user} {mount} clone {abspath} {new_abspath} --preserve')
                else:
                    self.run_cmd(f'sudo -u {user} {mount} clone {abspath} {new_abspath}')
            else:
                if preserve:
                    self.run_cmd(f'sudo -u {user} cp -r {abspath} {new_abspath} -L --preserve=all')
                else:
                    self.run_cmd(f'sudo -u {user} cp -r {abspath} {new_abspath} -L')
        except subprocess.CalledProcessError as e:
            self.logger.error(f'run command failed: {e.output.decode()}')
            return self.handleException(Exception(f'do_clone_entry failed'), 'do_clone_entry', abspath, new_abspath=new_abspath, user=user)
        finally:
            os.umask(old_umask)
        assert os.path.lexists(new_abspath), red(f'do_clone_entry: {new_abspath} should exist')
        self.stats.success('do_clone_entry')
        self.logger.info(f'do_clone_entry {abspath} {new_abspath} succeed')
        return self.do_stat(new_relpath, user)
    
    def do_copy_tree(self, entry, parent, new_entry_name, symlinks, ignore_dangling_symlinks, dir_exist_ok, user, umask):
        abspath = os.path.join(self.root_dir, entry)
        new_relpath = os.path.join(parent, new_entry_name)
        new_abspath = os.path.join(self.root_dir, new_relpath)
        try:
            old_mask = os.umask(umask)
            self.seteuid(user)
            shutil.copytree(abspath, new_abspath, \
                            symlinks=symlinks, \
                            ignore_dangling_symlinks=ignore_dangling_symlinks, \
                            dirs_exist_ok=dir_exist_ok)
        except Exception as e:
            return self.handleException(e, 'do_copy_tree', abspath, new_abspath=new_abspath, user=user)
        finally:
            self.reset_euid()
            os.umask(old_mask)
        assert os.path.lexists(new_abspath), red(f'do_copy_tree: {new_abspath} should exist')
        self.stats.success('do_copy_tree')
        self.logger.info(f'do_copy_tree {abspath} {new_abspath} succeed')
        return self.do_stat(new_relpath, user)

    def do_mkdir(self, parent, subdir, mode, user, umask):
        relpath = os.path.join(parent, subdir)
        abspath = os.path.join(self.root_dir, relpath)
        try:
            self.seteuid(user)
            old_mask = os.umask(umask)
            if self.client:
                sdk_path = self.get_sdk_path(abspath)
                self.client.mkdir(sdk_path, mode)
                st = self.client.stat(sdk_path)
            else:
                os.mkdir(abspath, mode)
                st = os.stat(abspath)
        except Exception as e:
            return self.handleException(e, 'do_mkdir', abspath, mode=mode, user=user)
        finally:
            self.reset_euid()
            os.umask(old_mask)
        assert stat.S_ISDIR(st.st_mode), red(f'do_mkdir: {abspath} should be dir')
        self.stats.success('do_mkdir')
        self.logger.info(f'do_mkdir {abspath} with mode={oct(mode)} user={user} succeed')
        return self.do_stat(entry=relpath, user=user)
    
    def do_rmdir(self, dir, user):
        abspath = os.path.join(self.root_dir, dir)
        try:
            self.seteuid(user)
            if self.client:
                self.client.rmdir(self.get_sdk_path(abspath))
                exist = self.client.exists(self.get_sdk_path(abspath))
            else:
                os.rmdir(abspath)
                exist = os.path.exists(abspath)
        except Exception as e:
            return self.handleException(e, 'do_rmdir', abspath, user=user)
        finally:
            self.reset_euid()
        assert not exist, red(f'do_rmdir: {abspath} should not exist')
        self.stats.success('do_rmdir')
        self.logger.info(f'do_rmdir {abspath} with user={user} succeed')
        return True

    def do_hardlink(self, src_file, parent, link_file_name, user, umask):
        src_abs_path = os.path.join(self.root_dir, src_file)
        link_rel_path = os.path.join(parent, link_file_name)
        link_abs_path = os.path.join(self.root_dir, link_rel_path)
        try:
            self.seteuid(user)
            old_mask = os.umask(umask)
            if self.client:
                path = self.get_sdk_path(src_abs_path)
                link_path = self.get_sdk_path(link_abs_path)
                self.client.link(path, link_path)
            else:
                os.link(src_abs_path, link_abs_path)
        except Exception as e:
            return self.handleException(e, 'do_hardlink', src_abs_path, link_abs_path=link_abs_path, user=user)
        finally:
            self.reset_euid()
            os.umask(old_mask)
        # time.sleep(0.005)
        # assert st.st_nlink > 1, red(f'do_hardlink: nlink({st.st_nlink}) of {link_abs_path} should greater than 1')
        self.stats.success('do_hardlink')
        self.logger.info(f'do_hardlink {src_abs_path} {link_abs_path} with user={user} umask={oct(umask)} succeed')
        return self.do_stat(link_rel_path, user)

    def do_symlink(self, src_file, parent, link_file_name, user, umask):
        src_abs_path = os.path.join(self.root_dir, src_file)
        link_rel_path = os.path.join(parent, link_file_name)
        link_abs_path = os.path.join(self.root_dir, link_rel_path)
        relative_path = os.path.relpath(src_abs_path, os.path.dirname(link_abs_path))
        try:
            self.seteuid(user)
            old_mask = os.umask(umask)
            if self.client:
                path = self.get_sdk_path(src_abs_path)
                link_path = self.get_sdk_path(link_abs_path)
                self.client.symlink(path, link_path)
                st = self.client.lstat(link_path)
            else:
                os.symlink(relative_path, link_abs_path)
                st = os.lstat(link_abs_path)
        except Exception as e:
            return self.handleException(e, 'do_symlink', src_abs_path, link_abs_path=link_abs_path, user=user)
        finally:
            self.reset_euid()
            os.umask(old_mask)
        assert stat.S_ISLNK(st.st_mode), red(f'do_symlink: {link_abs_path} should be link')
        self.stats.success('do_symlink')
        self.logger.info(f'do_symlink {src_abs_path} {link_abs_path} with user={user} umask={oct(umask)} succeed')
        return self.do_stat(link_rel_path, user)
    
    def do_readlink(self, file, user):
        link_abs_path = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                dest = self.client.readlink(self.get_sdk_path(link_abs_path))
            else:
                dest = os.readlink(link_abs_path)
        except Exception as e:
            return self.handleException(e, 'do_read_link', link_abs_path, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_read_link')
        self.logger.info(f'do_read_link {link_abs_path} with user={user} succeed')
        return (dest,)

    def do_loop_symlink(self, parent, link_file_name, user='root'):
        link_abs_path = os.path.join(self.root_dir, parent, link_file_name)
        try:
            self.seteuid(user)
            if self.client:
                sdk_path = self.get_sdk_path(link_abs_path)
                self.client.symlink(sdk_path, sdk_path)
            else:
                os.symlink(link_file_name, link_abs_path)
        except Exception as e:
            return self.handleException(e, 'do_loop_symlink', link_abs_path)
        finally:
            self.reset_euid()
        self.stats.success('do_loop_symlink')
        self.logger.info(f'do_loop_symlink {link_abs_path} succeed')
        return True
    
    def do_set_xattr(self, file, name, value, flag, user):
        xattr_map = {0:0, xattr.XATTR_CREATE: juicefs.XATTR_CREATE, xattr.XATTR_REPLACE: juicefs.XATTR_REPLACE}
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                flag = xattr_map[flag]
                self.client.setxattr(self.get_sdk_path(abspath), name, value, flag)
            else:
                xattr.setxattr(abspath, name, value, flag)
        except Exception as e:
            return self.handleException(e, 'do_set_xattr', abspath, name=name, value=value, flag=flag, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_set_xattr')
        self.logger.info(f"do_set_xattr {abspath} with name={name} value={value} flag={flag} user={user} succeed")
        return 'succeed'

    def do_get_xattr(self, file, name, user):
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                value = self.client.getxattr(self.get_sdk_path(abspath), name)
            else:
                value = xattr.getxattr(abspath, name)
        except Exception as e:
            return self.handleException(e, 'do_get_xattr', abspath, name=name, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_get_xattr')
        self.logger.info(f"do_get_xattr {abspath} with name={name} user={user} succeed")
        return (value,)

    def do_list_xattr(self, file, user):
        abspath = os.path.join(self.root_dir, file)
        xattr_list = []
        try:
            self.seteuid(user)    
            if self.client:
                path = self.get_sdk_path(abspath)
                xattrs = self.client.listxattr(path)
            else:
                xattrs = xattr.listxattr(abspath)
            xattr_list = []
            for attr in xattrs:
                if self.client:
                    path = self.get_sdk_path(abspath)
                    value = self.client.getxattr(path, attr)
                else:
                    value = xattr.getxattr(abspath, attr)
                xattr_list.append((attr, value))
            xattr_list.sort()  # Sort the list based on xattr names
        except Exception as e:
            return self.handleException(e, 'do_list_xattr', abspath, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_list_xattr')
        self.logger.info(f"do_list_xattr {abspath} with user={user} succeed")
        return xattr_list

    def do_remove_xattr(self, file, name, user):
        abspath = os.path.join(self.root_dir, file)
        try:
            self.seteuid(user)
            if self.client:
                self.client.removexattr(self.get_sdk_path(abspath), name)
            else:
                xattr.removexattr(abspath, name)
            # self.run_cmd(f'sudo -u {user} setfattr -x {name} {abspath}', root_dir)
        except Exception as e:
            return self.handleException(e, 'do_remove_xattr', abspath, name=name, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_remove_xattr')
        self.logger.info(f"do_remove_xattr {abspath} name={name} user={user} succeed")
        return 'succeed'
    
    def do_change_groups(self, user, group, groups):
        try:
            subprocess.run(['usermod', '-g', group, '-G', ",".join(groups), user], check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            self.stats.failure('do_change_groups')
            self.logger.info(f"do_change_groups {user} {group} {groups} failed: {e.output.decode()}")
            return
        self.stats.success('do_change_groups')
        self.logger.info(f"do_change_groups {user} {group} {groups} succeed")

    def do_chmod(self, entry, mode, user):
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.seteuid(user)
            if self.client:
                self.client.chmod(self.get_sdk_path(abspath), mode)
            else:
                os.chmod(abspath, mode)
            # self.run_cmd(f'sudo -u {user} chmod {mode} {abspath}', root_dir)
        except Exception as e:
            return self.handleException(e, 'do_chmod', abspath, mode=mode, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_chmod')
        self.logger.info(f"do_chmod {abspath} mode={oct(mode)} user={user} succeed")
        return self.do_stat(entry, user)

    def do_get_acl(self,  entry: str):
        abspath = os.path.join(self.root_dir, entry)
        try:
            acl = get_acl(abspath)
        except Exception as e:
            return self.handleException(e, 'do_get_acl', abspath)
        self.stats.success('do_get_acl')
        self.logger.info(f"do_get_acl {abspath} succeed")
        return acl

    def do_remove_acl(self,  entry: str, option: str, user: str):
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.run_cmd(f'sudo -u {user} setfacl {option} {abspath} ')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_acl', abspath, option=option,user=user)
        self.stats.success('do_remove_acl')
        self.logger.info(f"do_remove_acl {abspath} with {option} succeed")
        return get_acl(abspath)
    
    def do_set_acl(self, sudo_user, entry, user, user_perm, group, group_perm, other_perm, set_mask, mask, default, recursive, recalc_mask, not_recalc_mask, logical, physical):
        abspath = os.path.join(self.root_dir, entry)
        user_perm = ''.join(user_perm) == '' and '-' or ''.join(user_perm)
        group_perm = ''.join(group_perm) == '' and '-' or ''.join(group_perm)
        other_perm = ''.join(other_perm) == '' and '-' or ''.join(other_perm)
        mask = ''.join(mask) == '' and '-' or ''.join(mask)
        default = default and '-d' or ''
        recursive = recursive and '-R' or ''
        recalc_mask = recalc_mask and '--mask' or ''
        not_recalc_mask = not_recalc_mask and '--no-mask' or ''
        logical = (recursive and logical) and '-L' or ''
        physical = (recursive and physical) and '-P' or ''
        try:
            text = f'u:{user}:{user_perm},g:{group}:{group_perm},o::{other_perm}'
            if set_mask:
                text += f',m::{mask}'
            self.run_cmd(f'sudo -u {sudo_user} setfacl {default} {recursive} {recalc_mask} {not_recalc_mask} {logical} {physical} -m {text} {abspath}')
            acl = get_acl(abspath)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_set_acl', abspath, user_perm=user_perm, group_perm=group_perm, other_perm=other_perm)
        self.stats.success('do_set_acl')
        self.logger.info(f"do_set_acl {abspath} with {text} succeed")
        return (acl,)

    def do_utime(self, entry, access_time, modify_time, follow_symlinks, user):
        abspath = os.path.join(self.root_dir, entry)
        try:
            self.seteuid(user)
            if self.client:
                self.client.utime(self.get_sdk_path(abspath), (access_time, modify_time))
            else:
                os.utime(abspath, (access_time, modify_time), follow_symlinks=follow_symlinks)
                # self.run_cmd(f'sudo -u {user} touch -a -t {access_time} {abspath}', root_dir)
                # self.run_cmd(f'sudo -u {user} touch -m -t {modify_time} {abspath}', root_dir)
        except Exception as e:
            return self.handleException(e, 'do_utime', abspath, access_time=access_time, modify_time=modify_time, follow_symlinks=follow_symlinks, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_utime')
        self.logger.info(f"do_utime {abspath} with access_time={access_time} modify_time={modify_time} follow_symlinks={follow_symlinks} user={user} succeed")
        return self.do_stat(entry, user)

    def do_chown(self, entry, owner, user):
        abspath = os.path.join(self.root_dir, entry)
        info = pwd.getpwnam(owner)
        uid = info.pw_uid
        gid = info.pw_gid
        try:
            self.seteuid(user)
            if self.client:
                self.client.chown(self.get_sdk_path(abspath), uid, gid)
            else:
                os.chown(abspath, uid, gid)
                # self.run_cmd(f'sudo -u {user} chown {owner} {abspath}', root_dir)
        except Exception as e:
            return self.handleException(e, 'do_chown', abspath, owner=owner, user=user)
        finally:
            self.reset_euid()
        self.stats.success('do_chown')
        self.logger.info(f"do_chown {abspath} with owner={owner} user={user} succeed")
        return self.do_stat(entry, user)

    def do_split_dir(self, dir, vdirs):
        relpath = os.path.join(dir, f'.jfs_split#{vdirs}')
        abspath = os.path.join(self.root_dir, relpath)
        if not self.is_jfs:
            return 
        try:
            subprocess.check_call(['touch', abspath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except Exception as e:
            self.stats.failure('do_split_dir')
            self.logger.info(f"do_split_dir {abspath} {vdirs} failed: {str(e)}")
            return
        self.stats.success('do_split_dir')
        self.logger.info(f"do_split_dir {abspath} {vdirs} succeed")

    def do_merge_dir(self, dir):
        relpath = os.path.join(dir, f'.jfs_split#1')
        abspath = os.path.join(self.root_dir, relpath)
        if not self.is_jfs:
            return 
        try:
            subprocess.run(['touch', abspath], check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            error = f'{e.cmd} exit with {e.returncode}, {e.stderr}'.strip()
            self.stats.failure('do_merge_dir')
            self.logger.info(f"do_merge_dir {abspath} failed: {error}")
            return
        self.stats.success('do_merge_dir')
        self.logger.info(f"do_merge_dir {abspath} succeed")

    def do_rebalance_with_pysdk(self, entry, zone, is_vdir):
        if zone == '':
            # print(f'{self.root_dir} is not multizoned, skip rebalance')
            return
        abspath = os.path.join(self.root_dir, entry)
        vdir_relpath = os.path.join(entry, '.jfs#1')
        vdir_abspath = os.path.join(self.root_dir, vdir_relpath)
        if is_vdir and os.path.isfile( vdir_abspath ):
            abspath = vdir_abspath
        try :
            dest = os.path.join(get_root(abspath), zone, os.path.basename(abspath.rstrip('/')))
            os.rename(abspath, dest)
        except Exception as e:
            self.stats.failure('do_rebalance')
            self.logger.info(f"do_rebalance {abspath} {dest} failed: {str(e)}")
            return
        self.stats.success('do_rebalance')
        self.logger.info(f"do_rebalance {abspath} {dest} succeed")

    def do_rebalance(self, entry, zone, is_vdir, pysdk=True):
        if zone == '':
            # print(f'{self.root_dir} is not multizoned, skip rebalance')
            return
        abspath = os.path.join(self.root_dir, entry)
        vdir_relpath = os.path.join(entry, '.jfs#1')
        vdir_abspath = os.path.join(self.root_dir, vdir_relpath)
        if is_vdir and os.path.isfile( vdir_abspath ):
            abspath = vdir_abspath
        try :
            dest = os.path.join(get_root(abspath), zone, os.path.basename(abspath.rstrip('/')))
            if pysdk:
                client = self.get_client_for_rebalance()
                if client.exists(self.get_sdk_path(abspath)):
                    client.rename(self.get_sdk_path(abspath), self.get_sdk_path(dest))
            else:
                if os.path.exists(abspath):
                    os.rename(abspath, dest)
        except OSError as e:
            self.stats.failure('do_rebalance')
            self.logger.info(f"do_rebalance {abspath} {dest} failed: {str(e)}")
        self.stats.success('do_rebalance')
        self.logger.info(f"do_rebalance {abspath} {dest} succeed")

================================================
FILE: .github/scripts/hypo/fs_sdk_test.py
================================================
import unittest
import subprocess
try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import xattr
from fs import JuicefsMachine

class TestPySdk(unittest.TestCase):
    def test_issue_1331(self):
        # SEE: https://github.com/juicedata/jfs/issues/1331
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.mkdir(mode=0o2164, parent=v1, subdir='ouyz', umask=0o022,  user='root')
        state.teardown()

    def test_issue_1339(self):
        # SEE: https://github.com/juicedata/jfs/issues/1339
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.exists(entry=v1,  user='root')
        v2 = state.loop_symlink(link_file_name='kydl', parent=v1)
        state.exists(entry=v2,  user='root')
        state.teardown()
        
    def test_issue_1349(self):
        # SEE: https://github.com/juicedata/jfs/issues/1349
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.loop_symlink(link_file_name='pmjl', parent=v1)
        state.set_xattr(file=v2, flag=xattr.XATTR_CREATE, name='user.abc',  user='root', value=b'def')
        state.teardown()

    def test_issue_1359(self):
        # SEE: https://github.com/juicedata/jfs/issues/1359
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.merge_dir(dir=v1)
        state.create_file(buffering=0, content=b'\x16\x0cu\x01\x01\x01\x01\x01\x01', file_name='bbbb', mode='ab', parent=v1, umask=18, user='root')
        state.teardown()

    def test_issue_1361(self):
        # SEE: https://github.com/juicedata/jfs/issues/1361
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=1, content=b'', file_name='abbc', mode='ab', parent=v1, umask=18, user='root')
        state.hardlink(src_file=v2, link_file_name='aaaa', parent=v1, umask=18, user='root')
        state.teardown()

    def test_issue_1362(self):
        #SEE: https://github.com/juicedata/jfs/issues/1362
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=-1, content=b'abc', file_name='iazj', mode='ab', parent=v1, umask=18, user='root')
        state.read(file=v2, length=4949, mode='w+', offset=1, user='root', whence=2)
        state.teardown()

    def test_issue_1364(self):
        # SEE: https://github.com/juicedata/jfs/issues/1364
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=10, content=b'', file_name='abag', mode='wb', parent=v1, umask=18, user='root')
        state.set_xattr(file=v2, flag=xattr.XATTR_REPLACE, name='user.abc', user='root', value=b'def')
        state.teardown()

    def test_issue_1365(self):
        # SEE: https://github.com/juicedata/jfs/issues/1365
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.mkdir(mode=15, parent=v1, subdir='coue', umask=18, user='root')
        state.rmdir(dir=v2, user='root')
        state.teardown()
    
    def test_issue_1369(self):
        # SEE: https://github.com/juicedata/jfs/issues/1369
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=-1, content=b'', file_name='aaaa', mode='wb', parent=v1, umask=18,  user='root')
        state.write(content=b'abcd', file=v2, mode='rb', offset=0,  user='root', whence=0)
        state.teardown()

    def test_issue_1369_2(self):
        # SEE: https://github.com/juicedata/jfs/issues/1369
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=-1, content=b'\x00', file_name='aaaa', mode='wb', parent=v1, umask=18,  user='root')
        state.write(content=b'', file=v2, mode='xb', offset=0,  user='root', whence=0)
        state.teardown()

    def test_issue_1369_3(self):
        # SEE: https://github.com/juicedata/jfs/issues/1369
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'a', file_name='aaaa', mode='wb', parent=v1, umask=18,  user='root')
        state.write(content=b'b', file=v2, mode='ab', offset=0,  user='root', whence=0)
        state.teardown()

    def skip_test_issue_1370(self):
        # SEE: https://github.com/juicedata/jfs/issues/1370
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(buffering=-1, content=b'', file_name='aaab', mode='wb', parent=v1, umask=18,  user='root')
        v3 = state.symlink(src_file=v2, link_file_name='aaaa', parent=v1, umask=18, user='root')
        state.unlink(file=v2,  user='root')
        state.open2(file=v3, mode='w+',  user='root')
        state.teardown()

    def test_issue_1419(self):
        # SEE: https://github.com/juicedata/jfs/issues/1419
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.lstat(entry=v1,  user='root')
        v2 = state.create_file(content=b'^\x85\n\xa1;1*ek\xc8', file_name='d', parent=v1, umask=18,  user='root')
        state.readline(file=v2, mode='a+', offset=9070,  user='root', whence=0)
        state.teardown()

    def test_issue_1422(self):
        # SEE: https://github.com/juicedata/jfs/issues/1422
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'a', file_name='p', parent=v1, umask=18,  user='root')
        v3 = state.symlink(src_file=v2, link_file_name='ab', parent=v1, umask=18,  user='root')
        state.hardlink(src_file=v3, link_file_name='a', parent=v1, umask=18,  user='root')
        state.teardown()

    def test_issue_1424(self):
        # SEE: https://github.com/juicedata/jfs/issues/1424
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content='²', mode='a', file_name='w', parent=v1, umask=18,  user='root')
        state.readline(file=v2, mode='r', offset=1708,  user='root', whence=0)
        state.teardown()

    def test_issue_1425(self):
        # SEE: https://github.com/juicedata/jfs/issues/1425
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'a', file_name='a', parent=v1, umask=18,  user='root')
        v3 = state.mkdir(mode=0, parent=v1, subdir='b', umask=18,  user='root')
        state.rename_dir(entry=v3, new_entry_name=v2, parent=v1, umask=18,  user='root')
        state.teardown()

    def test_issue_1442(self):
        # SEE: https://github.com/juicedata/jfs/issues/1442
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'aqdranzfk', file_name='fz', parent=v1, umask=18, user='root')
        state.set_xattr(file=v2, flag=0, name='user.0', user='root', value=b'\x01\x01\x00\x01')
        state.teardown()

    def test_issue_1443(self):
        # SEE: https://github.com/juicedata/jfs/issues/1443
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'bcb', file_name='bcba', parent=v1, umask=18, user='root')
        v3 = state.hardlink(src_file=v2, link_file_name='a', parent=v1, umask=18, user='root')
        state.rename_file(entry=v2, new_entry_name=v3, parent=v1, umask=18, user='root')
        state.teardown()

    def test_issue_1449(self):
        # SEE: https://github.com/juicedata/jfs/issues/1449
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'\x1d\x00', file_name='b', parent=v1, umask=18, user='root')
        state.listdir(dir=v1, user='root')
        state.readline(file=v2, mode='r', offset=0, user='root', whence=0)
        state.teardown()

    def test_issue_1450(self):
        # SEE: https://github.com/juicedata/jfs/issues/1450
        state = JuicefsMachine()
        v1 = state.init_folders()
        v6 = state.create_file(content=b'\xa5\x08\xee', file_name='mzeg', parent=v1, umask=18, user='root')
        state.readline(file=v6, mode='r+', offset=1, user='root', whence=0)
        state.teardown()

    def skip_test_issue_1450_2(self):
        # SEE: https://github.com/juicedata/jfs/issues/1450 
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'10', file_name='v', parent=v1, umask=18, user='root')
        state.read(encoding='utf-16', errors='strict', file=v2, length=2, mode='r', offset=0, user='root', whence=0)
        state.teardown()

    def test_issue_1457(self):
        # SEE: https://github.com/juicedata/jfs/issues/1457
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content='\ufeff', mode='x', file_name='v', parent=v1, umask=18, user='root')
        state.teardown()
    
    def skip_test_issue_1465(self):
        # SEE: https://github.com/juicedata/jfs/issues/1465
        state = JuicefsMachine()
        v1 = state.init_folders()
        v38 = state.create_file(content=b'\x05{\xf3\x9bg\x93\x00\ry0', file_name='kfhg', parent=v1, umask=18, user='root')
        state.readline(file=v38, mode='rb', offset=7694, user='root', whence=1)
        state.teardown()

    def test_issue_1481(self):
        # SEE: https://github.com/juicedata/jfs/issues/1481
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='b', parent=v1, umask=18, user='root')
        state.set_xattr(file=v2, flag=0, name='user.\uda5d', user='root', value=b'!')
        state.teardown()

    def test_issue_x(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'\x035\x03\x02\x00', file_name='a', parent=v1, umask=18, user='root')
        state.lstat(entry=v1, user='root')
        v3 = state.create_file(content=b'10', file_name='v', parent=v1, umask=18, user='root')
        state.write(content=b'\x01\x01', encoding='utf-8', errors='ignore', file=v2, mode='r', offset=258, user='root', whence=1)
        state.teardown()

    def test_issue_y(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', parent=v1, umask=18, user='root')
        state.readlines(file=v2, mode='a', offset=0, user='root', whence=0)
        state.teardown()

    def test_issue_z(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'1}26B', file_name='gv', parent=v1, umask=18, user='root')
        # state.write(content='\x04', encoding='utf-8', errors='ignore', file=v2, mode='a', offset=4900, user='root', whence=1)
        state.writelines(file=v2, lines=['hp', 'uwq'], mode='a+b', offset=160, user='root', whence=2)
        state.teardown()

    def test_issue_a(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', parent=v1, umask=18, user='root')
        state.writelines(file=v2, lines=[''], mode='rb', offset=9841, user='root', whence=1)
        state.teardown()

    def test_issue_b(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v7 = state.create_file(content=b'', file_name='vkg', parent=v1, umask=18, user='root')
        state.write(content='í', encoding='ascii', errors='strict', file=v7, mode='r', offset=5117, user='root', whence=1)
        state.teardown()

    def test_issue_c(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.create_file(content='\udfc5', mode='xb', file_name='a', parent=v1, umask=18, user='root')
        state.teardown()

    def test_issue_d(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        state.exists(entry=v1, user='root')
        v3 = state.create_file(content=b'\x10\x1ata\xd6', file_name='x', parent=v1, umask=18, user='root')
        # v15 = state.create_file(content=b'', file_name='nbln', parent=v1, umask=18, user='root')
        # v20 = state.create_file(content=b'\x82\xd7\xc0\xff\xac\x94\xe5\x8f\x03\x10', file_name='exc', parent=v1, umask=18, user='root')
        # state.write(content=b'', encoding='utf-8', errors='backslashreplace', file=v20, mode='w+', offset=3658, user='root', whence=1)
        # state.write(content=b'7q\x0b\xe4\x9f\xb4b', encoding='latin-1', errors='namereplace', file=v15, mode='r+b', offset=6691, user='root', whence=2)
        state.write(content='È\U000c3fe7𧶤÷\x89\x00𭊦cç¤Ìk', encoding='latin-1', errors='strict', file=v3, mode='r', offset=10240, user='root', whence=0)
        state.teardown()

    def test_issue_e(self):
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'abc\r', file_name='a', parent=v1, umask=18, user='root')
        state.read(file=v2, mode='r', offset=0, user='root', whence=0, length=4)
        state.teardown()

    def test_issue_f(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.create_file(content=b'', file_name='b', parent=folders_0, umask=18, user='root')
        state.chown(entry=folders_0, owner='user1', user='root')
        state.create_file(content=b'', file_name='a', parent=folders_0, umask=18, user='root')
        state.teardown()

    def test_rename_invalid_arg(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.loop_symlink(link_file_name='aa', parent=folders_0, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir='a', umask=18, user='root')
        folders_2 = state.mkdir(mode=0, parent=folders_1, subdir='b', umask=18, user='root')
        state.exists(entry=files_0, user='root')
        state.rename_dir(entry=folders_1, new_entry_name=folders_1, parent=folders_0, umask=18, user='root')
        state.rename_dir(entry=folders_0, new_entry_name=folders_1, parent=folders_1, umask=18, user='root')
        state.rename_dir(entry=folders_2, new_entry_name=folders_1, parent=folders_0, umask=18, user='root')
        state.teardown()

    def test_rename_to_dir_not_exist(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.create_file(content=b'', file_name='aa', parent=folders_0, umask=18, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir='a', umask=18, user='root')
        state.rename_dir(entry=folders_0, new_entry_name='a/a', parent=folders_1, umask=18, user='root')
        state.teardown()

    def test_rmdir_check_exist(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir='a', umask=18, user='root')
        files_0 = state.loop_symlink(link_file_name=folders_1, parent=folders_1, user='root')
        state.exists(entry=files_0, user='root')
        state.rmdir(dir=folders_1, user='root')
        state.teardown()

    def test_truncate(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.loop_symlink(link_file_name='a', parent=folders_0, user='root')
        files_1 = state.rename_file(entry=files_0, new_entry_name='b', parent=folders_0, umask=18, user='root')
        state.listdir(dir=folders_0, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir=files_0, umask=18, user='root')
        state.listdir(dir=files_0, user='root')
        state.truncate(file=files_0, size=0, user='root')
        state.teardown()

    def test_unlink(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        state.listdir(dir=folders_0, user='root')
        files_0 = state.loop_symlink(link_file_name='a', parent=folders_0, user='root')
        files_1 = state.rename_file(entry=files_0, new_entry_name='b', parent=folders_0, umask=18, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir=files_0, umask=18, user='root')
        state.unlink(file=files_0, user='root')
        state.teardown()

    def test_read_utf8(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.create_file(content=b'', file_name='aa', parent=folders_0, umask=18, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir='a', umask=18, user='root')
        state.rename_dir(entry=folders_0, new_entry_name='a/a', parent=folders_1, umask=18, user='root')
        state.teardown()

    def test_create_isdir(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        state.listdir(dir=folders_0, user='root')
        files_0 = state.loop_symlink(link_file_name='a', parent=folders_0, user='root')
        state.chmod(entry=files_0, mode=0, user='root')
        files_1 = state.rename_file(entry=files_0, new_entry_name='b', parent=folders_0, umask=18, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir=files_0, umask=18, user='root')
        state.truncate(file=files_0, size=0, user='root')
        state.readlink(file=files_0, user='root')
        state.writelines(file=files_0, lines=['dua', 'hbixuhv'], mode='wb', offset=8344, user='root', whence=1)
        state.teardown()

    def test_rename_file(self):
        state = JuicefsMachine()
        folders_0 = state.init_folders()
        files_0 = state.create_file(content=b'', file_name='q', parent=folders_0, umask=18, user='root')
        state.list_xattr(file=files_0, user='root')
        files_1 = state.create_file(content=b'', file_name='ezmt', parent=folders_0, umask=18, user='root')
        files_2 = state.create_file(content=b'', file_name='a', parent=folders_0, umask=18, user='root')
        state.list_xattr(file=files_2, user='root')
        files_3 = state.rename_file(entry=files_2, new_entry_name='buln', parent=folders_0, umask=18, user='root')
        state.list_xattr(file=files_3, user='root')
        state.list_xattr(file=files_3, user='root')
        files_4 = state.create_file(content=b'', file_name='sj', parent=folders_0, umask=18, user='root')
        state.list_xattr(file=files_4, user='root')
        files_5 = state.create_file(content=b'', file_name=files_2, parent=folders_0, umask=18, user='root')
        state.list_xattr(file=files_2, user='root')
        files_6 = state.create_file(content=b'', file_name='alwr', parent=folders_0, umask=18, user='root')
        files_7 = state.create_file(content=b'', file_name='m', parent=folders_0, umask=18, user='root')
        files_8 = state.symlink(link_file_name='rd', parent=folders_0, src_file=files_7, umask=18, user='root')
        folders_1 = state.mkdir(mode=0, parent=folders_0, subdir='n', umask=18, user='root')
        state.list_xattr(file=files_8, user='root')
        state.list_xattr(file=files_8, user='root')
        folders_2 = state.mkdir(mode=0, parent=folders_1, subdir=files_2, umask=18, user='root')
        files_9 = state.symlink(link_file_name='g', parent=folders_2, src_file=files_8, umask=18, user='root')
        files_10 = state.symlink(link_file_name=files_2, parent=folders_2, src_file=files_9, umask=18, user='root')
        files_11 = state.rename_file(entry=files_3, new_entry_name=files_2, parent=folders_2, umask=18, user='root')
        state.rename_file(entry=files_9, new_entry_name=files_2, parent=folders_0, umask=18, user='root')
        state.teardown()


if __name__ == '__main__':
    unittest.main()


================================================
FILE: .github/scripts/hypo/fs_test.py
================================================
import os
import unittest
from fs import JuicefsMachine

class TestFsrand2(unittest.TestCase):
    def test_issue_910(self):
        # See: https://github.com/juicedata/jfs/issues/910
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='aaaa', mode='wb', parent=v1, user='root')
        state.chmod(entry=v1, mode=32, user='root')
        state.listdir(dir=v1, user='root')
        state.change_groups(group='root', groups=['root'], user='user1')
        state.listdir(dir=v1, user='user1')
        state.teardown()

    def test_issue_914(self):
        # See: https://github.com/juicedata/jfs/issues/914
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'yl\xff{', file_name='tadj', mode='xb', parent=v1, user='root')
        state.fallocate(file=v2, length=22911, mode=0, offset=7849, user='root')
        state.copy_file(entry=v2, follow_symlinks=True, new_entry_name='npyn', parent=v1, user='root')
        state.teardown()

    def skip_test_issue_918(self):
        # See: https://github.com/juicedata/jfs/issues/918
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='lcka', mode='wb', parent=v1, user='root')
        v3 = state.clone_cp_file(entry=v2, new_entry_name='bbbb', parent=v1, preserve=True, user='root')
        state.chmod(entry=v3, mode=258, user='root')
        v5 = state.clone_cp_file(entry=v3, new_entry_name='mbbb', parent=v1, preserve=True, user='root')
        state.teardown()

    def test_x(self):
        # See: https://github.com/juicedata/jfs/issues/918
        state = JuicefsMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='lcka', mode='wb', parent=v1, user='root')
        state.teardown()

if __name__ == '__main__':
    unittest.main()

================================================
FILE: .github/scripts/hypo/readme.md
================================================
1. format juicefs with trash day of 0. 
   ./juicefs format sqlite3://test.db myjfs
2. mount juicefs with xatrr enable.
   ./juicefs mount sqlite3://test.db /tmp/jfs --enable-xattr
3. run the test.
   python3 .github/scripts/hypo/fs.py
4. run the test with custom examples and step count to reach deep bugs.
   MAX_EXAMPLE=1000 STEP_COUNT=500 .github/scripts/hypo/fs.py
5. you can modify EXCLUDE_RULES to skip running some operations.

================================================
FILE: .github/scripts/hypo/s3.py
================================================
import json
import os
from string import ascii_lowercase
import subprocess
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import assume, settings, Verbosity
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple, consumes
from hypothesis import Phase, seed
from hypothesis import strategies as st
from hypothesis.database import DirectoryBasedExampleDatabase
import random
from s3_op import S3Client
from s3_strategy import *
from s3_contant import *
import common

SEED=int(os.environ.get('SEED', random.randint(0, 1000000000)))
@seed(SEED)
class S3Machine(RuleBasedStateMachine):
    aliases = Bundle('aliases')
    buckets = Bundle('buckets')
    objects = Bundle('objects')
    users = Bundle('users')
    groups = Bundle('groups')
    policies = Bundle('policies')
    user_policies = Bundle('user_policy')
    group_policies = Bundle('group_policy')
    PREFIX1 = 'minio'
    PREFIX2 = 'juice'
    URL1 = 'localhost:9000'
    URL2 = 'localhost:9005'
    URL3 = 'localhost:9006'
    client1 = S3Client(prefix=PREFIX1, url=URL1)
    client2 = S3Client(prefix=PREFIX2, url=URL2, url2=URL3)
    EXCLUDE_RULES = []

    def __init__(self):
        super().__init__()
        self.client1.remove_all_aliases()
        self.client2.remove_all_aliases()
        self.client1.do_set_alias(ROOT_ALIAS, DEFAULT_ACCESS_KEY, DEFAULT_SECRET_KEY, self.URL1)
        self.client2.do_set_alias(ROOT_ALIAS, DEFAULT_ACCESS_KEY, DEFAULT_SECRET_KEY, self.URL2)
        self.client1.remove_all_buckets()
        self.client2.remove_all_buckets()
        self.client1.remove_all_users()
        self.client2.remove_all_users()
        self.client1.remove_all_groups()
        self.client2.remove_all_groups()
        self.client1.remove_all_policies()
        self.client2.remove_all_policies()

    @initialize(target=aliases)
    def init_aliases(self):
        return ROOT_ALIAS

    @initialize(target=policies)
    def init_policies(self):
        return multiple(*BUILD_IN_POLICIES)

    def equal(self, result1, result2):
        if os.getenv('PROFILE', 'dev') == 'generate':
            return True
        if type(result1) != type(result2):
            return False
        if isinstance(result1, Exception):
            result1 = str(result1)
            result2 = str(result2)
        result1 = common.replace(result1, self.PREFIX1, '***')
        result1 = common.replace(result1, self.URL1, '***')
        result2 = common.replace(result2, self.PREFIX2, '***')
        result2 = common.replace(result2, self.URL2, '***')
        # print(f'result1 is {result1}\nresult2 is {result2}')
        return result1 == result2

    @rule(alias = aliases)
    @precondition(lambda self: False)
    def info(self, alias=ROOT_ALIAS):
        result1 = self.client1.do_info(alias)
        result2 = self.client2.do_info(alias)
        assert self.equal(result1, result2), f'\033[31minfo:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(alias = aliases)
    @precondition(lambda self: 'list_buckets' not in self.EXCLUDE_RULES)
    def list_buckets(self, alias=ROOT_ALIAS):
        result1 = self.client1.do_list_buckets(alias)
        result2 = self.client2.do_list_buckets(alias)
        assert self.equal(result1, result2), f'\033[31mdo_list_buckets:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = buckets,
        alias = aliases,
        bucket_name = st_bucket_name)
    @precondition(lambda self: 'create_bucket' not in self.EXCLUDE_RULES)
    def create_bucket(self, bucket_name, alias = ROOT_ALIAS):
        result1 = self.client1.do_create_bucket(bucket_name, alias)
        result2 = self.client2.do_create_bucket(bucket_name, alias)
        assert self.equal(result1, result2), f'\033[31mcreate_bucket:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return bucket_name
    @rule(
        target = buckets, 
        bucket_name = consumes(buckets),
        alias = aliases
    )
    @precondition(lambda self: 'remove_bucket' not in self.EXCLUDE_RULES)
    def remove_bucket(self, bucket_name, alias = ROOT_ALIAS):
        result1 = self.client1.do_remove_bucket(bucket_name, alias)
        result2 = self.client2.do_remove_bucket(bucket_name, alias)
        assert self.equal(result1, result2), f'\033[31mremove_bucket:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return bucket_name
        else:
            return multiple()

    @rule(
        alias = aliases,
        bucket_name = buckets.filter(lambda x: x != multiple()),
        policy = st.sampled_from(['public', 'download', 'upload', 'none'])
    )
    @precondition(lambda self: 'set_bucket_policy' not in self.EXCLUDE_RULES)
    def set_bucket_policy(self, bucket_name, policy, alias=ROOT_ALIAS):
        result1 = self.client1.do_set_bucket_policy(bucket_name, policy, alias)
        result2 = self.client2.do_set_bucket_policy(bucket_name, policy, alias)
        assert self.equal(result1, result2), f'\033[31mset_bucket_policy:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        bucket_name = buckets,
        alias = aliases
    )
    @precondition(lambda self: 'get_bucket_policy' not in self.EXCLUDE_RULES)
    def get_bucket_policy(self, bucket_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_get_bucket_policy(bucket_name, alias)
        result2 = self.client2.do_get_bucket_policy(bucket_name, alias)
        assert self.equal(result1, result2), f'\033[31mget_bucket_policy:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        bucket_name = buckets,
        alias = aliases, 
        recursive = st.booleans()
    )
    def list_bucket_policy(self, bucket_name, alias=ROOT_ALIAS, recursive=False):
        result1 = self.client1.do_list_bucket_policy(bucket_name, alias, recursive)
        result2 = self.client2.do_list_bucket_policy(bucket_name, alias, recursive)
        assert self.equal(result1, result2), f'\033[31mlist_bucket_policy:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target=objects,
        bucket_name = buckets,
        object_name = st_object_name, 
        data = st_content,
        use_part_size = st.booleans(),
        part_size = st_part_size
    )
    @precondition(lambda self: 'put_object' not in self.EXCLUDE_RULES)
    def put_object(self, bucket_name, object_name, data, use_part_size=False, part_size=5*1024*1024):
        if use_part_size:
            result1 = self.client1.do_put_object(bucket_name, object_name, data, -1, part_size=part_size)
            result2 = self.client2.do_put_object(bucket_name, object_name, data, -1, part_size=part_size)
        else:
            result1 = self.client1.do_put_object(bucket_name, object_name, data, len(data))
            result2 = self.client2.do_put_object(bucket_name, object_name, data, len(data))
        assert self.equal(result1, result2), f'\033[31mput_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return f'{bucket_name}:{object_name}'

    @rule(
        obj = objects,
        offset = st_offset, 
        length = st_length
    )
    @precondition(lambda self: 'get_object' not in self.EXCLUDE_RULES)
    def get_object(self, obj:str, offset=0, length=0):
        bucket_name = obj.split(':')[0]
        object_name = obj.split(':')[1]
        result1 = self.client1.do_get_object(bucket_name, object_name, offset=offset, length=length)
        result2 = self.client2.do_get_object(bucket_name, object_name, offset=offset, length=length)
        assert self.equal(result1, result2), f'\033[31mget_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target=objects,
        alias = aliases,
        bucket_name = buckets,
        object_name = st_object_name)
    @precondition(lambda self: 'fput_object' not in self.EXCLUDE_RULES)
    def fput_object(self, bucket_name, object_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_fput_object(bucket_name, object_name, 'README.md', alias)
        result2 = self.client2.do_fput_object(bucket_name, object_name, 'README.md', alias)
        assert self.equal(result1, result2), f'\033[31mfput_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return f'{bucket_name}:{object_name}'

    @rule(
        obj = objects,
        alias = aliases,
        file_path = st.just('/tmp/file')
    )
    @precondition(lambda self: 'fget_object' not in self.EXCLUDE_RULES)
    def fget_object(self, obj:str, file_path, alias = ROOT_ALIAS):
        bucket_name = obj.split(':')[0]
        object_name = obj.split(':')[1]
        result1 = self.client1.do_fget_object(bucket_name, object_name, file_path, alias)
        result2 = self.client2.do_fget_object(bucket_name, object_name, file_path, alias)
        assert self.equal(result1, result2), f'\033[31mfget_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = objects,
        alias = aliases,
        obj = consumes(objects)
    )
    @precondition(lambda self: 'remove_object' not in self.EXCLUDE_RULES)
    def remove_object(self, obj:str, alias=ROOT_ALIAS):
        bucket_name = obj.split(':')[0]
        object_name = obj.split(':')[1]
        result1 = self.client1.do_remove_object(bucket_name, object_name, alias)
        result2 = self.client2.do_remove_object(bucket_name, object_name, alias)
        assert self.equal(result1, result2), f'\033[31mremove_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return obj
        else:
            return multiple()
        
    @rule(
        obj = objects, 
        alias = aliases
    )
    @precondition(lambda self: 'stat_object' not in self.EXCLUDE_RULES)
    def stat_object(self, obj:str, alias=ROOT_ALIAS):
        bucket_name = obj.split(':')[0]
        object_name = obj.split(':')[1]
        result1 = self.client1.do_stat_object(bucket_name, object_name, alias)
        result2 = self.client2.do_stat_object(bucket_name, object_name, alias)
        assert self.equal(result1, result2), f'\033[31mstat_object:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
          bucket_name = buckets,
          prefix = st.just(None), # st.one_of(st_object_prefix, st.just(None)),
          start_after = st.one_of(st_object_name, st.just(None)),
          include_user_meta = st.booleans(),
          include_version = st.just(False),
          use_url_encoding_type = st.booleans(),
          recursive=st.booleans())
    @precondition(lambda self: 'list_objects' not in self.EXCLUDE_RULES)
    def list_objects(self, bucket_name, prefix=None, start_after=None, include_user_meta=False, include_version=False, use_url_encoding_type=True, recursive=False):
        result1 = self.client1.do_list_objects(bucket_name=bucket_name, prefix=prefix, start_after=start_after, include_user_meta=include_user_meta, include_version=include_version, use_url_encoding_type=use_url_encoding_type, recursive=recursive)
        result2 = self.client2.do_list_objects(bucket_name=bucket_name, prefix=prefix, start_after=start_after, include_user_meta=include_user_meta, include_version=include_version, use_url_encoding_type=use_url_encoding_type, recursive=recursive)
        assert self.equal(result1, result2), f'\033[31mlist_objects:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = users,
        alias = aliases,
        user_name = st_user_name, 
    )
    @precondition(lambda self: 'add_user' not in self.EXCLUDE_RULES)
    def add_user(self, user_name, secret_key=DEFAULT_SECRET_KEY, alias = ROOT_ALIAS):
        result1 = self.client1.do_add_user(user_name, secret_key, alias)
        result2 = self.client2.do_add_user(user_name, secret_key, alias)
        assert self.equal(result1, result2), f'\033[31madd_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return user_name
        
    @rule(
        target = users,
        alias = aliases,
        user_name = consumes(users).filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'remove_user' not in self.EXCLUDE_RULES)
    def remove_user(self, user_name, alias = ROOT_ALIAS):
        result1 = self.client1.do_remove_user(user_name, alias)
        result2 = self.client2.do_remove_user(user_name, alias)
        assert self.equal(result1, result2), f'\033[31mremove_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return user_name
        else:
            return multiple()

    @rule(
        alias = aliases,
        user_name = users.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'enable_user' not in self.EXCLUDE_RULES)
    def enable_user(self, user_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_enable_user(user_name, alias)
        result2 = self.client2.do_enable_user(user_name, alias)
        assert self.equal(result1, result2), f'\033[31menable_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        alias = aliases,
        user_name = users.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'disable_user' not in self.EXCLUDE_RULES)
    def disable_user(self, user_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_disable_user(user_name, alias)
        result2 = self.client2.do_disable_user(user_name, alias)
        assert self.equal(result1, result2), f'\033[31mdisable_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        alias = aliases,
        user_name = users.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'user_info' not in self.EXCLUDE_RULES)
    def user_info(self, user_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_user_info(user_name, alias)
        result2 = self.client2.do_user_info(user_name, alias)
        assert self.equal(result1, result2), f'\033[31muser_info:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(alias = aliases)
    @precondition(lambda self: 'list_users' not in self.EXCLUDE_RULES)
    def list_users(self, alias=ROOT_ALIAS):
        result1 = self.client1.do_list_users(alias)
        result2 = self.client2.do_list_users(alias)
        assert self.equal(result1, result2), f'\033[31mlist_users:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(alias = aliases)
    @precondition(lambda self: 'list_groups' not in self.EXCLUDE_RULES)
    def list_groups(self, alias=ROOT_ALIAS):
        result1 = self.client1.do_list_groups(alias)
        result2 = self.client2.do_list_groups(alias)
        assert self.equal(result1, result2), f'\033[31mlist_groups:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = groups,    
        alias = aliases,
        group_name=st_group_name, 
        members = st.lists(users, min_size=1, max_size=3)
    )
    @precondition(lambda self: 'add_group' not in self.EXCLUDE_RULES)
    def add_group(self, group_name, members, alias=ROOT_ALIAS):
        result1 = self.client1.do_add_group(group_name, members, alias)
        result2 = self.client2.do_add_group(group_name, members, alias)
        assert self.equal(result1, result2), f'\033[31madd_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return group_name
        
    @rule(
        group_name = groups, 
        alias = aliases)
    @precondition(lambda self: 'group_info' not in self.EXCLUDE_RULES)
    def group_info(self, group_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_group_info(group_name, alias)
        result2 = self.client2.do_group_info(group_name, alias)
        assert self.equal(result1, result2), f'\033[31mgroup_info:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
    
    @rule(
        target = groups,
        alias = aliases,
        group_name=consumes(groups).filter(lambda x: x != multiple()),
        group_members = st_group_members
    )
    @precondition(lambda self: 'remove_group' not in self.EXCLUDE_RULES)
    def remove_group(self, group_name, group_members, alias=ROOT_ALIAS):
        result1 = self.client1.do_remove_group(group_name, group_members, alias)
        result2 = self.client2.do_remove_group(group_name, group_members, alias)
        assert self.equal(result1, result2), f'\033[31mremove_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return group_name
        else:
            return multiple()
        
    @rule(
        alias = aliases,
        group_name=groups.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'disable_group' not in self.EXCLUDE_RULES)
    def disable_group(self, group_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_disable_group(group_name, alias)
        result2 = self.client2.do_disable_group(group_name, alias)
        assert self.equal(result1, result2), f'\033[31mdisable_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        alias = aliases,
        group_name=groups.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'enable_group' not in self.EXCLUDE_RULES)
    def enable_group(self, group_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_enable_group(group_name, alias)
        result2 = self.client2.do_enable_group(group_name, alias)
        assert self.equal(result1, result2), f'\033[31menable_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = policies,
        alias = st.just(ROOT_ALIAS),
        policy_name = st_policy_name,
        policy_document = st_policy
    )
    @precondition(lambda self: 'add_policy' not in self.EXCLUDE_RULES)
    def add_policy(self, policy_name, policy_document, alias=ROOT_ALIAS):
        result1 = self.client1.do_add_policy(policy_name, policy_document, alias)
        result2 = self.client2.do_add_policy(policy_name, policy_document, alias)
        assert self.equal(result1, result2), f'\033[31madd_policy:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return policy_name
    
    @rule(
        target = policies,
        alias = st.just(ROOT_ALIAS),
        policy_name = consumes(policies).filter(lambda x: x != multiple()).filter(lambda x: x not in BUILD_IN_POLICIES)
    )
    @precondition(lambda self: 'remove_policy' not in self.EXCLUDE_RULES)
    def remove_policy(self, policy_name, alias=ROOT_ALIAS):
        assume(policy_name not in BUILD_IN_POLICIES)
        assert policy_name not in BUILD_IN_POLICIES, f'policy_name {policy_name} is in BUILD_IN_POLICIES'
        result1 = self.client1.do_remove_policy(policy_name, alias)
        result2 = self.client2.do_remove_policy(policy_name, alias)
        assert self.equal(result1, result2), f'\033[31mremove_policy:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return policy_name
        else:
            return multiple()

    @rule(
        alias = st.just(ROOT_ALIAS),
        policy_name = policies.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'policy_info' not in self.EXCLUDE_RULES)
    def policy_info(self, policy_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_policy_info(policy_name, alias)
        result2 = self.client2.do_policy_info(policy_name, alias)
        assert self.equal(result1, result2), f'\033[31mpolicy_info:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
    
    @rule(alias = st.just(ROOT_ALIAS))
    @precondition(lambda self: 'list_policies' not in self.EXCLUDE_RULES)
    def list_policies(self, alias=ROOT_ALIAS):
        result1 = self.client1.do_list_policies(alias)
        result2 = self.client2.do_list_policies(alias)
        assert self.equal(result1, result2), f'\033[31mlist_policies:\nresult1 is {result1}\nresult2 is {result2}\033[0m'

    @rule(
        target = user_policies,
        alias = st.just(ROOT_ALIAS),
        user_name = users.filter(lambda x: x != multiple()),
        policy_name = policies.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'set_policy_to_user' not in self.EXCLUDE_RULES)
    def set_policy_to_user(self, policy_name, user_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_set_policy_to_user(policy_name, user_name, alias)
        result2 = self.client2.do_set_policy_to_user(policy_name, user_name, alias)
        assert self.equal(result1, result2), f'\033[31mset_policy_to_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return f'{user_name}:{policy_name}'

    @rule(
        target = group_policies, 
        alias = st.just(ROOT_ALIAS),
        group_name = groups.filter(lambda x: x != multiple()),
        policy_name = policies.filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'set_policy_to_group' not in self.EXCLUDE_RULES)
    def set_policy_to_group(self, group_name, policy_name, alias=ROOT_ALIAS):
        result1 = self.client1.do_set_policy_to_group(policy_name, group_name, alias)
        result2 = self.client2.do_set_policy_to_group(policy_name, group_name, alias)
        assert self.equal(result1, result2), f'\033[31mset_policy_to_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return f'{group_name}:{policy_name}'
        
    @rule(
        target = user_policies,
        alias = st.just(ROOT_ALIAS),
        user_policy = consumes(user_policies).filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'unset_policy_from_user' not in self.EXCLUDE_RULES)
    def unset_policy_from_user(self, user_policy:str, alias=ROOT_ALIAS):
        user_name = user_policy.split(':')[0]
        policy_name = user_policy.split(':')[1]
        result1 = self.client1.do_unset_policy_from_user(policy_name, user_name, alias)
        result2 = self.client2.do_unset_policy_from_user(policy_name, user_name, alias)
        assert self.equal(result1, result2), f'\033[31munset_policy_from_user:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return user_policy
        else:
            return multiple()
        
    @rule(
        target = group_policies,
        alias = st.just(ROOT_ALIAS),
        group_policy = consumes(group_policies).filter(lambda x: x != multiple())
    )
    @precondition(lambda self: 'unset_policy_from_group' not in self.EXCLUDE_RULES)
    def unset_policy_from_group(self,  group_policy:str, alias=ROOT_ALIAS):
        group_name = group_policy.split(':')[0]
        policy_name = group_policy.split(':')[1]
        result1 = self.client1.do_unset_policy_from_group(policy_name, group_name, alias)
        result2 = self.client2.do_unset_policy_from_group(policy_name, group_name, alias)
        assert self.equal(result1, result2), f'\033[31munset_policy_from_group:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return group_policy
        else:
            return multiple()

    @rule(
        target=aliases, 
        alias = st_alias_name,
        user_name = st_user_name,
        url1=st.just(URL1),
        url2=st.sampled_from([URL2])
    )
    @precondition(lambda self: 'set_alias' not in self.EXCLUDE_RULES)
    def set_alias(self, alias, user_name, url1=URL1, url2=URL2):
        result1 = self.client1.do_set_alias(alias, user_name, DEFAULT_SECRET_KEY, url1)
        result2 = self.client2.do_set_alias(alias, user_name, DEFAULT_SECRET_KEY, url2)
        assert self.equal(result1, result2), f'\033[31mset_alias:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return alias

    @rule(
        target = aliases,
        alias = consumes(aliases)
    )
    @precondition(lambda self: 'remove_alias' not in self.EXCLUDE_RULES)
    def remove_alias(self, alias):
        assume(alias != ROOT_ALIAS)
        result1 = self.client1.do_remove_alias(alias)
        result2 = self.client2.do_remove_alias(alias)
        assert self.equal(result1, result2), f'\033[31mremove_alias:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return alias
        else:
            return multiple()
    
    def teardown(self):
        pass

if __name__ == '__main__':
    MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '100'))
    STEP_COUNT=int(os.environ.get('STEP_COUNT', '50'))
    ci_db = DirectoryBasedExampleDatabase(".hypothesis/examples") 
    settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target])
    settings.register_profile("schedule", max_examples=500, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=200, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    settings.register_profile("pull_request", max_examples=100, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=30, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target], 
        database=ci_db)
    if os.environ.get('CI'):
        event_name = os.environ.get('GITHUB_EVENT_NAME')
        if event_name == 'schedule' or event_name == 'workflow_dispatch':
            profile = 'schedule'
        else:
            profile = 'pull_request'
    else:
        profile = os.environ.get('PROFILE', 'dev')
    print(f'profile is {profile}')
    settings.load_profile(profile)
    
    s3machine = S3Machine.TestCase()
    s3machine.runTest()
    print(json.dumps(S3Client.stats.get(), sort_keys=True, indent=4))
    
    
================================================
FILE: .github/scripts/hypo/s3_contant.py
================================================
ROOT_ALIAS = 'admin'
ROOT_ACCESS_KEY = 'minioadmin'
ROOT_SECRET_KEY = 'minioadmin'
DEFAULT_ACCESS_KEY = ROOT_ACCESS_KEY
DEFAULT_SECRET_KEY = ROOT_SECRET_KEY
BUILD_IN_POLICIES = ('consoleAdmin', 'readonly', 'readwrite', 'writeonly')

================================================
FILE: .github/scripts/hypo/s3_op.py
================================================
import hashlib
import json
import os
import re
import subprocess
try: 
    __import__('xattr')
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
try: 
    __import__('minio')
except ImportError:
    subprocess.check_call(["pip", "install", "minio"])
try: 
    __import__('fallocate')
except ImportError:
    subprocess.check_call(["pip", "install", "fallocate"])
from stats import Statistics
from minio.error import S3Error
import common
from minio import Minio
import io
from s3_contant import *

class S3Client():
    stats = Statistics()
    def __init__(self, prefix, url, url2=None):
        self.prefix = prefix
        self.url = url
        self.url2 = url2
        log_level=os.environ.get('LOG_LEVEL', 'INFO')
        self.logger = common.setup_logger(f'./{prefix}.log', f'{prefix}', log_level)

    def run_cmd(self, command:str, stderr=subprocess.STDOUT) -> str:
        self.logger.info(f'run_cmd: {command}')
        if '|' in command or '>' in command or '&' in command:
            ret=os.system(command)
            if ret == 0:
                return ret
            else: 
                raise Exception(f"run command {command} failed with {ret}")
        try:
            output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=stderr)
        except subprocess.CalledProcessError as e:
            raise e
        return output.stdout.decode()
    
    def sort_dict(self, obj):
        if isinstance(obj, dict):
            return {k: self.sort_dict(v) for k, v in obj.items()}
        elif isinstance(obj, list) and all(isinstance(elem, (int, float, str)) for elem in obj):
            return sorted(obj)
        elif isinstance(obj, list) and all(isinstance(elem, dict) for elem in obj):
            return [self.sort_dict(elem) for elem in obj]
        else:
            return obj
        
    def handleException(self, e, action, **kwargs):
        self.stats.failure(action)
        if isinstance(e, S3Error):
            self.logger.info(f'{action} {kwargs} failed: {e}')
            return Exception(f'code:{e.code} message:{e.message}')
        elif isinstance(e, subprocess.CalledProcessError):
            self.logger.info(f'{action} {kwargs} failed: {e.output.decode()}')
            try:
                output = json.loads(e.output.decode())
                message = output.get('error', {}).get('message', 'error message not found')
                return Exception(f'returncode:{e.returncode} {message}')
            except ValueError as ve:
                output = e.output.decode()
                output = re.sub(r'\b\d+\.\d+\b|\b\d+\b', '***', output)
                return Exception(f'returncode:{e.returncode} output:{output}')
        else:
            self.logger.info(f'{action} {kwargs} failed: {e}')
            return e
        
    def do_info(self, alias):
        try:
            self.run_cmd(f'mc admin info {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_info')
        self.stats.success('do_info')
        self.logger.info(f'do_info succeed')
        return True

    def remove_all_buckets(self):
        client=Minio(self.url,access_key=ROOT_ACCESS_KEY,secret_key=ROOT_SECRET_KEY,secure=False)
        buckets = client.list_buckets()
        for bucket in buckets:
            bucket_name = bucket.name
            objects = client.list_objects(bucket_name, recursive=True)
            for obj in objects:
                client.remove_object(bucket_name, obj.object_name)
            client.remove_bucket(bucket_name)
            print(f"Bucket '{bucket_name}' removed successfully.")
        
    def do_list_buckets(self, alias):
        try:
            result = self.run_cmd(f'mc ls {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_buckets')
        self.stats.success('do_list_buckets')
        self.logger.info(f'do_list_buckets succeed')
        result = [item.split()[-1][:-1] for item in result.split("\n") if item.strip()]
        # print(result)
        return sorted(result)
    
    def do_remove_bucket(self, bucket_name:str, alias):
        try:
            self.run_cmd(f'mc rb {self.get_alias(alias)}/{bucket_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_bucket', bucket_name=bucket_name, alias=alias)
        self.stats.success('do_remove_bucket')
        self.logger.info(f'do_remove_bucket {alias} {bucket_name} succeed')
        return True

    def do_create_bucket(self, bucket_name:str, alias):
        try:
            self.run_cmd(f'mc mb {self.get_alias(alias)}/{bucket_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_create_bucket', bucket_name=bucket_name)
        self.stats.success('do_create_bucket')
        self.logger.info(f'do_create_bucket {bucket_name} succeed')
        return True

    def do_set_bucket_policy(self, bucket_name:str, policy:str, alias):
        try:
            self.run_cmd(f'mc policy set {policy} {self.get_alias(alias)}/{bucket_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_set_bucket_policy', bucket_name=bucket_name, policy=policy)
        self.stats.success('do_set_bucket_policy')
        self.logger.info(f'do_set_bucket_policy {bucket_name} {policy} succeed')
        return True
    
    def do_get_bucket_policy(self, bucket_name:str, alias):
        try:
            result = self.run_cmd(f'mc policy get {self.get_alias(alias)}/{bucket_name} --json')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_get_bucket_policy', bucket_name=bucket_name)
        self.stats.success('do_get_bucket_policy')
        self.logger.info(f'do_get_bucket_policy {bucket_name} succeed')
        return self.sort_dict(json.loads(result))

    def do_list_bucket_policy(self, bucket_name:str, alias, recursive=False):
        try:
            cmd = f'mc policy list {self.get_alias(alias)}/{bucket_name}'
            if recursive:
                cmd += ' --recursive'
            result = self.run_cmd(cmd)
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_bucket_policy', bucket_name=bucket_name)
        self.stats.success('do_list_bucket_policy')
        self.logger.info(f'do_list_bucket_policy {bucket_name} succeed')
        return sorted(result.split("\n"))

    def do_stat_object(self, bucket_name:str, object_name:str, alias):
        try:
            result = self.run_cmd(f'mc stat {self.get_alias(alias)}/{bucket_name}/{object_name} ')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_stat_object', bucket_name=bucket_name, object_name=object_name)
        stat = {}
        for line in result.split('\n'):
            if line.strip() and ':' in line:
                key, value = line.split(':', 1)
                stat[key.strip()] = value.strip()
        self.stats.success('do_stat_object')
        self.logger.info(f'do_stat_object {bucket_name} {object_name} succeed')
        # print(stat)
        return stat['Name'], stat['Size'], stat['ETag'], stat['Type']

    def do_put_object(self, bucket_name:str, object_name:str, data, length, content_type='application/octet-stream', part_size=5*1024*1024):
        client=Minio(self.url,access_key=ROOT_ACCESS_KEY,secret_key=ROOT_SECRET_KEY,secure=False)
        try:
            client.put_object(bucket_name, object_name, io.BytesIO(data), length=length, content_type=content_type, part_size=part_size)
        except S3Error as e:
            return self.handleException(e, 'do_put_object', bucket_name=bucket_name, object_name=object_name, length=length, part_size=part_size)
        self.stats.success('do_put_object')
        self.logger.info(f'do_put_object {bucket_name} {object_name} succeed')
        return self.do_stat_object(bucket_name, object_name, alias=ROOT_ALIAS)

    def do_get_object(self, bucket_name:str, object_name:str, offset=0, length=0):
        client=Minio(self.url,access_key=ROOT_ACCESS_KEY,secret_key=ROOT_SECRET_KEY,secure=False)
        try:
            stat = client.stat_object(bucket_name, object_name)
            if stat.size == 0:
                offset = 0
            else:
                offset = offset % stat.size
            if length > stat.size - offset:
                length = stat.size - offset
            response = client.get_object(bucket_name, object_name, offset=offset, length=length)
            md5_hash = hashlib.md5()
            for data in response.stream(32*1024):
                md5_hash.update(data)
            md5_hex = md5_hash.hexdigest()
        except S3Error as e:
            return self.handleException(e, 'do_get_object', bucket_name=bucket_name, object_name=object_name, offset=offset, length=length)
        self.stats.success('do_get_object')
        self.logger.info(f'do_get_object {bucket_name} {object_name} succeed')
        return md5_hex

    def do_fput_object(self, bucket_name:str, object_name:str, src_path:str, alias):
        try:
            self.run_cmd(f'mc cp {src_path} {self.get_alias(alias)}/{bucket_name}/{object_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_fput_object', bucket_name=bucket_name, object_name=object_name, src_path=src_path)
        self.stats.success('do_fput_object')
        self.logger.info(f'do_fput_object {bucket_name} {object_name} {src_path} succeed')
        return self.do_stat_object(bucket_name, object_name, alias=ROOT_ALIAS)
    
    def do_fget_object(self, bucket_name:str, object_name:str, file_path:str, alias):
        try:
            self.run_cmd(f'mc cp {self.get_alias(alias)}/{bucket_name}/{object_name} {file_path}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_fget_object', bucket_name=bucket_name, object_name=object_name, file_path=file_path)
        self.stats.success('do_fget_object')
        self.logger.info(f'do_fget_object {bucket_name} {object_name} {file_path} succeed')
        return os.stat(file_path).st_size

    def object_exists(self, bucket_name:str, object_name:str, alias):
        try:
            self.run_cmd(f'mc stat {self.get_alias(alias)}/{bucket_name}/{object_name}')
        except subprocess.CalledProcessError as e:
            return False
        return True

    def do_remove_object(self, bucket_name:str, object_name:str, alias):
        try:
            self.run_cmd(f'mc rm {self.get_alias(alias)}/{bucket_name}/{object_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_object', bucket_name=bucket_name, object_name=object_name)
        assert not self.object_exists(bucket_name, object_name, ROOT_ALIAS)
        self.stats.success('do_remove_object')
        self.logger.info(f'do_remove_object {bucket_name} {object_name} succeed')
        return True
    
    def do_list_objects(self, bucket_name, prefix, start_after, include_user_meta, include_version, use_url_encoding_type, recursive):
        client=Minio(self.url,access_key=ROOT_ACCESS_KEY,secret_key=ROOT_SECRET_KEY,secure=False)
        try:
            objects = client.list_objects(bucket_name, prefix=prefix, start_after=start_after, include_user_meta=include_user_meta, include_version=include_version, use_url_encoding_type=use_url_encoding_type, recursive=recursive)
        except S3Error as e:
            return self.handleException(e, 'do_list_objects', bucket_name=bucket_name, prefix=prefix, start_after=start_after, include_user_meta=include_user_meta, include_version=include_version, use_url_encoding_type=use_url_encoding_type, recursive=recursive)
        self.stats.success('do_list_objects')
        self.logger.info(f'do_list_objects {bucket_name} {prefix} {start_after} {include_user_meta} {include_version} {use_url_encoding_type} {recursive} succeed')
        result = '\n'.join([f'{obj.object_name} {obj.size} {obj.etag}' for obj in objects])
        return result
    
    def get_alias(self, alias):
        return self.prefix + '_' + alias

    def do_add_user(self, access_key, secret_key, alias):
        try:
            self.run_cmd(f'mc admin user add {self.get_alias(alias)} {access_key} {secret_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_add_user', access_key=access_key, secret_key=secret_key)
        self.stats.success('do_add_user')
        self.logger.info(f'do_add_user {access_key} succeed')
        return True
    
    def do_remove_user(self, access_key, alias):
        try:
            self.run_cmd(f'mc admin user remove {self.get_alias(alias)} {access_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_user', access_key=access_key)
        self.stats.success('do_remove_user')
        self.logger.info(f'do_remove_user {access_key} succeed')
        return True

    def do_enable_user(self, access_key, alias):
        try:
            self.run_cmd(f'mc admin user enable {self.get_alias(alias)} {access_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_enable_user', access_key=access_key)
        self.stats.success('do_enable_user')
        self.logger.info(f'do_enable_user {access_key} succeed')
        return True
    
    def do_disable_user(self, access_key, alias):
        try:
            self.run_cmd(f'mc admin user disable {self.get_alias(alias)} {access_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_disable_user', access_key=access_key)
        self.stats.success('do_disable_user')
        self.logger.info(f'do_disable_user {access_key} succeed')
        return True
    
    def do_user_info(self, access_key, alias):
        try:
            self.run_cmd(f'mc admin user info {self.get_alias(alias)} {access_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_user_info', access_key=access_key)
        self.stats.success('do_user_info')
        self.logger.info(f'do_user_info {access_key} succeed')
        return True
    
    def do_list_users(self, alias):
        try:
            result = self.run_cmd(f'mc admin user list {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_users')
        self.stats.success('do_list_users')
        self.logger.info(f'do_list_users succeed')
        return sorted(result.split("\n"))

    def remove_all_users(self, alias=ROOT_ALIAS):
        lines = self.run_cmd(f'mc admin user list {self.get_alias(alias)}').split("\n")
        for line in lines:
            if not line.strip():
                continue
            user = line.split()[1]
            self.run_cmd(f'mc admin user remove {self.get_alias(alias)} {user}')
            print(f"User '{user}' removed successfully.")

    def do_list_groups(self, alias):
        try:
            result = self.run_cmd(f'mc admin group list {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_groups')
        self.stats.success('do_list_groups')
        self.logger.info(f'do_list_groups succeed')
        return sorted(result.split("\n"))

    def do_add_group(self, group_name, members, alias):
        try:
            self.run_cmd(f'mc admin group add {self.get_alias(alias)} {group_name} {" ".join(members)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_add_group', group_name=group_name, members=members)
        self.stats.success('do_add_group')
        self.logger.info(f'do_add_group {group_name} {members} succeed')
        return self.do_group_info(group_name, alias)

    def do_remove_group(self, group_name, members, alias):
        try:
            self.run_cmd(f'mc admin group remove {self.get_alias(alias)} {group_name} {" ".join(members)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_group', group_name=group_name)
        self.stats.success('do_remove_group')
        self.logger.info(f'do_remove_group {group_name} succeed')
        return True

    def do_disable_group(self, group_name, alias):
        try:
            self.run_cmd(f'mc admin group disable {self.get_alias(alias)} {group_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_disable_group', group_name=group_name)
        self.stats.success('do_disable_group')
        self.logger.info(f'do_disable_group {group_name} succeed')
        return True
    
    def do_enable_group(self, group_name, alias):
        try:
            self.run_cmd(f'mc admin group enable {self.get_alias(alias)} {group_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_enable_group', group_name=group_name)
        self.stats.success('do_enable_group')
        self.logger.info(f'do_enable_group {group_name} succeed')
        return True

    def do_group_info(self, group_name, alias):
        try:
            self.run_cmd(f'mc admin group info {self.get_alias(alias)} {group_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_group_info', group_name=group_name)
        self.stats.success('do_group_info')
        self.logger.info(f'do_group_info {group_name} succeed')
        return True
    
    def remove_all_groups(self, alias=ROOT_ALIAS):
        groups = self.run_cmd(f'mc admin group list {self.get_alias(alias)}').split("\n")
        for group in groups:
            if not group.strip():
                continue
            self.run_cmd(f'mc admin group remove {self.get_alias(alias)} {group}')
            print(f"Group '{group}' removed successfully.")
    
    def do_add_policy(self, policy_name, policy_document, alias):
        policy = json.dumps(policy_document)
        print(policy)
        policy_path = 'policy.json'
        with open(policy_path, 'w') as f:
            f.write(policy)
        try:
            self.run_cmd(f'mc admin policy add {self.get_alias(alias)} {policy_name} {policy_path}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_add_policy', policy_name=policy_name)
        self.stats.success('do_add_policy')
        self.logger.info(f'do_add_policy {policy_name} succeed')
        return True
    
    def do_remove_policy(self, policy_name, alias):
        try:
            self.run_cmd(f'mc admin policy remove {self.get_alias(alias)} {policy_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_policy', policy_name=policy_name)
        self.stats.success('do_remove_policy')
        self.logger.info(f'do_remove_policy {policy_name} succeed')
        return True
    
    def do_policy_info(self, policy_name, alias):
        try:
            result = self.run_cmd(f'mc admin policy info {self.get_alias(alias)} {policy_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_policy_info', policy_name=policy_name)
        self.stats.success('do_policy_info')
        self.logger.info(f'do_policy_info {policy_name} succeed')
        return self.sort_dict(json.loads(result))
    
    def do_list_policies(self, alias):
        try:
            result = self.run_cmd(f'mc admin policy list {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_policies')
        self.stats.success('do_list_policies')
        self.logger.info(f'do_list_policies succeed')
        result = [item.strip() for item in result.split("\n") if item.strip()!='diagnostics' and item.strip()!='']
        return sorted(result)
    
    def remove_all_policies(self, alias=ROOT_ALIAS):
        policies = self.do_list_policies(alias)
        for policy in policies:
            if policy in BUILD_IN_POLICIES:
                continue
            self.run_cmd(f'mc admin policy remove {self.get_alias(alias)} {policy}')
            print(f"Policy '{policy}' removed successfully.")

    def do_set_policy_to_user(self, policy_name, user_name, alias):
        try:
            self.run_cmd(f'mc admin policy set {self.get_alias(alias)} {policy_name} user={user_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_set_policy_to_user', policy_name=policy_name, user_name=user_name)
        self.stats.success('do_set_policy_to_user')
        self.logger.info(f'do_set_policy_to_user {policy_name} {user_name} succeed')
        return True

    def do_set_policy_to_group(self, policy_name, group_name, alias):
        try:
            self.run_cmd(f'mc admin policy set {self.get_alias(alias)} {policy_name} group={group_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_set_policy_to_group', policy_name=policy_name, group_name=group_name)
        self.stats.success('do_set_policy_to_group')
        self.logger.info(f'do_set_policy_to_group {policy_name} {group_name} succeed')
        return True
    
    def do_unset_policy_from_user(self, policy_name, user_name, alias):
        try:
            self.run_cmd(f'mc admin policy unset {self.get_alias(alias)} {policy_name} user={user_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_unset_policy_from_user', policy_name=policy_name, user_name=user_name)
        self.stats.success('do_unset_policy_from_user')
        self.logger.info(f'do_unset_policy_from_user {policy_name} {user_name} succeed')
        return True
    
    def do_unset_policy_from_group(self, policy_name, group_name, alias):
        try:
            self.run_cmd(f'mc admin policy unset {self.get_alias(alias)} {policy_name} group={group_name}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_unset_policy_from_group', policy_name=policy_name, group_name=group_name)
        self.stats.success('do_unset_policy_from_group')
        self.logger.info(f'do_unset_policy_from_group {policy_name} {group_name} succeed')
        return True
    
    def do_set_alias(self, alias, access_key, secret_key, url):
        alias_name = self.get_alias(alias)
        try:
            self.run_cmd(f'mc alias set {alias_name} http://{url} {access_key} {secret_key}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_set_alias', alias=alias, url=url, access_key=access_key, secret_key=secret_key)
        self.stats.success('do_set_alias')
        self.logger.info(f'do_set_alias {alias} {url} {access_key} {secret_key} succeed')
        return True
    
    def do_remove_alias(self, alias):
        try:
            self.run_cmd(f'mc alias remove {self.get_alias(alias)}')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_remove_alias', alias=alias)
        self.stats.success('do_remove_alias')
        self.logger.info(f'do_remove_alias {alias} succeed')
        return True
    
    def do_list_aliases(self):
        try:
            result = self.run_cmd(f'mc alias list')
        except subprocess.CalledProcessError as e:
            return self.handleException(e, 'do_list_aliases')
        self.stats.success('do_list_aliases')
        self.logger.info(f'do_list_aliases succeed')
        return sorted([line.strip() for line in result.split("\n") if line.strip() and ':' not in line])
    
    def remove_all_aliases(self):
        aliases = self.do_list_aliases()
        for alias in aliases:
            if alias.startswith(self.prefix+'_'):
                self.run_cmd(f'mc alias remove {alias}')
                print(f"Alias '{alias}' removed successfully.")

================================================
FILE: .github/scripts/hypo/s3_strategy.py
================================================
from hypothesis import strategies as st
from string import ascii_lowercase

MAX_OBJECT_SIZE=10*1024*1024
# https://min.io/docs/minio/linux/administration/identity-access-management/policy-based-access-control.html#minio-policy-actions
S3_ACTION_LIST = ["s3:*", "s3:DeleteObject", "s3:GetObject","s3:ListBucket","s3:PutObject", "s3:PutObjectTagging", "s3:GetObjectTagging", "s3:DeleteObjectTagging"]
st_user_name = st.sampled_from(['user1', 'user2', 'user3'])
st_group_name = st.sampled_from(['group1', 'group2', 'group3'])
st_group_members = st.lists(st_user_name, max_size=3, unique=True)
st_secret_key = st.text(alphabet=ascii_lowercase, min_size=8, max_size=8)
st_alias_name = st.text(alphabet=ascii_lowercase, min_size=4, max_size=4)
st_bucket_name = st.text(alphabet=ascii_lowercase, min_size=4, max_size=4)
st_object_name = st.text(alphabet=ascii_lowercase, min_size=4, max_size=4)
st_object_prefix = st.text(alphabet=ascii_lowercase, min_size=1, max_size=1)
st_content = st.binary(min_size=0, max_size=MAX_OBJECT_SIZE)
st_part_size = st.sampled_from([5*1024*1024, 8*1024*1024])
st_offset = st.integers(min_value=0, max_value=MAX_OBJECT_SIZE)
st_length = st.integers(min_value=0, max_value=MAX_OBJECT_SIZE)
st_policy_name = st.text(alphabet=ascii_lowercase, min_size=4, max_size=4)
st_policy = st.fixed_dictionaries({
    "Version": st.just("2012-10-17"),
    "Statement": st.lists(
        st.fixed_dictionaries({
            "Effect": st.sampled_from(["Allow", "Deny"]),
            "Principal": st.fixed_dictionaries({"AWS": st.just("*")}),
            "Action": st.lists(
                st.sampled_from(S3_ACTION_LIST),
                min_size=1, max_size=3,
                unique=True
            ),
            "Resource": st.just("arn:aws:s3:::*"),
        }),
        min_size=1, max_size=3
    )
})


================================================
FILE: .github/scripts/hypo/s3_test.py
================================================
import unittest
from s3 import S3Machine
from s3_contant import *
class TestS3(unittest.TestCase):
    def test_bucket(self):
        state = S3Machine()
        state.set_alias('alias1', DEFAULT_ACCESS_KEY)
        state.create_bucket('bucket1')
        state.create_bucket('bucket2')
        state.fput_object('bucket1', 'object1', alias='alias1')
        state.fput_object('bucket1', 'object2', alias='alias1')
        state.fput_object('bucket2', 'object1', alias='alias1')
        state.fput_object('bucket2', 'object2', alias='alias1')
        state.list_buckets()
        state.list_objects('bucket1')
        state.list_objects('bucket2')
        state.list_objects('bucket1', prefix='obj')
        state.remove_object('bucket1:object1')
        state.remove_object('bucket1:object2')
        state.remove_bucket('bucket1')
        state.remove_bucket('bucket2')
        state.teardown()

    def test_user(self):
        state = S3Machine()
        state.create_bucket('bucket1')
        state.add_user('user1')
        state.add_user('user2')
        state.list_users()
        state.remove_user('user1')
        state.list_users()
        state.disable_user('user2')
        state.enable_user('user2')
        state.list_users()
        state.remove_user('user2')
        state.list_users()
        state.teardown()
        
    def test_group(self):
        state = S3Machine()
        state.create_bucket('bucket1')
        state.add_user('user1')
        state.add_user('user2')
        state.add_user('user3')
        state.add_group('group1', ['user1', 'user2'])
        state.add_group('group2', ['user2', 'user3'])
        state.list_groups()
        state.disable_group('group2')
        state.remove_group('group1', ['user1'])
        state.remove_group('group1', ['user2'])
        state.remove_group('group1', [])
        state.list_groups()
        state.enable_group('group2')
        state.list_groups()
        state.teardown()

    def skip_test_issue_4639(self):
        # SEE https://github.com/juicedata/juicefs/issues/4639
        state = S3Machine()
        v1 = state.init_aliases()
        v2, v3, v4, v5 = state.init_policies()
        state.remove_policy(alias=v1, policy_name=v3)
        state.list_groups(alias=v1)
        state.remove_policy(alias=v1, policy_name=v2)
        state.policy_info(alias=v1, policy_name=v5)
        state.teardown()

    def skip_test_issue_4660(self):
        #SEE https://github.com/juicedata/juicefs/issues/4660
        state = S3Machine()
        v1 = state.init_aliases()
        v2, v3, v4, v5 = state.init_policies()
        v8 = state.add_user(alias=v1, user_name='user1')
        state.disable_user(alias=v1, user_name=v8)
        state.set_alias(alias='pjzm', url1='localhost:9000', url2='localhost:9006', user_name=v8)
        state.teardown()

    def test_issue_4682(self):
        # SEE https://github.com/juicedata/juicefs/issues/4682
        state = S3Machine()
        v1 = state.init_aliases()
        v2, v3, v4, v5 = state.init_policies()
        v6 = state.create_bucket(alias=v1, bucket_name='nzpy')
        state.get_bucket_policy(alias=v1, bucket_name=v6)
        state.teardown()

if __name__ == '__main__':
    unittest.main()

================================================
FILE: .github/scripts/hypo/stats.py
================================================
def singleton(cls):
    instances = {}
    def get_instance(*args, **kwargs):
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]
    return get_instance

@singleton
class Statistics:
    def __init__(self):
        self.stats = {}

    def success(self, function_name):
        if function_name not in self.stats:
            self.stats[function_name] = {'success': 0, 'failure': 0}
        self.stats[function_name]['success'] += 1

    def failure(self, function_name):
        if function_name not in self.stats:
            self.stats[function_name] = {'success': 0, 'failure': 0}
        self.stats[function_name]['failure'] += 1

    def get(self):
        return self.stats

================================================
FILE: .github/scripts/hypo/strategy.py
================================================
import subprocess
try:
    __import__("xattr")
except ImportError:
    subprocess.check_call(["pip", "install", "xattr"])
import xattr
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import strategies as st
from string import ascii_lowercase
import time
import os
MIN_DIR_NAME=1
MAX_DIR_NAME=8
MIN_FILE_NAME=1
MAX_FILE_NAME=4
MAX_XATTR_NAME=255+10
MAX_XATTR_VALUE=65535+100
MAX_FILE_SIZE=1024*10
MAX_TRUNCATE_LENGTH=1024*128
MAX_FALLOCATE_LENGTH=1024*128
st_file_name = st.text(alphabet=ascii_lowercase, min_size=MIN_FILE_NAME, max_size=MAX_FILE_NAME)
dir_alphabet = ascii_lowercase + './'
# st_dir_name = st.text(alphabet=dir_alphabet, min_size=MIN_DIR_NAME, max_size=MAX_DIR_NAME)
def valid_dir_name():
    name_part = st.text(alphabet=dir_alphabet, min_size=MIN_DIR_NAME, max_size=MAX_DIR_NAME)
    def is_valid(s:str):
        if s.startswith('/'):
            return False
        if '.' in s and (not s.endswith('/.') or not s.endswith('/..')):
            return False
        return True
    return name_part.filter(is_valid)
# st_entry_name = st.text(min_size=MIN_FILE_NAME, max_size=MAX_FILE_NAME)
#TODO: remove filter when bugfix https://github.com/juicedata/jfs/issues/776
#TODO: use characters instead of ascii_lowercase
st_xattr_name = st.text(alphabet=ascii_lowercase, min_size=1, max_size=MAX_XATTR_NAME).filter(lambda x: '\x00' not in x).map(lambda s: "user." + s)
st_xattr_value = st.binary(min_size=1, max_size=MAX_XATTR_VALUE)
st_xattr_flag = st.sampled_from([0, xattr.XATTR_CREATE, xattr.XATTR_REPLACE])
# st_umask = st.integers(min_value=0o000, max_value=0o777)
st_umask = st.just(0o022)
st_entry_mode = st.integers(min_value=0o000, max_value=0o0777)

# TODO: remove alphabet=ascii_lowercase, 
st_lines = st.lists(st.text(alphabet=ascii_lowercase, min_size=0, max_size=10), min_size=1, max_size=10)
# TODO: remove filter a
st_open_mode = st.sampled_from([ 'x', 'a', 'r', 'w', 'a+', 'r+', 'w+', 'xb', 'ab', 'rb', 'wb', 'a+b', 'r+b', 'w+b'])
st_open_errors = st.sampled_from(['strict', 'ignore', 'replace', 'backslashreplace', 'namereplace'])
st_open_flags = st.lists(st.sampled_from([os.O_RDONLY, os.O_WRONLY, os.O_RDWR, os.O_APPEND, os.O_CREAT, os.O_EXCL, os.O_TRUNC, os.O_SYNC, os.O_DSYNC]), unique=True, min_size=1)
# TODO: add 0 to buffering when bugfix: https://github.com/juicedata/jfs/issues/1359
st_buffering = st.sampled_from([-1, 1, 10, 1024])
st_time = st.integers(min_value=0, max_value=int(time.time()))
st_offset = st.integers(min_value=0, max_value=MAX_FILE_SIZE)
st_length = st.integers(min_value=0, max_value=MAX_FILE_SIZE)
st_truncate_length = st.integers(min_value=0, max_value=MAX_TRUNCATE_LENGTH)
st_fallocate_length = st.integers(min_value=0, max_value=MAX_FALLOCATE_LENGTH)
st_whence = st.sampled_from([os.SEEK_SET, os.SEEK_CUR, os.SEEK_END])

@st.composite
def utf8_byte_arrays(draw, min_size=0, max_size=100):
    text = draw(st.text(min_size=min_size, max_size=max_size))
    return text.encode('utf-8')

@st.composite
def utf16_byte_arrays(draw, min_size=0, max_size=100):
    text = draw(st.text(min_size=min_size, max_size=max_size))
    return text.encode('utf-16')

@st.composite
def ascii_byte_arrays(draw, min_size=0, max_size=100):
    text = draw(st.text(alphabet=st.characters(blacklist_categories=['Cs', 'Cc', 'Co', 'Cn'], max_codepoint=127), min_size=min_size, max_size=max_size))
    return text.encode('ascii')
st_binary = st.binary(min_size=0, max_size=MAX_FILE_SIZE)
st_ascii_lowercase = st.text(alphabet=ascii_lowercase, min_size=0, max_size=MAX_FILE_SIZE) # | st.binary(min_size=0, max_size=MAX_FILE_SIZE)
st_unicode = st.text(alphabet=st.characters(max_codepoint=0x10FFFF), min_size=0, max_size=MAX_FILE_SIZE)

# st_content = st.one_of(utf8_byte_arrays(), utf16_byte_arrays(), ascii_byte_arrays(), st_binary, st_unicode, st_ascii_lowercase)
st_content = st.one_of(utf8_byte_arrays(), ascii_byte_arrays(), st_binary, st_unicode, st_ascii_lowercase)
# st_open_encoding = st.sampled_from(['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin-1'])
st_open_encoding = st.sampled_from(['utf-8', 'ascii', 'latin-1'])


================================================
FILE: .github/scripts/hypo/sync.py
================================================
import os
import subprocess
import json
import common
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from hypothesis import assume, strategies as st, settings, Verbosity
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine, Bundle, initialize, multiple, consumes, invariant
from hypothesis import Phase, seed
from strategy import *
from fs_op import FsOperation
import random

st_entry_name = st.text(alphabet='abc*?', min_size=1, max_size=4)
st_patterns = st.lists(st.sampled_from(['a','?','/','*']), min_size=1, max_size=10)\
    .map(''.join).filter(lambda s: s.find('***') == -1 or (s.count('***') == 1 and s.endswith('/***')))

st_option = st.fixed_dictionaries({
    "option": st.just("--include") | st.just("--exclude"),
    "pattern": st_patterns
})

st_options = st.lists(st_option, min_size=1, max_size=10)

SEED=int(os.environ.get('SEED', random.randint(0, 1000000000)))
@seed(SEED)
class SyncMachine(RuleBasedStateMachine):
    Files = Bundle('files')
    Folders = Bundle('folders')
    ROOT_DIR1 = '/tmp/sync_src'
    ROOT_DIR2 = '/tmp/sync_src2'
    DEST_RSYNC = '/tmp/rsync'
    DEST_JUICESYNC = '/tmp/juicesync'
    
    fsop1 = FsOperation('fs1', ROOT_DIR1)
    fsop2 = FsOperation('fs2', ROOT_DIR2)
    
    @initialize(target=Folders)
    def init_folders(self):
        if not os.path.exists(self.ROOT_DIR1):
            os.makedirs(self.ROOT_DIR1)
        if not os.path.exists(self.ROOT_DIR2):
            os.makedirs(self.ROOT_DIR2)
        common.clean_dir(self.ROOT_DIR1)
        common.clean_dir(self.ROOT_DIR2)
        return ''
    
    def __init__(self):
        super(SyncMachine, self).__init__()
        
    def equal(self, result1, result2):
        if type(result1) != type(result2):
            return False
        if isinstance(result1, Exception):
            r1 = str(result1).replace(self.ROOT_DIR1, '')
            r2 = str(result2).replace(self.ROOT_DIR2, '')
            return r1 == r2
        elif isinstance(result1, tuple):
            return result1 == result2
        elif isinstance(result1, str):
            r1 = str(result1).replace(self.ROOT_DIR1, '')
            r2 = str(result2).replace(self.ROOT_DIR2, '')
            return  r1 == r2
        else:
            return result1 == result2

    @rule(target=Files, 
          parent = Folders.filter(lambda x: x != multiple()), 
          file_name = st_entry_name, 
          umask = st_umask, 
            )
    def create_file(self, parent, file_name, content='s', mode='x', user='root', umask=0o022):
        result1 = self.fsop1.do_create_file(parent=parent, file_name=file_name, mode=mode, content=content, user=user, umask=umask)
        result2 = self.fsop2.do_create_file(parent=parent, file_name=file_name, mode=mode, content=content, user=user, umask=umask)
        assert self.equal(result1, result2), f'\033[31mcreate_file:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, file_name)
    
    @rule( target = Folders, 
          parent = Folders.filter(lambda x: x != multiple()),
          subdir = st_entry_name,
          mode = st_entry_mode,
          umask = st_umask, 
          )
    def mkdir(self, parent, subdir, mode, user='root', umask=0o022):
        result1 = self.fsop1.do_mkdir(parent, subdir, mode, user, umask)
        result2 = self.fsop2.do_mkdir(parent, subdir, mode, user, umask)
        assert self.equal(result1, result2), f'\033[31mmkdir:\nresult1 is {result1}\nresult2 is {result2}\033[0m'
        if isinstance(result1, Exception):
            return multiple()
        else:
            return os.path.join(parent, subdir)

    @rule(options = st_options
        )
    def sync(self, options):
        subprocess.check_call(['rm', '-rf', self.DEST_RSYNC])
        subprocess.check_call(['rm', '-rf', self.DEST_JUICESYNC])
        options_run = ' '.join([f'{item["option"]} {item["pattern"]}' for item in options])
        options_display = ' '.join([f'{item["option"]} "{item["pattern"]}"' for item in options])
        print(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options_display}')
        subprocess.check_call(f'rsync -r -vvv {self.ROOT_DIR1}/ {self.DEST_RSYNC}/ {options_run}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        print(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options_display}')
        subprocess.check_call(f'./juicefs sync --dirs -v {self.ROOT_DIR1}/ {self.DEST_JUICESYNC}/ {options_run}'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        try:
            subprocess.check_call(['diff', '-r', self.DEST_RSYNC, self.DEST_JUICESYNC])
        except subprocess.CalledProcessError as e:
            print(f'\033[31m{e}\033[0m')
            raise e
        self.fsop1.stats.success('do_sync')
        self.fsop2.stats.success('do_sync')

    def teardown(self):
        pass

if __name__ == '__main__':
    MAX_EXAMPLE=int(os.environ.get('MAX_EXAMPLE', '1000'))
    STEP_COUNT=int(os.environ.get('STEP_COUNT', '50'))
    settings.register_profile("dev", max_examples=MAX_EXAMPLE, verbosity=Verbosity.debug, 
        print_blob=True, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
    settings.register_profile("ci", max_examples=MAX_EXAMPLE, verbosity=Verbosity.normal, 
        print_blob=False, stateful_step_count=STEP_COUNT, deadline=None, \
        report_multiple_bugs=False, 
        phases=[Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
    profile = os.environ.get('PROFILE', 'dev')
    settings.load_profile(profile)
    juicefs_machine = SyncMachine.TestCase()
    juicefs_machine.runTest()
    print(json.dumps(FsOperation.stats.get(), sort_keys=True, indent=4))


================================================
FILE: .github/scripts/hypo/sync_test.py
================================================
import unittest
from sync import SyncMachine

class TestFsrand2(unittest.TestCase):

    def test_sync1(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.mkdir(mode=0, parent=v1, subdir='a', umask=0)
        v3 = state.create_file(content=b'', file_name=v2, mode='w', parent=v2, umask=0)
        state.sync(options=[{'option': '--include', 'pattern': 'aa/***'},
        {'option': '--exclude', 'pattern': 'a?**'}])
        state.teardown()

    def test_sync2(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': '**/***'}])
        state.teardown()

    def test_sync3(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': '/***'}])
        state.teardown()

    def test_sync4(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': '*/***'}])
        state.teardown()

    def test_sync5(self):
        state = SyncMachine()
        v1 = state.init_folders()
        state.sync(options=[{'option': '--include', 'pattern': 'a'}])
        v2 = state.mkdir(mode=0, parent=v1, subdir='a', umask=0)
        v3 = state.create_file(content=b'', file_name=v2, mode='w', parent=v2, umask=0)
        state.sync(options=[{'option': '--include', 'pattern': 'aa'},
        {'option': '--exclude', 'pattern': 'a?**'}])
        state.teardown()

    def test_sync6(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='a', mode='w', parent=v1, umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': '**a'}])
        state.teardown()

    def test_sync7(self):
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.create_file(content=b'', file_name='aa', mode='w', parent=v1, umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': 'aa**a'}])
        state.teardown()
    
    def test_sync8(self):
        # SEE: https://github.com/juicedata/juicefs/issues/4471
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.mkdir(mode=8, parent=v1, subdir='a', umask=0)
        state.sync(options=[{'option': '--exclude', 'pattern': 'a/**/a'}])
        state.teardown()

    def test_sync9(self):
        # SEE: https://github.com/juicedata/juicefs/issues/4471
        state = SyncMachine()
        v1 = state.init_folders()
        v2 = state.mkdir(mode=8, parent=v1, subdir='aa', umask=0) 
        v3 = state.create_file(content=b'', file_name='a', mode='w', parent=v2, umask=0)
        state.sync(options=[{'option': '--include', 'pattern': '**aa**'},
        {'option': '--exclude', 'pattern': 'a'}])
        state.teardown()

if __name__ == '__main__':
    unittest.main()

================================================
FILE: .github/scripts/mutate/check_coverage.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os

def is_mutation_in_coverage(original_file, changed_file, coverage_file):
	coverage = parse_coverage(coverage_file)
	# print(coverage)
	original = open(original_file, 'r').readlines()
	changed = open(changed_file, 'r').readlines()
	for i in range( min(len(original), len(changed)) ):
		if original[i] != changed[i]:
			# print(f'line {i+1} is different')
			if (i+1) not in coverage:
				# print(f'line {i+1} is not in coverage')
				return False
			else:
				# print(f'line {i+1} is in coverage')
				return True
	return True


def parse_coverage(file):
	cov = set()
	with open(file, 'r') as f:
		lines = f.readlines()
		for line in lines[1:]:
			name = line.split(':')[0]
			count = int(line.split(' ')[2])
			if count > 0:
				start_line = int(line.split(':')[1].split(' ')[0].split(',')[0].split('.')[0])
				end_line = int(line.split(':')[1].split(' ')[0].split(',')[1].split('.')[0])
				for i in range(start_line, end_line+1):
					cov.add(i)
	return cov
	
if __name__ == '__main__':
	# MUTATE_ORIGINAL=../cmd/meta/xattr.go MUTATE_CHANGED=../cmd/meta/xattr_copy.go COVERAGE_FILE=xattr-cov.out python3 check_coverage.py
	# MUTATE_ORIGINAL=cmd/meta/xattr.go MUTATE_CHANGED=/var/folders/jz/mvf43cj13sl4l17z1yy8m92h0000gn/T/go-mutesting-3937777628/xattr.go.4 COVERAGE_FILE=cmd/meta/xattr-cov.out python3 scripts/check_coverage.py
	original_file = os.environ['MUTATE_ORIGINAL']
	changed_file = os.environ['MUTATE_CHANGED']
	coverage_file = os.environ['COVERAGE_FILE']
	# print(f'MUTATE_ORIGINAL={original_file} MUTATE_CHANGED={changed_file} COVERAGE_FILE={coverage_file} python3 ../../scripts/check_coverage.py')
	r = is_mutation_in_coverage(original_file, changed_file, coverage_file)
	if r:
		exit(0)
	else:
		exit(3)

================================================
FILE: .github/scripts/mutate/check_skip_by_comment.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os


def is_mutation_skipped_by_comment(original_file, changed_file):
    original = open(original_file, 'r').readlines()
    changed = open(changed_file, 'r').readlines()
    for i in range( min(len(original), len(changed)) ):
        if original[i] != changed[i]:
            # print(f'line {i+1} is different')
            if 'skip mutate' in original[i]:
                print(f'line {i+1} is skipped by comment')
                return  True
    return False


if __name__ == '__main__':
    original_file = os.environ['MUTATE_ORIGINAL']
    changed_file = os.environ['MUTATE_CHANGED']
    if is_mutation_skipped_by_comment(original_file, changed_file):
        exit(1)
    else:
        exit(0)

    
================================================
FILE: .github/scripts/mutate/how_to_use_mutate_test.md
================================================
# what is mutatation testing?
Mutation testing (or Mutation analysis or Program mutation) is used to design new software tests and evaluate the quality of existing software tests. Mutation testing involves modifying a program in small ways. Each mutated version is called a mutant and tests detect and reject mutants by causing the behavior of the original version to differ from the mutant. This is called killing the mutant. Test suites are measured by the percentage of mutants that they kill. New tests can be designed to kill additional mutants.

# what is the difference between mutants?
there are several kind of mutants:
1. killed mutants: the mutants which is killed by the unit test. which is identified by "tests passed -> FAIL" in the log
2. failed or escaped mutants: the mutants which pass the unit test. which is identified by "tests failed -> PASS" in the log
3. skipped mutants: the mutants may skipped because of 1. out of coverage code. 2. in the black list, 3. skipped by comment. 
4. other exception cases.
# how to checkout the failed mutants?
1. open the github action workflow page.
2. click "run mutate test" step.
3. search "tests passed " keyword, all the "tests passed -> FAIL" mutants are failed.
you can try here: https://github.com/juicedata/juicefs/actions/runs/3565436367/jobs/5990603552
# how to fix failed mutants?
1. open the github action workflow page.
2. click "run mutate test" step.
3. search "tests passed " keyword, all the "tests passed -> FAIL" mutants are failed.
3. find which line is changed by mutation.
4. copy the changed line to .go source file
5. run all the tests in corresponding go test file, all the tests should passed.
6. you should add test case to make the test failed, which kill this mutant.
# how to add a mutation to black list?
1. find the checksum from the github action log, like FAIL "/tmp/go-mutesting-1324412688/pkg/chunk/prefetch.go.0" with checksum bb9e9497f17e191adf89b5a2ef6764eb
2. add a line //checksum: bb9e9497f17e191adf89b5a2ef6764eb in the go test file.
For example:
//checksum 9cb13bb28aa7918edaf4f0f4ca92eea5
//checksum 05debda2840d31bac0ab5c20c5510591
func TestMin(t *testing.T) {
	assertEqual(t, Min(1, 2), 1)
	assertEqual(t, Min(-1, -2), -2)
	assertEqual(t, Min(0, 0), 0)
}

# how to skip mutate a specific line?
Add "//skip mutate" to the end of the line you don't want to mutate in the source file.
For example:
	if err != nil { //skip mutate
		return "", fmt.Errorf("failed to execute command `lsb_release`: %s", err)
	}

# how to skip a specific test case?
if you don't want to run a specific test case, you can add "//skip mutate" after the test case function.
For example:
func TestRandomWrite(t *testing.T) {//skip mutate
	...
}

# how to customize mutate test job in parallel?
if the mutants of the target source file is more than 200, we will use 4 github jobs to run it. otherwise we will use 1 job to run.
you can customize it in your test file with adding "//mutate_test_job_number: number", eg: //mutate_test_job_number: 8

# how to disable muate test for a specific go file?
add //mutate:disable in the *_test.go file to disable the mutate test.

================================================
FILE: .github/scripts/mutate/modify_sdk_pom.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import re


def get_plugin_str(taget_tests, taget_classes, time_constant):
    s = """ <plugin>
				<groupId>org.pitest</groupId>
				<artifactId>pitest-maven</artifactId>
				<version>1.9.11</version>
				<configuration>
					<targetClasses>
						<param>{taget_classes}</param>
					</targetClasses>
					<targetTests>
						<param>{taget_tests}</param>
					</targetTests>
					<timeoutConstant>{time_constant}</timeoutConstant>
				</configuration>
			</plugin> """
    s = s.replace('{taget_classes}', taget_classes)
    s = s.replace('{taget_tests}', taget_tests)
    s = s.replace('{time_constant}', time_constant)
    return s

def modify_pom(pom_path, taget_tests, taget_classes, time_constant):
    new_lines = []
    with open(pom_path, 'r') as f:
        for line in f.readlines():
            if line.strip() == '</plugins>':
                new_lines.append(get_plugin_str(taget_tests, taget_classes, time_constant)+'\n')
            new_lines.append(line)
    with open(pom_path, 'w') as f:
        f.writelines(new_lines)

if __name__ == '__main__':
    pom_path = os.environ['POM_XML_PATH']
    taget_tests = os.environ['TARGET_TESTS']
    taget_classes = os.environ['TARGET_CLASSES']
    time_constant = os.environ['TIME_CONSTANT']
    modify_pom(pom_path, taget_tests, taget_classes, time_constant)
    

================================================
FILE: .github/scripts/mutate/mutest.sh
================================================
#!/bin/bash

# This exec script implements
# - the replacement of the original file with the mutation,
# - the execution of all tests originating from the package of the mutated file,
# - and the reporting if the mutation was killed.

if [ -z ${MUTATE_CHANGED+x} ]; then echo "MUTATE_CHANGED is not set"; exit 1; fi
if [ -z ${MUTATE_ORIGINAL+x} ]; then echo "MUTATE_ORIGINAL is not set"; exit 1; fi
if [ -z ${MUTATE_PACKAGE+x} ]; then echo "MUTATE_PACKAGE is not set"; exit 1; fi
if [ -z ${COVERAGE_FILE+x} ]; then echo "COVERAGE_FILE is not set"; exit 1; fi
if [ -z ${TEST_FILE_NAME+x} ]; then echo "TEST_FILE_NAME is not set"; exit 1; fi
if [ -z ${PACKAGE_PATH+x} ]; then echo "PACKAGE_PATH is not set"; exit 1; fi

function clean_up {
	if [ -f $MUTATE_ORIGINAL.tmp ];
	then
		mv $MUTATE_ORIGINAL.tmp $MUTATE_ORIGINAL
	fi
}

function sig_handler {
	clean_up

	exit $GOMUTESTING_RESULT
}
trap sig_handler SIGHUP SIGINT SIGTERM

export MUTATE_TIMEOUT=${MUTATE_TIMEOUT:-10}

if [ -n "$TEST_RECURSIVE" ]; then
	TEST_RECURSIVE="/..."
fi

export GOMUTESTING_DIFF=$(diff -u $MUTATE_ORIGINAL $MUTATE_CHANGED)
if [ -z "$GOMUTESTING_DIFF" ]; then
	echo "mutate file is the same as original file", $MUTATE_CHANGED
	exit 100
fi

python3 .github/scripts/mutate/check_coverage.py

if [ $? -ne 0 ]; then
	echo "mutate is out of code coverage", $MUTATE_CHANGED
	exit 101
fi

python3 .github/scripts/mutate/check_skip_by_comment.py
if [ $? -ne 0 ]; then
	echo "mutate is skipped by comment", $MUTATE_CHANGED
	exit 102
fi

test_cases=$(python3 .github/scripts/mutate/parse_test_cases.py)
if [ $? -ne 0 ]; then
	echo "no test cases in test file ", $TEST_FILE_NAME
	exit 103
fi

mv $MUTATE_ORIGINAL $MUTATE_ORIGINAL.tmp
cp $MUTATE_CHANGED $MUTATE_ORIGINAL
echo "------------------------------------------------------------------------"
echo "Start unit test with: $MUTATE_CHANGED"
go test ./$PACKAGE_PATH/...  -run "$test_cases" -v -cover -count=1 -timeout=5m 
# GOMUTESTING_TEST=$(go test -timeout $(printf '%ds' $MUTATE_TIMEOUT) $MUTATE_PACKAGE$TEST_RECURSIVE 2>&1)
export GOMUTESTING_RESULT=$?


if [ "$MUTATE_DEBUG" = true ] ; then
	echo "$GOMUTESTING_TEST"
fi

clean_up

case $GOMUTESTING_RESULT in
0) # tests passed -> FAIL
	echo "$GOMUTESTING_DIFF"
	echo "tests passed -> FAIL"
	exit 1
	;;
1) # tests failed -> PASS
	echo "$GOMUTESTING_DIFF"
	echo "tests failed -> PASS"
	exit 0
	;;
2) # did not compile -> SKIP
	if [ "$MUTATE_VERBOSE" = true ] ; then
		echo "Mutation did not compile"
	fi

	if [ "$MUTATE_DEBUG" = true ] ; then
		echo "$GOMUTESTING_DIFF"
	fi
	echo "did not compile -> SKIP"
	exit 2
	;;
3) # mutation is out of coverage -> SKIP
	echo "mutation is out of coverage -> SKIP"
	echo "$GOMUTESTING_DIFF"

	exit $GOMUTESTING_RESULT
	;;
4) # check coverage failed -> SKIP
	echo "check coverage failed -> SKIP"
	echo "$GOMUTESTING_DIFF"

	exit $GOMUTESTING_RESULT
	;;

*) # Unkown exit code -> SKIP
	echo "Unknown exit code"
	echo "$GOMUTESTING_DIFF"

	exit $GOMUTESTING_RESULT
	;;
esac

================================================
FILE: .github/scripts/mutate/mutesting.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import glob
import json
import os
import sys
from tkinter import Tcl

def do_mutate_test(mutation_dir, index, total):
    print(f'mutation dir is {mutation_dir}, inde is {index}, total is {total}', file=sys.stderr)
    # os.system(f'ls -l {mutation_dir}')
    list_of_files = Tcl().call('lsort', '-dict', glob.glob(mutation_dir + '/*.go.*') )
    if len(list_of_files) > 0 and 'original' in list_of_files[-1]:
        list_of_files = list_of_files[:-1]
    # print('\n'.join(list_of_files), file=sys.stderr)
    stats = {'passed':0, 'failed':0, 'compile_error':0, 'out_of_coverage':0, 'skip_by_comment':0, 'others':0, 'total':0}
    count = int(len(list_of_files)/total) + 1
    start = index*count
    end = start + count
    print(f'count:{count}, start:{start}, end:{end}', file=sys.stderr)
    if end > len(list_of_files):
        end = len(list_of_files)
    for changed_file in list_of_files[start:end]:
        # timestamp_str = time.strftime(  '%m/%d/%Y :: %H:%M:%S',
        #                             time.gmtime(os.path.getmtime(changed_file))) 
        # print(timestamp_str, ' -->', changed_file) 
        os.environ['MUTATE_CHANGED'] = changed_file
        ret = os.system('.github/scripts/mutate/mutest.sh') >> 8
        if ret == 0:
            stats['passed'] += 1
        elif ret == 1:
            stats['failed'] += 1
        elif ret == 2:
            stats['compile_error'] += 1
        elif ret == 101:
            stats['out_of_coverage'] += 1
        elif ret == 102:
            stats['skip_by_comment'] += 1
        else:
            stats['others'] += 1
        stats['total'] += 1
    if stats['passed'] + stats['failed'] == 0:
        stats['score'] = 1.0
    else:
        stats['score'] = stats['passed'] / (stats['passed'] + stats['failed'])
    return stats

if __name__ == '__main__':
    os.environ['MUTATE_PACKAGE'] = ''
    mutation_dir = os.path.join(os.environ['MUTATION_DIR'], os.environ['PACKAGE_PATH'])
    print(f'mutation dir is {mutation_dir}', file=sys.stderr)
    original_file = os.environ['MUTATE_ORIGINAL']
    print(f'original file is {original_file}', file=sys.stderr)
    if not os.environ['JOB_INDEX']:
        index = 0
    else:
        index = int(os.environ['JOB_INDEX'])-1
    total = int(os.environ['JOB_TOTAL'])
    stats = do_mutate_test(mutation_dir, index, total)
    print(stats)
    stat_result_file = os.environ['STAT_RESULT_FILE']
    print(f'stat result file is {stat_result_file}', file=sys.stderr)
    with open(stat_result_file, "w") as f:
        json.dump(stats, f)

================================================
FILE: .github/scripts/mutate/parse_black_list.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import re


def parse_check_sum(test_file_path):
    check_sum_list = []
    with open(test_file_path) as f:
        lines = f.readlines()
        for line in lines:
            # //checksum 5b1ca0cfedd786d9df136a0e042df23a
            group = re.match('//checksum\s+(.{32})$', line.strip())
            if group:
                check_sum_list.append(group.group(1))
    return check_sum_list

def save_black_list(file_name, check_sum_list):
    with open(file_name, 'w') as f:
        f.write('\n'.join(check_sum_list))

if __name__ == '__main__':
    test_file_path = os.environ['TEST_FILE_NAME']
    if not test_file_path:
        print('test file name is empty')
        exit(1)
    black_list_file = os.environ['BLACK_LIST_FILE']
    save_black_list(black_list_file,  parse_check_sum(test_file_path))

================================================
FILE: .github/scripts/mutate/parse_job_total.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import re
import sys


def parse_test_jobs(test_file_path):
    with open(test_file_path) as f:
        lines = f.readlines()
        for line in lines:
            g = re.search('^//mutate_test_job_number:\s*(.+)', line.strip())
            if g:
                return int(g.group(1))
                
    return 0

if __name__ == '__main__':
    test_file_path = os.environ['TEST_FILE_NAME']
    if not test_file_path:
        print('test file name is empty', file=sys.stderr)
        exit(1)
    print(parse_test_jobs(test_file_path))
    

================================================
FILE: .github/scripts/mutate/parse_mutate_log.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import re


def parse_mutate_log(log_file):
    mutants = {}
    with open(log_file) as f:
        lines = f.readlines()
        for line in lines:
            # The mutation score is 0.326154 (106 passed, 180 failed, 31 duplicated, 39 skipped, total is 325)
            if line.strip().startswith("The mutation score is"):
                result = re.match(r'(.+)\((\d+) passed, (\d+) failed, (\d+) duplicated, (\d+) skipped, total is (\d+)\)', line)
                passed = result.group(2)
                failed = result.group(3)
                duplicated = result.group(4)
                skipped = result.group(5)
                total = result.group(6)
                score = int(passed) * 1.0 / (int(total) - int(skipped))
                return f'The mutation score is {score} ({passed} passed, {failed} failed, {duplicated} duplicated, {skipped} skipped, total is {total})'
    return ''

if __name__ == '__main__':
    log_file = os.environ['LOG_FILE']
    if not log_file:
        print('log file is empty')
        exit(1)
    s = parse_mutate_log(log_file)
    if s:
        print(s)
    else:
        exit(1)
    

================================================
FILE: .github/scripts/mutate/parse_test_cases.py
================================================
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
import re


def parse_test_cases(test_file_path):
    test_cases = []
    with open(test_file_path) as f:
        lines = f.readlines()
        for line in lines:
            # func TestXattr2(t *testing.T) {
            if re.search('^func\s+Test.+', line.strip()):
                if 'skip mutate' in line:
                    continue
                name = line.strip().split(' ')[1].split('(')[0]
                test_cases.append(name)
    return test_cases


if __name__ == '__main__':
    test_file_path = os.environ['TEST_FILE_NAME']
    if not test_file_path:
        print('test file name is empty')
        exit(1)
    test_cases = parse_test_cases(test_file_path)
    if len(test_cases) == 0:
        print('test case is empty')
        exit(1)
    test_cases_str = '|'.join(test_cases)
    print(f'({test_cases_str})')

================================================
FILE: .github/scripts/mutate/query_report.py
================================================

import os
import sys
import MySQLdb

def query_report(repo, run_id):
    passowrd = os.environ['MYSQL_PASSWORD']
    db = MySQLdb.connect(host="8.210.231.144", user="juicedata", passwd=passowrd, db="mutate")
    db.query(f"""SELECT job_name, github_job_url, passed, failed, compile_error, out_of_coverage, skip_by_comment, others FROM report 
        WHERE github_repo="{repo}" AND github_run_id={run_id}""")
    r=db.store_result()
    for i in range(r.num_rows()):
        row = r.fetch_row()[0]
        passed = int(row[2])
        failed = int(row[3])
        if passed+failed != 0:
            score = row[2]/(row[2]+row[3])
        else:
            score = 0
        print(f'{row[0]}: score:{score:.2f} failed:{row[3]}, passed:{row[2]}, compile error:{row[4]}, out of coverage:{row[5]}, skip by comment:{row[6]}, others:{row[7]}')
        print(f'Job detail: {row[1]}\n')
    db.close()

if __name__ == "__main__":
    repo = os.environ.get('GITHUB_REPOSITORY')
    run_id = os.environ.get('GITHUB_RUN_ID')
    # repo = 'juicedata/juicefs'
    # run_id = '3608212346'
    print(f'repo is {repo}, run_id is {run_id}', file=sys.stderr)
    query_report(repo, run_id)


================================================
FILE: .github/scripts/mutate/save_report.py
================================================

import json
import os
from sys import argv
import MySQLdb
from datetime import datetime
import argparse
# CREATE DATABASE mutate
# CREATE TABLE `report` (
#   `github_repo` varchar(128) DEFAULT NULL,
#   `github_ref_name` varchar(64) DEFAULT NULL,
#   `github_sha` varchar(128) DEFAULT NULL,
#   `github_run_id` varchar(64) DEFAULT NULL,
#   `github_job_url` varchar(1024) DEFAULT NULL,
#   `created_date` datetime DEFAULT NULL,
#   `job_name` varchar(64) DEFAULT NULL,
#   `passed` int,
#   `failed` int,
#   `compile_error` int, 
#   `out_of_coverage` int, 
#   `skip_by_comment` int,
#   `others` int
# )

def save_report(job_name, report):
    passowrd = os.environ['MYSQL_PASSWORD']
    github_repo = os.environ.get('GITHUB_REPOSITORY')
    print(f'github_repo is: {github_repo}')
    github_ref_name = os.environ.get('GITHUB_REF_NAME')
    print(f'github_ref_name is: {github_ref_name}')
    github_sha = os.environ.get('GITHUB_SHA')
    print(f'github_sha is: {github_sha}')
    github_run_id = os.environ.get('GITHUB_RUN_ID')
    print(f'github_run_id is: {github_run_id}')
    github_job_url = os.environ.get('JOB_URL')
    print(f'github_job_url is: {github_job_url}')
    created_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    db = MySQLdb.connect(host="8.210.231.144", user="juicedata", passwd=passowrd, db="mutate")
    c = db.cursor()
    c.execute(f"insert into report(github_repo, github_ref_name,  github_sha, github_run_id, github_job_url, created_date, job_name, passed, failed, compile_error, out_of_coverage, skip_by_comment, others) \
        values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (github_repo, github_ref_name, github_sha, github_run_id, github_job_url, created_date, job_name, report['passed'], report['failed'], report['compile_error'], report['out_of_coverage'], report['skip_by_comment'], report['others']))
    db.commit()
    c.close()
    db.close()
    print(f'save report for {job_name} succeed')

if __name__ == "__main__":
    job_name = os.environ.get('JOB_NAME')
    stat_result_file = os.environ.get('STAT_RESULT_FILE')
    print(f'save report for {job_name}, stat result file is {stat_result_file}')
    with open(stat_result_file) as f:
        report = json.load(f)
        save_report(job_name, report)


================================================
FILE: .github/scripts/perf/ai.sh
================================================
#!/bin/bash
# ai_format_benchmark.sh
set -e

MNT_POINT=$1
RESULTS_FILE=$2
VERSION=$3

# Create Python virtual environment if needed
if [ ! -d "venv" ]; then
    PY_VER=$(python3 -V 2>&1 | awk '{print $2}' | cut -d. -f1,2)
    PKG="python${PY_VER}-venv"
    sudo apt install $PKG -y
    python3 -m venv venv
fi

source venv/bin/activate

# Install required packages
#pip install --upgrade pip
pip install numpy pandas

# Try to install optional dependencies
pip install h5py || echo "h5py installation failed, HDF5 tests will be skipped"
pip install torch || echo "PyTorch installation failed, PyTorch tests will be skipped"
pip install tensorflow || echo "TensorFlow installation failed, TensorFlow tests will be skipped"
pip install pyarrow || echo "PyArrow installation failed, Parquet tests will be skipped"
pip install onnx || echo "OONX installation failed, ONNX tests will be skipped"
pip install onnxruntime
pip install pillow
pip install lmdb
pip install tqdm


# Run the benchmark
python .github/scripts/perf/ai_format_benchmark.py "$MNT_POINT" "$RESULTS_FILE" "$VERSION"

deactivate


================================================
FILE: .github/scripts/perf/ai_format_benchmark.py
================================================
#!/usr/bin/env python3
"""
AI Training Format Performance Benchmark Script - Fixed Version
Comprehensive performance testing for AI training file formats
"""

import os
import sys
import json
import time
import tempfile
import subprocess
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Dict, List, Tuple, Any, Callable, Optional
import argparse
import shutil
from dataclasses import dataclass
import pickle
import random
import io
from PIL import Image
import lmdb
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

try:
    import h5py
except ImportError:
    h5py = None

try:
    import torch
except ImportError:
    torch = None

try:
    import tensorflow as tf
except ImportError:
    tf = None

try:
    import pyarrow.parquet as pq
    import pyarrow as pa
except ImportError:
    pq = None
    pa = None

try:
    import onnx
    import onnxruntime as ort
except ImportError:
    onnx = None
    ort = None

@dataclass
class BenchmarkResult:
    """Structured benchmark result"""
    min_time: float
    max_time: float
    mean_time: float
    std_time: float
    throughput_mb_s: Optional[float] = None
    file_size_bytes: Optional[int] = None
    operation_count: Optional[int] = None
    details: Dict[str, Any] = None

class AIFormatBenchmark:
    def __init__(self, mount_point: str, results_file: str, version: str):
        self.mount_point = Path(mount_point)
        self.results_file = Path(results_file)
        self.version = version
        self.results = {}
        self.verbose = False

        # Test configuration
        self.config = {
            'small_file_mb': 50,
            'medium_file_mb': 100,
            'large_file_mb': 200,
            'num_runs': 2,
            'cool_down_time': 0.5,
            'num_samples': 5000,  # For dataset benchmarks
            'image_size': (128, 128, 3),  # For image dataset benchmarks
            'lmdb_num_samples': 1000,  # Reduced for CI testing
            'lmdb_num_proc': 4,  # Reduced for CI testing
            'lmdb_image_size': (128, 128)  # Smaller images for CI
        }

        # Create test directory
        self.test_dir = self.mount_point / "ai_benchmark"
        self.test_dir.mkdir(exist_ok=True)

    def clear_cache(self):
        """Clear system cache silently"""
        try:
            subprocess.run(["sudo", "sync"], check=True,
                         stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            subprocess.run(
                "echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null",
                shell=True,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
        except (subprocess.CalledProcessError, FileNotFoundError):
            if self.verbose:
                print("Warning: Failed to clear cache (requires sudo privileges)")

    def run_benchmark(self, name: str, func: Callable, file_size: int = None,
                     description: str = "") -> Optional[BenchmarkResult]:
        """Run a benchmark function multiple times and calculate statistics"""
        times = []
        size_results = []

        if self.verbose:
            print(f"  Running {name}: {description}")

        for i in range(self.config['num_runs']):
            self.clear_cache()
            start_time = time.perf_counter()
            try:
                result = func()
            except Exception as e:
                print(f"    Error in run {i+1}: {e}")
                result = None
            end_time = time.perf_counter()

            elapsed_time = end_time - start_time
            times.append(elapsed_time)
            
            if "file_size" in result:
                file_size = result["file_size"]
            if result is not None:
                size_results.append(result)
            if tf is not None:
                tf.keras.backend.clear_session()
            if torch is not None:
                torch.cuda.empty_cache() if torch.cuda.is_available() else None
            import gc
            gc.collect()
            time.sleep(self.config['cool_down_time'])

        if not times:
            return None

        stats = BenchmarkResult(
            min_time=min(times),
            max_time=max(times),
            mean_time=np.mean(times),
            std_time=np.std(times),
            details=size_results[0] if size_results else {}
        )

        if file_size is not None and stats.mean_time > 0:
            if "num_layers" in result:
                stats.throughput_mb_s = file_size / result["num_layers"] / stats.mean_time / (1024**2)
            else:
                stats.throughput_mb_s = file_size / stats.mean_time / (1024**2)
            stats.file_size_bytes = file_size

        if stats.details:
            for key in ['num_records', 'records_read', 'num_files', 'num_layers', 'num_samples']:
                if key in stats.details:
                    stats.operation_count = stats.details[key]
                    break

        if self.verbose:
            self._print_benchmark_result(name, stats)

        return stats

    def _print_benchmark_result(self, name: str, stats: BenchmarkResult):
        """Print individual benchmark result in structured format"""
        print(f"    {name}:")
        print(f"      Time: {stats.mean_time:.3f}s ± {stats.std_time:.3f}s "
              f"(min: {stats.min_time:.3f}s, max: {stats.max_time:.3f}s)")

        if stats.throughput_mb_s is not None:
            print(f"      Throughput: {stats.throughput_mb_s:.2f} MB/s")

        if stats.file_size_bytes is not None:
            size_mb = stats.file_size_bytes / (1024**2)
            print(f"      File size: {size_mb:.1f} MB")

        if stats.operation_count is not None:
            print(f"      Operations: {stats.operation_count:,}")

        if stats.details:
            details_str = ", ".join([f"{k}: {v}" for k, v in stats.details.items()])
            print(f"      Details: {details_str}")
    
    def generate_random_image_bytes(self, width=64, height=64, format="JPEG", quality=85):
        """Generate random image bytes for LMDB testing"""
        image_np = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
        img = Image.fromarray(image_np)
        img_bytes = io.BytesIO()
        img.save(img_bytes, format=format, quality=quality)
        return img_bytes.getvalue()

    def generate_lmdb_data_entry(self, idx, image_size=(64, 64)):
        """Generate a single LMDB data entry"""
        img_bytes = self.generate_random_image_bytes(width=image_size[0], height=image_size[1])
        return {
            "index": idx,
            "txt": f"Sample text for entry {idx}",
            "jpeg": img_bytes
        }

    def write_lmdb_data(self, lmdb_path, num_samples, image_size=(64, 64)):
        """Write data to LMDB database"""
        env = lmdb.open(str(lmdb_path), readonly=False, meminit=False, map_size=1024**4)
        total_bytes = 0
        
        with env.begin(write=True) as txn:
            for i in range(num_samples):
                data = self.generate_lmdb_data_entry(i, image_size)
                key = str(i).encode()
                value = pickle.dumps(data)
                txn.put(key, value)
                total_bytes += len(value)
        
        env.close()
        return total_bytes

    def read_lmdb_data_single_process(self, lmdb_path):
        """Read LMDB data using single process"""
        env = lmdb.open(str(lmdb_path), readonly=True, lock=False, readahead=False, meminit=False)
        total_bytes = 0
        samples_read = 0
        
        with env.begin(write=False) as txn:
            cursor = txn.cursor()
            for key, value in cursor:
                total_bytes += len(value)
                samples_read += 1
        
        env.close()
        return total_bytes, samples_read

    def lmdb_batch_worker(self, lmdb_path, key_batch):
        """Worker function for multi-process LMDB reading"""
        env = lmdb.open(str(lmdb_path), readonly=True, lock=False, readahead=False, meminit=False)
        total_bytes = 0
        samples_processed = 0
        
        with env.begin(write=False) as txn:
            for key_bytes in key_batch:
                data = txn.get(key_bytes)
                if data:
                    total_bytes += len(data)
                    samples_processed += 1
        
        env.close()
        return samples_processed, total_bytes

    def read_lmdb_data_multi_process(self, lmdb_path, num_processes=2):
        """Read LMDB data using multiple processes"""
        env = lmdb.open(str(lmdb_path), readonly=True, lock=False, readahead=False, meminit=False)
        
        # Get all keys
        keys = []
        with env.begin(write=False) as txn:
            cursor = txn.cursor()
            for key, _ in cursor:
                keys.append(key)
        
        env.close()
        
        # Split keys into batches for each process
        batch_size = len(keys) // num_processes + 1
        key_batches = [keys[i:i + batch_size] for i in range(0, len(keys), batch_size)]
        
        total_bytes = 0
        total_samples = 0
        with ProcessPoolExecutor(max_workers=num_processes) as executor:
            futures = []
            for batch in key_batches:
                futures.append(executor.submit(self.lmdb_batch_worker, lmdb_path, batch))
            
            for future in as_completed(futures):
                samples, bytes_read = future.result()
                total_samples += samples
                total_bytes += bytes_read
        
        return total_bytes, total_samples
    
    def benchmark_lmdb(self):
        """Benchmark LMDB format for datasets"""
        results = {}
        num_samples = self.config['lmdb_num_samples']
        num_proc = self.config['lmdb_num_proc']
        image_size = self.config['lmdb_image_size']
        
        # Estimate file size (approx 5KB per sample)
        estimated_file_size = num_samples * 5 * 1024
        
        for size_name, sample_multiplier in [('small', 1), ('medium', 2)]:
            actual_samples = num_samples * sample_multiplier
            lmdb_dir = self.test_dir / f"lmdb_{size_name}_{actual_samples}samples"
            lmdb_dir.mkdir(exist_ok=True)
            
            def write_func():
                total_bytes = self.write_lmdb_data(lmdb_dir, actual_samples, image_size)
                return {"file_size": total_bytes, "num_samples": actual_samples}
            
            def read_single_func():
                bytes_read, samples_read = self.read_lmdb_data_single_process(lmdb_dir)
                return {"bytes_read": bytes_read, "samples_read": samples_read}

            def read_multi_func():
                bytes_read, samples_read = self.read_lmdb_data_multi_process(lmdb_dir, num_proc)
                return {"bytes_read": bytes_read, "samples_read": samples_read, "processes": num_proc}
            
            # Write benchmark
            write_stats = self.run_benchmark(
                f"lmdb_{size_name}_write", write_func, file_size=estimated_file_size * sample_multiplier,
                description=f"Write LMDB ({actual_samples} samples)"
            )
            
            # Single process read benchmark
            read_single_stats = self.run_benchmark(
                f"lmdb_{size_name}_read_single", read_single_func, file_size=estimated_file_size * sample_multiplier,
                description=f"Read LMDB single process ({actual_samples} samples)"
            )
            read_multi_stats = self.run_benchmark(
                f"lmdb_{size_name}_read_multi", read_multi_func, file_size=estimated_file_size * sample_multiplier,
                description=f"Read LMDB multi process ({num_proc} processes, {actual_samples} samples)"
            )
            
            # Cleanup
            if lmdb_dir.exists():
                shutil.rmtree(lmdb_dir)
            
            if write_stats and read_single_stats and read_multi_stats:
                results[size_name] = {
                    "write": write_stats,
                    "read_single": read_single_stats,
                    "read_multi": read_multi_stats
                }
        
        return results


    # ----------------------------------------------------------------------
    # Model Weights Benchmarks
    # ----------------------------------------------------------------------

    def benchmark_pytorch_weights(self):
        """Benchmark PyTorch .pt/.pth format with multiple sizes"""
        if torch is None:
            print("PyTorch not available, skipping PyTorch benchmark")
            return None

        results = {}
        for size_name, size_mb in [('small', 1000), ('large', 4000)]:
            file_path = self.test_dir / f"pytorch_weights_{size_name}_{size_mb}mb.pt"
            file_size = size_mb * 1024 * 1024

            layer_sizes = [file_size // 8 // 5] * 5  # Split into 5 layers
            dummy_data = {
                'weights': {f'layer_{i}': torch.randn(size) for i, size in enumerate(layer_sizes)},
                'optimizer': {'lr': 0.001, 'momentum': 0.9},
                'metadata': {'epoch': 10, 'version': '1.0', 'created': time.time()}
            }

            def write_func():
                torch.save(dummy_data, file_path)
                actual_size = os.path.getsize(file_path)
                return {'file_size': actual_size, 'num_layers': len(dummy_data['weights'])}

            def read_func():
                loaded = torch.load(file_path)
                total_params = 0
                for layer_name, weights in loaded['weights'].items():
                    total_params += weights.numel()
                    _ = torch.sum(weights).item() % 1000
                return {'file_size': total_params, 'num_layers': len(loaded['weights'])}

            write_stats = self.run_benchmark(
                f"pytorch_weights_{size_name}_write", write_func, file_size=file_size / 2,
                description=f"Write PyTorch weights ({size_mb}MB)"
            )

            read_stats = self.run_benchmark(
                f"pytorch_weights_{size_name}_read", read_func, file_size=file_size / 2,
                description=f"Read PyTorch weights ({size_mb}MB)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_tensorflow_h5(self):
        """Benchmark TensorFlow HDF5 format with multiple sizes"""
        if tf is None or h5py is None:
            print("TensorFlow or h5py not available, skipping HDF5 benchmark")
            return None

        results = {}
        for size_name, size_mb in [('small', 500), ('large', 2000)]:
            file_path = self.test_dir / f"tf_h5_{size_name}_{size_mb}mb.h5"
            file_size = size_mb * 1024 * 1024

            def write_func():
                total_data_size = 0
                with h5py.File(file_path, "w") as f:
                    num_layers = 8
                    target_data_size = file_size
                    data_per_dataset = target_data_size // (num_layers * 2)
                    for i in range(num_layers):
                        weights_elements = data_per_dataset // 4
                        weights_data = np.random.randn(weights_elements).astype(np.float32)
                        f.create_dataset(f'conv_{i}_weights', data=weights_data)
                        total_data_size += weights_data.nbytes
                        bias_data = np.random.randn(256).astype(np.float32)
                        f.create_dataset(f'conv_{i}_bias', data=bias_data)
                        total_data_size += bias_data.nbytes

                actual_size = os.path.getsize(file_path)
                return {"file_size": actual_size, "num_datasets": num_layers * 2}

            def read_func():
                total_size = 0
                dataset_count = 0
                data_checksum = 0
                actual_size = os.path.getsize(file_path)
                with h5py.File(file_path, "r") as f:
                    for key in f.keys():
                        if isinstance(f[key], h5py.Dataset):
                            # 实际读取数据
                            data = f[key][:]
                            total_size += data.nbytes
                            dataset_count += 1
                            # 处理数据确保实际读取
                            data_checksum = (data_checksum + np.sum(data)) % 1000000
                return {"file_size": actual_size, "num_datasets": dataset_count}
            write_stats = self.run_benchmark(
                f"tensorflow_h5_{size_name}_write", write_func, file_size=file_size,
                description=f"Write TensorFlow H5 ({size_mb}MB)"
            )
            self.clear_cache()
            read_stats = self.run_benchmark(
                f"tensorflow_h5_{size_name}_read", read_func, file_size=file_size,
                description=f"Read TensorFlow H5 ({size_mb}MB)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_onnx(self):
        """Benchmark ONNX model format"""
        if onnx is None or ort is None:
            print("ONNX or ONNX Runtime not available, skipping ONNX benchmark")
            return None

        results = {}
        for size_name, size_mb in [('small', 50), ('medium', 100), ('large', 200)]:
            file_path = self.test_dir / f"onnx_model_{size_name}_{size_mb}mb.onnx"
            file_size = size_mb * 1024 * 1024

            # Create a simple ONNX model
            def create_onnx_model():
                from onnx import helper, TensorProto, save

                # Calculate appropriate tensor sizes to match target file size
                tensor_size = max(100, int((file_size * 0.8) / 4 / 4))  # Rough estimation

                # Create a simple graph
                X = helper.make_tensor_value_info('X', TensorProto.FLOAT, [1, 3, 224, 224])
                Y = helper.make_tensor_value_info('Y', TensorProto.FLOAT, [1, 1000])

                # Create weights with appropriate size
                weights = helper.make_tensor(
                    'W',
                    TensorProto.FLOAT,
                    [3 * 224 * 224, 1000],
                    np.random.randn(3 * 224 * 224 * 1000).astype(np.float32)[:3*224*224*1000]
                )

                node = helper.make_node(
                    'MatMul',
                    ['X', 'W'],
                    ['Y'],
                    name='matmul'
                )

                graph = helper.make_graph(
                    [node],
                    'simple_model',
                    [X],
                    [Y],
                    [weights]
                )

                model = helper.make_model(graph, producer_name='benchmark')
                return model

            def write_func():
                model = create_onnx_model()
                onnx.save(model, file_path)
                actual_size = os.path.getsize(file_path)
                return {"file_size": actual_size, "model_size": file_size}

            def read_func():
                # Load and validate model
                model = onnx.load(file_path)
                onnx.checker.check_model(model)

                # Run inference with ONNX Runtime
                sess = ort.InferenceSession(file_path)
                input_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
                outputs = sess.run(None, {'X': input_data})
                return {"output_shape": outputs[0].shape, "model_valid": True}

            write_stats = self.run_benchmark(
                f"onnx_{size_name}_write", write_func, file_size=file_size,
                description=f"Write ONNX model ({size_mb}MB)"
            )

            read_stats = self.run_benchmark(
                f"onnx_{size_name}_read", read_func, file_size=file_size,
                description=f"Read ONNX model ({size_mb}MB)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_huggingface_bin(self):
        """Benchmark HuggingFace .bin format"""
        if torch is None:
            print("PyTorch not available, skipping HuggingFace benchmark")
            return None

        results = {}
        for size_name, size_mb in [('small', 10), ('medium', 50), ('large', 100)]:
            file_path = self.test_dir / f"hf_model_{size_name}_{size_mb}mb.bin"
            file_size = size_mb * 1024 * 1024

            # Create HuggingFace-style model weights
            def create_hf_weights():
                # Calculate layer sizes to approximate target file size
                num_layers = 12
                layer_size = max(100, int((file_size * 0.9) / num_layers / 4))  # Rough estimation

                weights = {}
                for i in range(num_layers):
                    weights[f"layer.{i}.attention.self.query.weight"] = torch.randn(layer_size)
                    weights[f"layer.{i}.attention.self.key.weight"] = torch.randn(layer_size)
                    weights[f"layer.{i}.attention.self.value.weight"] = torch.randn(layer_size)
                    weights[f"layer.{i}.attention.output.dense.weight"] = torch.randn(layer_size)
                    weights[f"layer.{i}.intermediate.dense.weight"] = torch.randn(layer_size)
                    weights[f"layer.{i}.output.dense.weight"] = torch.randn(layer_size)

                # Add embeddings
                weights["embeddings.word_embeddings.weight"] = torch.randn(layer_size)
                weights["embeddings.position_embeddings.weight"] = torch.randn(512, layer_size)
                weights["embeddings.token_type_embeddings.weight"] = torch.randn(2, layer_size)

                return weights

            def write_func():
                weights = create_hf_weights()
                torch.save(weights, file_path)
                actual_size = os.path.getsize(file_path)
                return {"file_size": actual_size, "num_tensors": len(weights)}

            def read_func():
                weights = torch.load(file_path)
                total_params = sum(param.numel() for param in weights.values())
                return {"loaded_params": total_params, "num_tensors": len(weights)}

            write_stats = self.run_benchmark(
                f"huggingface_{size_name}_write", write_func, file_size=file_size,
                description=f"Write HuggingFace weights ({size_mb}MB)"
            )

            read_stats = self.run_benchmark(
                f"huggingface_{size_name}_read", read_func, file_size=file_size,
                description=f"Read HuggingFace weights ({size_mb}MB)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_tensorflow_checkpoint(self):
        """Benchmark TensorFlow checkpoint format"""
        if tf is None:
            print("TensorFlow not available, skipping TF checkpoint benchmark")
            return None
     #   physical_devices = tf.config.list_physical_devices('CPU')
     #   if physical_devices:
     #       try:
     #           tf.config.set_logical_device_configuration(
     #               physical_devices[0],
     #               [tf.config.LogicalDeviceConfiguration(memory_limit=5 * 1024)]  # 5GB
     #           )
     #           print("Set TensorFlow memory limit to 5GB")
     #       except RuntimeError as e:
     #           print(f"Could not set memory limit: {e}")
        results = {}
        for size_name, size_mb in [('small', 10), ('large', 100)]:
            checkpoint_dir = self.test_dir / f"tf_checkpoint_{size_name}_{size_mb}mb"
            checkpoint_dir.mkdir(exist_ok=True)
            file_size = size_mb * 1024 * 1024

            def write_func():
                # Create a simple model
                if size_mb == 10:
                    layer_sizes = [2048, 1024, 512, 256, 128, 64]
                else:  # 100MB
                    layer_sizes = [8192, 4096, 2048, 1024, 512, 256]
                layers = [tf.keras.layers.Dense(layer_sizes[0], activation='relu', input_shape=(784,))]
                for size in layer_sizes[1:]:
                    layers.append(tf.keras.layers.Dense(size, activation='relu'))
                layers.append(tf.keras.layers.Dense(10, activation='softmax'))
                model = tf.keras.Sequential(layers)
                checkpoint = tf.train.Checkpoint(model=model)
                checkpoint_path = checkpoint_dir / "model.ckpt"
                checkpoint.write(str(checkpoint_path))
                del model
                tf.keras.backend.clear_session()
            
                total_size = sum(file.stat().st_size for file in checkpoint_dir.glob("*"))
                return {"file_size": total_size, "num_files": len(list(checkpoint_dir.glob("*")))}

            def read_func():
                if size_mb == 10:
                    layer_sizes = [2048, 1024, 512, 256, 128, 64]
                else:
                    layer_sizes = [8192, 4096, 2048, 1024, 512, 256]
            
                layers = [tf.keras.layers.Dense(layer_sizes[0], activation='relu', input_shape=(784,))]
                for size in layer_sizes[1:]:
                    layers.append(tf.keras.layers.Dense(size, activation='relu'))
                layers.append(tf.keras.layers.Dense(10, activation='softmax'))
            
                model = tf.keras.Sequential(layers)
                checkpoint = tf.train.Checkpoint(model=model)
                checkpoint_path = checkpoint_dir / "model.ckpt"
                checkpoint.restore(str(checkpoint_path))

                batch_size = 64
                num_batches = 100
            
                for i in range(num_batches):
                    test_input = tf.random.normal((batch_size, 784))
                    output = model(test_input)
                    _ = tf.reduce_mean(output)
            
                del model
                tf.keras.backend.clear_session()
            
                return {"output_shape": output.shape, "restored": True}
            tf.keras.backend.clear_session()
            import gc
            gc.collect()
            write_stats = self.run_benchmark(
                f"tf_checkpoint_{size_name}_write", write_func, file_size=file_size,
                description=f"Write TF checkpoint ({size_mb}MB)"
            )

            read_stats = self.run_benchmark(
                f"tf_checkpoint_{size_name}_read", read_func, file_size=file_size,
                description=f"Read TF checkpoint ({size_mb}MB)"
            )

            # Cleanup
            if checkpoint_dir.exists():
                shutil.rmtree(checkpoint_dir)

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    # ----------------------------------------------------------------------
    # Dataset Format Benchmarks
    # ----------------------------------------------------------------------

    def benchmark_tfrecord(self):
        """Benchmark TFRecord format for datasets"""
        if tf is None:
            print("TensorFlow not available, skipping TFRecord benchmark")
            return None

        results = {}
        num_samples = self.config['num_samples']
        image_size = self.config['image_size']
        
        for size_name, sample_multiplier in [('small', 1), ('medium', 2), ('large', 4)]:
            actual_samples = num_samples * sample_multiplier
            file_path = self.test_dir / f"tfrecord_{size_name}_{actual_samples}samples.tfrecord"
        
            image_data_size = np.prod(image_size) * 4  # 图像数据大小 (float32)
            sample_size_estimate = image_data_size + 100  # 图像 + 标签 + 元数据
            file_size_bytes = actual_samples * sample_size_estimate

            def create_example(image_data, label, extra_features):
                feature = {
                    'image': tf.train.Feature(
                        bytes_list=tf.train.BytesList(value=[image_data])),
                    'label': tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[label])),
                    'extra_features': tf.train.Feature(
                        float_list=tf.train.FloatList(value=extra_features))
                }
                return tf.train.Example(features=tf.train.Features(feature=feature))

            def write_func():
                with tf.io.TFRecordWriter(str(file_path)) as writer:
                    for i in range(actual_samples):
                    # 创建随机图像数据和额外特征
                        image_data = np.random.rand(*image_size).astype(np.float32).tobytes()
                        label = i % 100
                        extra_features = np.random.randn(10).astype(np.float32).tolist()

                        example = create_example(image_data, label, extra_features)
                        writer.write(example.SerializeToString())

                actual_file_size = os.path.getsize(file_path)
                return {"file_size": actual_file_size, "num_samples": actual_samples}
        
            def read_func():
                def parse_example(example_proto):
                    feature_description = {
                        'image': tf.io.FixedLenFeature([], tf.string),
                        'label': tf.io.FixedLenFeature([], tf.int64),
                        'extra_features': tf.io.FixedLenFeature([10], tf.float32),
                    }
                    return tf.io.parse_single_example(example_proto, feature_description)

            # 创建数据集
                dataset = tf.data.TFRecordDataset(str(file_path))
                dataset = dataset.map(parse_example)

            # 实际读取和处理所有样本
                total_samples = 0
                total_image_size = 0
                label_sum = 0
                feature_sum = 0.0
                for example in dataset:
                    total_samples += 1

                # 实际处理图像数据（触发磁盘读取）
                    image_data = tf.io.decode_raw(example['image'], tf.float32)
                    total_image_size += image_data.shape[0] * 4  # 4 bytes per float32

                # 处理标签和特征数据
                    label_sum += example['label'].numpy()
                    feature_sum += tf.reduce_sum(example['extra_features']).numpy()

            # 验证处理结果（防止编译器优化）
                validation_value = (label_sum + int(feature_sum)) % 1000
                _ = validation_value  # 确保值被使用

                file_size = os.path.getsize(file_path)
                return {
                    "samples_read": total_samples,
                    "file_size": file_size,
                    "total_data_processed": total_image_size,
                    "validation_ok": validation_value >= 0
                }

            write_stats = self.run_benchmark(
                f"tfrecord_{size_name}_write", write_func, file_size=file_size_bytes,
                description=f"Write TFRecord ({actual_samples} samples)"
            )

            if write_stats:
                self.clear_cache()
                # 读取测试
                read_stats = self.run_benchmark(
                    f"tfrecord_{size_name}_read", read_func, file_size=file_size_bytes,
                    description=f"Read TFRecord ({actual_samples} samples)"
                )

            # 清理文件
                if file_path.exists():
                    file_path.unlink()

                if read_stats:
                    results[size_name] = {"write": write_stats, "read": read_stats}
            elif file_path.exists():
                file_path.unlink()
        return results    

    def benchmark_hdf5_dataset(self):
        """Benchmark HDF5 format for datasets"""
        if h5py is None:
            print("h5py not available, skipping HDF5 dataset benchmark")
            return None

        results = {}
        num_samples = self.config['num_samples']
        image_size = self.config['image_size']

        sample_size_estimate = np.prod(image_size) * 4 

        for size_name, sample_multiplier in [('small', 1), ('medium', 2)]:
            actual_samples = num_samples * sample_multiplier
            file_path = self.test_dir / f"hdf5_dataset_{size_name}_{actual_samples}samples.h5"
            file_size_bytes = actual_samples * sample_size_estimate

            def write_func():
                all_images = np.random.rand(actual_samples, *image_size).astype(np.float32)
                all_labels = np.arange(actual_samples) % 10

                with h5py.File(file_path, 'w') as f:
                    images = f.create_dataset(
                        'images',
                        data=all_images,
                        dtype=np.float32,
                        compression='gzip'
                    )
                    labels = f.create_dataset(
                        'labels',
                        data=all_labels,
                        dtype=np.int64
                    )

                actual_file_size = os.path.getsize(file_path)
                return {"file_size": actual_file_size, "num_samples": actual_samples}
            def read_func():
                with h5py.File(file_path, 'r') as f:
                    images = f['images'][:]
                    labels = f['labels'][:]

                total_images = len(images)
                file_size = os.path.getsize(file_path)
                return {"samples_read": total_images, "file_size": file_size}

            write_stats = self.run_benchmark(
                f"hdf5_dataset_{size_name}_write", write_func, file_size=file_size_bytes,
                description=f"Write HDF5 dataset ({actual_samples} samples)"
            )
            self.clear_cache()
            read_stats = self.run_benchmark(
                f"hdf5_dataset_{size_name}_read", read_func, file_size=file_size_bytes,
                description=f"Read HDF5 dataset ({actual_samples} samples)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_parquet(self):
        """Benchmark Parquet format for datasets"""
        if pq is None or pa is None:
            print("PyArrow not available, skipping Parquet benchmark")
            return None

        results = {}
        num_samples = self.config['num_samples']

        sample_size_estimate = 500

        for size_name, sample_multiplier in [('small', 2), ('medium', 4), ('large', 8)]:
            actual_samples = num_samples * sample_multiplier
            file_path = self.test_dir / f"parquet_{size_name}_{actual_samples}samples.parquet"
            file_size_bytes = actual_samples * sample_size_estimate

            def write_func():
                # Create sample data
                data = {
                    'id': list(range(actual_samples)),
                    'feature1': np.random.randn(actual_samples).astype(np.float32),
                    'feature2': np.random.randn(actual_samples).astype(np.float32),
                    'feature3': np.random.randn(actual_samples).astype(np.float32),
                    'label': np.random.randint(0, 10, actual_samples).astype(np.int64),
                    'timestamp': [time.time()] * actual_samples
                }

                table = pa.Table.from_pydict(data)
                pq.write_table(table, file_path, compression='snappy')

                actual_file_size = os.path.getsize(file_path)
                return {"file_size": actual_file_size, "num_samples": actual_samples}
           
            def read_func():
                parquet_file = pq.ParquetFile(file_path)
                total_rows = 0
                feature_sum = 0.0
            
                for i in range(parquet_file.num_row_groups):
                    table = parquet_file.read_row_group(i)
                    df = table.to_pandas()
                    total_rows += len(df)
                    feature_sum += df['feature1'].sum() + df['feature2'].sum()
            
                _ = feature_sum % 1000
                return {"rows_read": total_rows, "file_size": os.path.getsize(file_path)}

            write_stats = self.run_benchmark(
                f"parquet_{size_name}_write", write_func, file_size=file_size_bytes,
                description=f"Write Parquet ({actual_samples} samples)"
            )
            self.clear_cache()
            read_stats = self.run_benchmark(
                f"parquet_{size_name}_read", read_func, file_size=file_size_bytes,
                description=f"Read Parquet ({actual_samples} samples)"
            )

            if file_path.exists():
                file_path.unlink()

            if write_stats and read_stats:
                results[size_name] = {"write": write_stats, "read": read_stats}

        return results

    def benchmark_comprehensive(self):
        """Run comprehensive benchmarks with multiple file sizes"""
        benchmarks = [
            ("LMDB", self.benchmark_lmdb),
            ("PyTorch Weights", self.benchmark_pytorch_weights),
            ("TensorFlow H5", self.benchmark_tensorflow_h5),
        #    ("ONNX", self.benchmark_onnx),
            ("HuggingFace Bin", self.benchmark_huggingface_bin),
            ("TensorFlow Checkpoint", self.benchmark_tensorflow_checkpoint),
        #    ("TFRecord Dataset", self.benchmark_tfrecord),
            ("HDF5 Dataset", self.benchmark_hdf5_dataset),
            ("Parquet Dataset", self.benchmark_parquet),
        ]

        comprehensive_results = {}

        for name, benchmark_func in benchmarks:
            try:
                print(f"\n{'='*60}")
                print(f"RUNNING COMPREHENSIVE {name.upper()} BENCHMARK")
                print(f"{'='*60}")

                result = benchmark_func()
                if result:
                    comprehensive_results[name.lower().replace(" ", "_")] = result
                    print(f"✓ Completed comprehensive {name} benchmark")
                else:
                    print(f"✗ {name} benchmark returned no results")

            except Exception as e:
                print(f"✗ Error running comprehensive {name} benchmark: {e}")
                import traceback
                traceback.print_exc()
                comprehensive_results[name.lower().replace(" ", "_")] = {"error": str(e)}

        return comprehensive_results

    def generate_report(self):
        """Generate detailed performance report"""
        def default_serializer(obj):
            if isinstance(obj, BenchmarkResult):
                return {
                    'min_time': obj.min_time,
                    'max_time': obj.max_time,
                    'mean_time': obj.mean_time,
                    'std_time': obj.std_time,
                    'throughput_mb_s': obj.throughput_mb_s,
                    'file_size_bytes': obj.file_size_bytes,
                    'operation_count': obj.operation_count,
                    'details': obj.details
                }
            elif isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif hasattr(obj, '__dict__'):
                return obj.__dict__
            return str(obj)

        report = {
            'version': self.version,
            'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
            'mount_point': str(self.mount_point),
            'config': self.config,
            'environment': {
                'python_version': sys.version,
                'has_torch': torch is not None,
                'has_tensorflow': tf is not None,
                'has_h5py': h5py is not None,
                'has_pyarrow': pq is not None,
                'has_onnx': onnx is not None,
                'has_onnxruntime': ort is not None
            },
            'results': self.results
        }

        summary = {}
        for benchmark_name, benchmark_data in self.results.items():
            if isinstance(benchmark_data, dict) and 'error' not in benchmark_data:
                for size_name, size_data in benchmark_data.items():
                    if isinstance(size_data, dict):
                        for op_type, stats in size_data.items():
                            if hasattr(stats, 'throughput_mb_s') and stats.throughput_mb_s:
                                key = f"{benchmark_name}_{size_name}_{op_type}"
                                summary[key] = {
                                    'throughput_mb_s': stats.throughput_mb_s,
                                    'time_s': stats.mean_time,
                                    'file_size_mb': stats.file_size_bytes / (1024**2) if stats.file_size_bytes else None
                                }

        report['summary'] = summary
        return report

    def save_results(self):
        """Save results to JSON file with comprehensive report"""
        self.results_file.parent.mkdir(parents=True, exist_ok=True)

        report = self.generate_report()

        def default_serializer(obj):
            if isinstance(obj, BenchmarkResult):
                return {
                    'min_time': obj.min_time,
                    'max_time': obj.max_time,
                    'mean_time': obj.mean_time,
                    'std_time': obj.std_time,
                    'throughput_mb_s': obj.throughput_mb_s,
                    'file_size_bytes': obj.file_size_bytes,
                    'operation_count': obj.operation_count,
                    'details': obj.details
                }
            elif isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif hasattr(obj, '__dict__'):
                return obj.__dict__
            return str(obj)

        with open(self.results_file, 'w') as f:
            json.dump(report, f, indent=2, default=default_serializer)

        print(f"\nResults saved to {self.results_file}")

    def print_detailed_summary(self):
        """Print detailed summary of all benchmark results"""
        print(f"\n{'='*80}")
        print(f"COMPREHENSIVE AI FORMAT PERFORMANCE BENCHMARK SUMMARY")
        print(f"Version: {self.version}")
        print(f"Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*80}")

        for benchmark_name, benchmark_data in self.results.items():
            if isinstance(benchmark_data, dict) and 'error' in benchmark_data:
                print(f"\n{benchmark_name.upper()}: ERROR - {benchmark_data['error']}")
                continue

            print(f"\n{benchmark_name.upper()}:")
            for size_name, size_data in benchmark_data.items():
                print(f"  {size_name.upper()} FILES:")
                for op_type, stats in size_data.items():
                    if hasattr(stats, 'mean_time'):
                        print(f"    {op_type.upper()}:")
                        print(f"      Time:      {stats.mean_time:.3f}s ± {stats.std_time:.3f}s")
                        print(f"      Range:     {stats.min_time:.3f}s - {stats.max_time:.3f}s")

                        if stats.throughput_mb_s:
                            print(f"      Throughput: {stats.throughput_mb_s:.2f} MB/s")

                        if stats.file_size_bytes:
                            size_mb = stats.file_size_bytes / (1024**2)
                            print(f"      Size:      {size_mb:.1f} MB")

                        if stats.details:
                            details = ", ".join([f"{k}: {v}" for k, v in stats.details.items()])
                            print(f"      Details:    {details}")

def main():
    parser = argparse.ArgumentParser(description="Comprehensive AI Format Performance Benchmark")
    parser.add_argument("mount_point", help="Mount point to test")
    parser.add_argument("results_file", help="File to save results JSON")
    parser.add_argument("version", help="Version identifier")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    parser.add_argument("--quick", "-q", action="store_true", help="Quick test (small files only)")
    args = parser.parse_args()

    benchmark = AIFormatBenchmark(args.mount_point, args.results_file, args.version)
    benchmark.verbose = args.verbose

    if args.quick:
        benchmark.config.update({
            'small_file_mb': 10,
            'medium_file_mb': 20,
            'large_file_mb': 50,
            'num_runs': 1,
            'cool_down_time': 0.3,
            'num_samples': 500,
            'lmdb_num_samples': 200,
            'lmdb_num_proc': 1,
            'lmdb_image_size': (32, 32)
        })

    print("Starting Comprehensive AI Format Performance Benchmark...")
    print(f"Configuration: {benchmark.config}")

    # Run comprehensive benchmarks
    benchmark.results = benchmark.benchmark_comprehensive()

    # Save and display results
    benchmark.save_results()
    benchmark.print_detailed_summary()

if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/perf/compare_ai.sh
================================================
#!/bin/bash
# fixed_compare.sh

current_file="$1"
old_file="$2"
TOLERANCE=${TOLERANCE:-0.3}
EXIT_ON_REGRESSION=${EXIT_ON_REGRESSION:-true}

echo "===================================================================="
echo "Fixed Performance Comparison Summary ($(echo "$TOLERANCE * 100" | bc)% tolerance):"
echo "===================================================================="
echo "Current: $current_file"
echo "Old:     $old_file"
echo "===================================================================="

regression_detected=false

keys=$(jq -r '.summary | keys[]' "$current_file")

declare -A categories
categories["lmdb"]="Lmdb"
categories["pytorch_weights"]="PyTorch Weights"
categories["tensorflow_h5"]="TensorFlow H5"
categories["huggingface"]="HuggingFace Bin"
categories["tensorflow_checkpoint"]="TensorFlow Checkpoint"
categories["tfrecord"]="TFRecord Dataset"
categories["hdf5_dataset"]="HDF5 Dataset"
categories["parquet"]="Parquet Dataset"

echo "Available keys in results:"
echo "$keys"
echo ""

for category_pattern in "${!categories[@]}"; do
    category_name="${categories[$category_pattern]}"
    echo "=== $category_name ==="
    category_keys=$(echo "$keys" | grep "^${category_pattern}_")
    if [ -z "$category_keys" ]; then
        echo "  No tests found for this category (pattern: $category_pattern)"
        echo ""
        continue
    fi
    
    while read -r key; do
        current_throughput=$(jq -r ".summary.\"$key\".throughput_mb_s" "$current_file")
        old_throughput=$(jq -r ".summary.\"$key\".throughput_mb_s" "$old_file")

        if [ "$current_throughput" = "null" ] || [ "$old_throughput" = "null" ] || [ "$current_throughput" = "" ] || [ "$old_throughput" = "" ]; then
            continue
        fi

        diff=$(echo "scale=1; $current_throughput - $old_throughput" | bc)
        diff_pct=$(echo "scale=1; ($diff / $old_throughput) * 100" | bc)
        abs_diff_pct=$(echo $diff_pct | awk '{if ($1<0) print -$1; else print $1}')
        current_formatted=$(printf "%.1f" "$current_throughput")
        old_formatted=$(printf "%.1f" "$old_throughput")
        diff_pct_formatted=$(printf "%.1f" "$diff_pct")

        status="✓ OK"
        if (( $(echo "$abs_diff_pct > $TOLERANCE * 100" | bc -l) )); then
            if (( $(echo "$current_throughput < $old_throughput" | bc -l) )); then
                status="❌ Worse"
                regression_detected=true
            else
                status="✅ Better"
            fi
        fi

        test_size=$(echo "$key" | awk -F_ '{print $(NF-1)}')
        test_operation=$(echo "$key" | awk -F_ '{print $NF}')
        
        echo "  ${test_size}_${test_operation}:"
        echo "    Current: $current_formatted MB/s"
        echo "    Old:     $old_formatted MB/s"
        echo "    Diff:    $diff_pct_formatted%"
        echo "    Status:  $status"
        echo ""

    done <<< "$category_keys"
done

echo "===================================================================="
echo "Summary:"
if [ "$regression_detected" = true ]; then
    echo "❌ PERFORMANCE REGRESSION DETECTED!"
    if [ "$EXIT_ON_REGRESSION" = true ]; then
        exit 1
    else
        exit 0
    fi
else
    echo "✅ No performance regression detected."
    exit 0
fi


================================================
FILE: .github/scripts/perf/compare_mdtest_fio.sh
================================================
#!/bin/bash
set -e

CURRENT_RESULTS=$1
OLD_RESULTS=$2
FILTER_OPS=("File read" "File stat" "File removal" "Tree removal" "Tree creation")

# Function to extract files/s from built-in mdtest output
extract_files_per_sec() {
    local mdtest_output=$1
    local files_per_sec=$(grep -oP 'Created .*files.*\(\K[0-9]+(\.[0-9]+)?(?= files/s\))' <<< "$mdtest_output" | head -1)
    if [[ -z "$files_per_sec" ]]; then
        files_per_sec=$(grep -oP '\(\K[0-9]+(\.[0-9]+)?(?= files/s\))' <<< "$mdtest_output" | head -1)
    fi
    if [[ -z "$files_per_sec" ]]; then
        echo "0"
    else
        echo "$files_per_sec"
    fi
}

# Function to extract IOPS from fio output
extract_iops() {
    local fio_output=$1
    local iops=$(grep -oP 'IOPS=\K[\d.]+[kMG]?' <<< "$fio_output" | head -1)
    # Convert to numeric value (handle k/M/G suffixes)
    if [[ "$iops" == *k ]]; then
        echo "${iops%k} * 1000" | bc -l
    elif [[ "$iops" == *M ]]; then
        echo "${iops%M} * 1000000" | bc -l
    elif [[ "$iops" == *G ]]; then
        echo "${iops%G} * 1000000000" | bc -l
    else
        echo "$iops"
    fi
}

extract_bw() {
    local fio_output=$1
    local bw=$(grep -oP 'BW=\K[^, ]+' <<< "$fio_output" | head -1)
    if [[ -z "$bw" ]]; then
        echo "N/A"
    else
        echo "$bw"
    fi
}

extract_metrics() {
    awk '{
        op_description=$1;
        op_type=$2;
        for(i=3;i<=NF;i++) if($i == ":") break;
        max=$(i+1); min=$(i+2); mean=$(i+3); stddev=$(i+4);
        print op_description, op_type, max, min, mean, stddev
    }' <<< "$1"
}

is_op_in_filter() {
    local op="$1"
    for allowed_op in "${FILTER_OPS[@]}"; do
        if [[ "$op" == "$allowed_op" ]]; then
            return 0
        fi
    done
    return 1
}

compare_with_tolerance() {
    local current=$1
    local old=$2
    local op_type=$3
    local direction=${4:-higher}
    tolerance=$(echo "$old * 0.2" | bc -l)
    lower_bound=$(echo "$old - $tolerance" | bc -l)
    upper_bound=$(echo "$old + $tolerance" | bc -l)

    # For time comparison, lower is better
    if is_op_in_filter "$op_type"; then
        echo "skip"
    elif (( $(echo "$current <= $upper_bound && $current >= $lower_bound" | bc -l) )); then
        echo "same"
    else
        if [[ "$direction" == "lower" ]]; then
            if (( $(echo "$current < $old" | bc -l) )); then
                echo "better"
            else
                echo "worse"
            fi
        else
            if (( $(echo "$current > $old" | bc -l) )); then
                echo "better"
            else
                echo "worse"
            fi
        fi
    fi
}

compare_scenario() {
    local scenario=$1
    local current_file="${CURRENT_RESULTS}.${scenario}.summary"
    local old_file="${OLD_RESULTS}.${scenario}.summary"

    echo ""
    echo "===================================================================="
    echo "Detailed Comparison for $scenario (with 20% tolerance)"
    case "$scenario" in
        "scenario1")
            echo "Command is : mpirun --use-hwthread-cpus --allow-run-as-root -np 4 mdtest -b 3 -z 1 -I 300"
            ;;
        "scenario2")
            echo "Command is : mpirun --use-hwthread-cpus --allow-run-as-root -np 4 mdtest -F -w 102400 -I 3000 -z 0"
            ;;
        "scenario3")
            echo "Command is : ./juicefs mdtest <meta-url> /mdtest_perf --threads 10 --dirs 3 --depth 3 --files 100"
            ;;
        "fio_scenario4")
            echo "Command is : fio --name=big-write --directory=/mnt/fio --group_reporting --rw=write --direct=1 --bs=64k --end_fsync=1 --numjobs=8 --nrfiles=1 --size=2G --runtime=120"
            ;;
        "fio_scenario5")
            echo "Command is : fio --name=big-write  --group_reporting --rw=randwrite --direct=1 --bs=64k --end_fsync=1 --runtime=200 --numjobs=8 --nrfiles=1 --size=2G"
            ;;
        "fio_scenario6")
            echo "Command is : fio --name=big-read-multiple  --group_reporting --runtime=300 --rw=read --direct=1 --bs=4k --numjobs=8 --nrfiles=1 --size=2G"
            ;;
        "fio_scenario7")
            echo "Command is : fio --name=big-read-multiple-concurrent  --group_reporting --rw=randread --direct=1 --bs=4k --numjobs=8 --nrfiles=1 --openfiles=1 --size=2G --output-format=normal --runtime=120"
            ;;
        "fio_scenario8")
            echo "fio --name=big-write --directory="$MNT_POINT/fio" --group_reporting \
    --rw=write --direct=1 --bs=1m --end_fsync=1 --runtime=120 \
    --numjobs=8 --nrfiles=8 --size=2G"
            ;;
        "fio_scenario9")
            echo "Command is : fio --name=big-read-multiple-concurrent --directory="$MNT_POINT/fio" --group_reporting \
    --rw=read --direct=1 --bs=1m --numjobs=8 --nrfiles=8 --openfiles=1 --size=2G --output-format=normal --runtime=120"
            ;;
    esac
    echo "===================================================================="

    # Handle built-in mdtest scenario (scenario3)
    if [[ "$scenario" == "scenario3" ]]; then
        printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" "Operation" "Current files/s" "Old files/s" "Diff" "Status" "Variance"
        echo "--------------------------------------------------------------------"

        current_files_per_sec=$(extract_files_per_sec "$(cat "${current_file}")")
        old_files_per_sec=$(extract_files_per_sec "$(cat "${old_file}")")

        diff=$(echo "$current_files_per_sec - $old_files_per_sec" | bc -l)
        if (( $(echo "$old_files_per_sec == 0" | bc -l) )); then
            variance="N/A"
            comparison="same"
        else
            variance=$(echo "scale=2; ($current_files_per_sec - $old_files_per_sec)*100/$old_files_per_sec" | bc -l)
            comparison=$(compare_with_tolerance $current_files_per_sec $old_files_per_sec "builtin_mdtest")
        fi

        case $comparison in
            "worse") status="❌ Worse" ;;
            "better") status="✅ Better" ;;
            "same") status="⚖️ Same" ;;
            "skip") status="⏭️ Skipped" ;;
            *) status="⚠️ Unknown" ;;
        esac

         if [[ "$variance" == "N/A" ]]; then
             printf "%-30s %-12.2f %-12.2f %-12.2f %-12s %-12s\n" \
                 "Built-in mdtest" "$current_files_per_sec" "$old_files_per_sec" "$diff" "$status" "$variance"
         else
             printf "%-30s %-12.2f %-12.2f %-12.2f %-12s %-12s%%\n" \
                 "Built-in mdtest" "$current_files_per_sec" "$old_files_per_sec" "$diff" "$status" "$variance"
         fi
    
    # Handle fio scenarios
    elif [[ "$scenario" =~ ^fio ]]; then
        printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" "Operation" "Current IOPS" "Old IOPS" "Diff" "Status" "Variance"
        echo "--------------------------------------------------------------------"

        current_iops=$(extract_iops "$(cat "${current_file}")")
        old_iops=$(extract_iops "$(cat "${old_file}")")
        current_bw=$(extract_bw "$(cat "${current_file}")")
        old_bw=$(extract_bw "$(cat "${old_file}")")

        diff=$(echo "$current_iops - $old_iops" | bc -l)
        variance=$(echo "scale=2; ($current_iops - $old_iops)*100/$old_iops" | bc -l)
        comparison=$(compare_with_tolerance $current_iops $old_iops "fio_${scenario}")

        case $comparison in
            "worse") status="❌ Worse" ;;
            "better") status="✅ Better" ;;
            "same") status="⚖️ Same" ;;
            "skip") status="⏭️ Skipped" ;;
            *) status="⚠️ Unknown" ;;
        esac

        printf "%-30s %-12.2f %-12.2f %-12.2f %-12s %-12s%%\n" \
               "FIO ${scenario}" "$current_iops" "$old_iops" "$diff" "$status" "$variance"
        printf "%-30s %-12s %-12s\n" "Bandwidth" "$current_bw" "$old_bw"

    # Handle mdtest scenarios
    else
        printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" "Operation" "Current Max" "Old Max" "Diff" "Status" "Variance"
        echo "--------------------------------------------------------------------"

        while IFS= read -r current_line && IFS= read -r old_line <&3; do
            if [ -z "$current_line" ] || [ -z "$old_line" ]; then
                continue
            fi

            current_metrics=($(extract_metrics "$current_line"))
            old_metrics=($(extract_metrics "$old_line"))

            current_op="${current_metrics[0]} ${current_metrics[1]}"
            old_op="${old_metrics[0]} ${old_metrics[1]}"

            if [ "$current_op" != "$old_op" ]; then
                echo "Warning: Operation mismatch ('$current_op' vs '$old_op'), skipping..."
                continue
            fi

            current_max=${current_metrics[2]}
            old_max=${old_metrics[2]}

            if [[ "$current_max" =~ ^[0-9.]+$ ]] && [[ "$old_max" =~ ^[0-9.]+$ ]]; then
                diff=$(echo "$current_max - $old_max" | bc -l)
                variance=$(echo "scale=2; ($current_max - $old_max)*100/$old_max" | bc -l)
                comparison=$(compare_with_tolerance $current_max $old_max "$current_op")

                case $comparison in
                    "worse") status="❌ Worse" ;;
                    "better") status="✅ Better" ;;
                    "same") status="⚖️ Same" ;;
                    "skip") status="⏭️ Skipped" ;;
                    *) status="⚠️ Unknown" ;;
                esac

                printf "%-30s %-12.2f %-12.2f %-12.2f %-12s %-12s%%\n" \
                       "$current_op" "$current_max" "$old_max" "$diff" "$status" "$variance"
            else
                printf "%-30s %-12s %-12s %-12s %-12s %-12s\n" \
                       "$current_op" "N/A" "N/A" "N/A" "⚠️ Invalid" "N/A"
            fi
        done < "$current_file" 3< "$old_file"
    fi
}

# Check if any scenario has "worse" results
check_regression() {
    local scenario=$1
    local current_file="${CURRENT_RESULTS}.${scenario}.summary"
    local old_file="${OLD_RESULTS}.${scenario}.summary"
    local regression_detected=0

    # Handle built-in mdtest scenario (scenario3)
    if [[ "$scenario" == "scenario3" ]]; then
        current_files_per_sec=$(extract_files_per_sec "$(cat "${current_file}")")
        old_files_per_sec=$(extract_files_per_sec "$(cat "${old_file}")")
        if (( $(echo "$old_files_per_sec == 0" | bc -l) )); then
            comparison="same"
        else
            comparison=$(compare_with_tolerance $current_files_per_sec $old_files_per_sec "builtin_mdtest")
        fi

        if [ "$comparison" == "worse" ]; then
            variance=$(echo "scale=2; ($current_files_per_sec - $old_files_per_sec)*100/$old_files_per_sec" | bc -l)
            echo "Regression detected in $scenario for built-in mdtest (files/s): Current $current_files_per_sec vs Old $old_files_per_sec (Variance: ${variance}%)"
            regression_detected=1
        fi
    
    # Handle fio scenarios
    elif [[ "$scenario" =~ ^fio ]]; then
        current_iops=$(extract_iops "$(cat "${current_file}")")
        old_iops=$(extract_iops "$(cat "${old_file}")")
        comparison=$(compare_with_tolerance $current_iops $old_iops "fio_${scenario}")

        if [ "$comparison" == "worse" ]; then
            variance=$(echo "scale=2; ($current_iops - $old_iops)*100/$old_iops" | bc -l)
            echo "Regression detected in $scenario: Current $current_iops IOPS vs Old $old_iops IOPS (Variance: ${variance}%)"
            regression_detected=1
        fi

    # Handle mdtest scenarios
    else
        while IFS= read -r current_line && IFS= read -r old_line <&3; do
            # Skip empty lines
            if [ -z "$current_line" ] || [ -z "$old_line" ]; then
                continue
            fi

            current_metrics=($(extract_metrics "$current_line"))
            old_metrics=($(extract_metrics "$old_line"))

            current_op="${current_metrics[0]} ${current_metrics[1]}"
            old_op="${old_metrics[0]} ${old_metrics[1]}"

            if [ "$current_op" != "$old_op" ]; then
                continue
            fi

            current_max=${current_metrics[2]}
            old_max=${old_metrics[2]}

            if [[ "$current_max" =~ ^[0-9.]+$ ]] && [[ "$old_max" =~ ^[0-9.]+$ ]]; then
                comparison=$(compare_with_tolerance $current_max $old_max "$current_op")
                if [ "$comparison" == "worse" ]; then
                    variance=$(echo "scale=2; ($current_max - $old_max)*100/$old_max" | bc -l)
                    echo "Regression detected in $scenario for $current_op: Current $current_max vs Old $old_max (Variance: ${variance}%)"
                    regression_detected=1
                fi
            fi
        done < "$current_file" 3< "$old_file"
    fi

    return $regression_detected
}

echo ""
echo "===================================================================="
echo "Performance Comparison Summary (with 20% tolerance)"
echo "===================================================================="

compare_scenario "scenario1"
compare_scenario "scenario2"
compare_scenario "scenario3"
compare_scenario "fio_scenario4"
compare_scenario "fio_scenario5"
compare_scenario "fio_scenario6"
compare_scenario "fio_scenario7"
compare_scenario "fio_scenario8"
compare_scenario "fio_scenario9"

echo ""
echo "===================================================================="
echo "Regression Check Summary (with 20% tolerance)"
echo "===================================================================="

regression_found=0
if ! check_regression "scenario1"; then
    regression_found=1
fi
if ! check_regression "scenario2"; then
    regression_found=1
fi
if ! check_regression "scenario3"; then
    regression_found=1
fi
if ! check_regression "fio_scenario4"; then
    regression_found=1
fi
if ! check_regression "fio_scenario5"; then
    regression_found=1
fi
if ! check_regression "fio_scenario6"; then
    regression_found=1
fi
if ! check_regression "fio_scenario7"; then
    regression_found=1
fi
if ! check_regression "fio_scenario8"; then
    regression_found=1
fi
if ! check_regression "fio_scenario9"; then
    regression_found=1
fi

if [ $regression_found -eq 1 ]; then
    echo ""
    echo "ERROR: Performance regression detected compared to old version!"
    exit 1
else
    echo ""
    echo "SUCCESS: No performance regression detected."
    exit 0
fi


================================================
FILE: .github/scripts/perf/mdtest_fio.sh
================================================
#!/bin/bash
set -e

MNT_POINT=$1
RESULTS_FILE=$2
VERSION=$3
META_URL=$4

if [[ -z "$META_URL" ]]; then
    echo "ERROR: META_URL is required as 4th argument for built-in mdtest scenario"
    exit 1
fi

mkdir -p "$(dirname "$RESULTS_FILE")"

process_run() {
    local output=$1
    local scenario=$2
    local attempt=$3

    # For built-in mdtest (scenario3) and fio tests, we just capture the output
    if [[ "$scenario" == "scenario3" || "$scenario" =~ ^fio ]]; then
        cp "$output" "${output}.summary"
        return
    fi

    grep -A 100 "SUMMARY rate:" "$output" | \
    grep -v "SUMMARY rate:" | \
    grep -v "\-\-\-" | \
    grep -v "Command line used:" | \
    grep -v "Path:" | \
    grep -v "FS:" | \
    grep -v "Nodemap:" | \
    grep -v "tasks," | \
    awk 'NF' > "${output}.tmp"

    # Convert to CSV format for easier processing
    awk '{
        # Skip lines that don'\''t contain operation metrics
        if ($0 ~ /:/ && $0 !~ /^-+$/) {
            op="";
            for(i=1;i<=NF;i++) {
                if ($i == ":") {
                    # Join all words before ":" as operation name
                    for(j=1;j<i;j++) op=op (j>1?" ":"") $j;
                    # Extract metrics
                    max=$(i+1); min=$(i+2); mean=$(i+3); stddev=$(i+4);
                    print op "," max "," min "," mean "," stddev;
                    break;
                }
            }
        }
    }' "${output}.tmp" > "${output}.csv"

    rm -f "${output}.tmp"
}

calculate_averages() {
    local scenario=$1
    local runs=$2

    # Skip averaging for built-in mdtest (scenario3) and fio tests
    if [[ "$scenario" == "scenario3" || "$scenario" =~ ^fio ]]; then
        return
    fi

    declare -A ops max_sum min_sum mean_sum stddev_sum count
    declare -a op_order  # To maintain operation order

    for ((i=1; i<=runs; i++)); do
        while IFS=, read -r op max min mean stddev; do
            # Skip empty lines
            [ -z "$op" ] && continue

            # Add to op_order if not already present
            if [[ -z "${ops[$op]}" ]]; then
                ops["$op"]=1
                op_order+=("$op")
            fi

            max=$(echo "$max" | tr -d ',')
            min=$(echo "$min" | tr -d ',')
            mean=$(echo "$mean" | tr -d ',')
            stddev=$(echo "$stddev" | tr -d ',')

            max_sum["$op"]=$(echo "${max_sum[$op]:-0} + $max" | bc -l)
            min_sum["$op"]=$(echo "${min_sum[$op]:-0} + $min" | bc -l)
            mean_sum["$op"]=$(echo "${mean_sum[$op]:-0} + $mean" | bc -l)
            stddev_sum["$op"]=$(echo "${stddev_sum[$op]:-0} + $stddev" | bc -l)
            count["$op"]=$(( ${count[$op]:-0} + 1 ))
        done < "${RESULTS_FILE}.${scenario}.run${i}.csv"
    done

    > "${RESULTS_FILE}.${scenario}.summary"  # Clear the file
    for op in "${op_order[@]}"; do
        cnt=${count[$op]:-1}  # Avoid division by zero
        avg_max=$(echo "scale=2; ${max_sum[$op]:-0} / $cnt" | bc -l)
        avg_min=$(echo "scale=2; ${min_sum[$op]:-0} / $cnt" | bc -l)
        avg_mean=$(echo "scale=2; ${mean_sum[$op]:-0} / $cnt" | bc -l)
        avg_stddev=$(echo "scale=2; ${stddev_sum[$op]:-0} / $cnt" | bc -l)

        printf "%-25s : %12.2f %12.2f %12.2f %12.2f\n" \
               "$op" "$avg_max" "$avg_min" "$avg_mean" "$avg_stddev" \
               >> "${RESULTS_FILE}.${scenario}.summary"
    done
}

# Scenario 1: -b 3 -z 1 -I 1000
for i in {1..3}; do
    echo "Running scenario 1 (attempt $i)..."
    output_file="${RESULTS_FILE}.scenario1.run${i}"
    echo 3 | sudo tee /proc/sys/vm/drop_caches
    mpirun --use-hwthread-cpus --allow-run-as-root -np 4 mdtest -b 3 -z 1 -I 300 -d "$MNT_POINT/mdtest" | tee "$output_file"
    process_run "$output_file" "scenario1" $i
    rm -rf "$MNT_POINT/mdtest"/*
done

# Scenario 2: -F -w 102400 -I 1000 -z 0
for i in {1..3}; do
    echo "Running scenario 2 (attempt $i)..."
    output_file="${RESULTS_FILE}.scenario2.run${i}"
    echo 3 | sudo tee /proc/sys/vm/drop_caches
    mpirun --use-hwthread-cpus --allow-run-as-root -np 4 mdtest -F -w 102400 -I 2000 -z 0 -d "$MNT_POINT/mdtest" | tee "$output_file"
    process_run "$output_file" "scenario2" $i
    rm -rf "$MNT_POINT/mdtest"/*
done

# Scenario 3: JuiceFS built-in mdtest (run only once)
echo "Running scenario 3 (built-in mdtest)..."
output_file="${RESULTS_FILE}.scenario3.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
{ time sudo ./juicefs mdtest "$META_URL" /mdtest_perf --threads 10 --dirs 3 --depth 3 --files 100; } 2>&1 | tee "$output_file"
process_run "$output_file" "scenario3" 1

# Fio Scenario 4: Concurrent sequential write of 1 big file per thread (16 threads)
echo "Running fio scenario 4..."
output_file="${RESULTS_FILE}.fio_scenario4.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"

fio --name=big-write --filename="${MNT_POINT}/fio/fio_test_$(date +%Y%m%d_%H%M%S).dat" --group_reporting \
    --rw=write --direct=1 --bs=64k --end_fsync=1 --runtime=200 \
    --numjobs=8 --nrfiles=1 --size=1G --output-format=normal | tee "$output_file"
process_run "$output_file" "fio_scenario4" 1
rm -rf "$MNT_POINT/fio"/*

# Fio Scenario 5: Concurrent sequential write of multiple big files (16 threads, 64 files each)
echo "Running fio scenario 5..."
output_file="${RESULTS_FILE}.fio_scenario5.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"
fio --name=big-write --filename="${MNT_POINT}/fio/fio_test_$(date +%Y%m%d_%H%M%S).dat" --group_reporting \
    --rw=randwrite --direct=1 --bs=64k --end_fsync=1 --runtime=200 \
    --numjobs=8 --nrfiles=1 --size=1G --output-format=normal | tee "$output_file"
process_run "$output_file" "fio_scenario5" 1
rm -rf "$MNT_POINT/fio"/*

# Fio Scenario 6: Sequential read of multiple big files (single thread)
echo "Running fio scenario 6..."
output_file="${RESULTS_FILE}.fio_scenario6.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"
fio --name=big-read-multiple --filename="${MNT_POINT}/fio/fio_test_$(date +%Y%m%d_%H%M%S).dat" --group_reporting --runtime=300 \
    --rw=read --direct=1 --bs=4k --numjobs=8 --nrfiles=1 --size=1G --output-format=normal | tee "$output_file"
process_run "$output_file" "fio_scenario6" 1
rm -rf "$MNT_POINT/fio"/*

# Fio Scenario 7: Concurrent sequential read of multiple big files (64 threads)
echo "Running fio scenario 7..."
output_file="${RESULTS_FILE}.fio_scenario7.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"
fio --name=big-read-multiple-concurrent --filename="${MNT_POINT}/fio/fio_test_$(date +%Y%m%d_%H%M%S).dat" --group_reporting \
    --rw=randread --direct=1 --bs=4k --numjobs=8 --nrfiles=1 --openfiles=1 --size=1G --output-format=normal --runtime=120 | tee "$output_file"
process_run "$output_file" "fio_scenario7" 1
rm -rf "$MNT_POINT/fio"/*

# Fio Scenario 8: Concurrent sequential write of multiple big files (8 threads, 8 files each)
echo "Running fio scenario 8..."
output_file="${RESULTS_FILE}.fio_scenario8.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"
fio --name=big-write --directory="$MNT_POINT/fio" --group_reporting \
    --rw=write --direct=1 --bs=1m --end_fsync=1 --runtime=120 \
    --numjobs=8 --nrfiles=8 --size=1G --output-format=normal | tee "$output_file"
process_run "$output_file" "fio_scenario8" 1
rm -rf "$MNT_POINT/fio"/*


# Fio Scenario 9: Concurrent sequential read of multiple big files (8 threads)
echo "Running fio scenario 9..."
output_file="${RESULTS_FILE}.fio_scenario9.run1"
echo 3 | sudo tee /proc/sys/vm/drop_caches
mkdir -p "$MNT_POINT/fio"
fio --name=big-read-multiple-concurrent --directory="$MNT_POINT/fio" --group_reporting \
    --rw=read --direct=1 --bs=1m --numjobs=8 --nrfiles=8 --openfiles=1 --size=1G --output-format=normal --runtime=120 | tee "$output_file"
process_run "$output_file" "fio_scenario9" 1
rm -rf "$MNT_POINT/fio"/*


# Calculate averages for scenario1 and scenario2
calculate_averages "scenario1" 3
calculate_averages "scenario2" 3

# For scenario3 and fio scenarios, just rename the single run file to .summary
mv "${RESULTS_FILE}.scenario3.run1.summary" "${RESULTS_FILE}.scenario3.summary"
for scenario in fio_scenario4 fio_scenario5 fio_scenario6 fio_scenario7 fio_scenario8 fio_scenario9; do
    mv "${RESULTS_FILE}.${scenario}.run1.summary" "${RESULTS_FILE}.${scenario}.summary"
done

rm -f "${RESULTS_FILE}"*.run*.csv "${RESULTS_FILE}"*.run[1-3]

# Print summary results
echo ""
echo "Summary Results for $VERSION:"
for scenario in scenario1 scenario2; do
    echo ""
    echo "$scenario Results:"
    printf "%-25s %-12s %-12s %-12s %-12s\n" "Operation" "Max" "Min" "Mean" "Std Dev"
    cat "${RESULTS_FILE}.${scenario}.summary"
done

# Print built-in mdtest results
echo ""
echo "Scenario3 (Built-in mdtest) Results:"
cat "${RESULTS_FILE}.scenario3.summary"

# Print fio results
for scenario in fio_scenario4 fio_scenario5 fio_scenario6 fio_scenario7 fio_scenario8 fio_scenario9; do
    echo ""
    echo "${scenario} Results:"
    cat "${RESULTS_FILE}.${scenario}.summary"
done


================================================
FILE: .github/scripts/prepare_db.sh
================================================
#!/bin/bash -e
source .github/scripts/start_meta_engine.sh
[ -z "$TEST" ] && echo "TEST is not set" && exit 1

# check port is ready until 60s, sleep 1s for each query
check_port() {
    port=$1
    echo "check for port:" $port
    for i in {1..30}; do
        sudo lsof -i :$port && echo "port is available: $port after $i sec" && return 0 ||
            (echo "port is not available after $i" && sleep 1)
    done
    echo "service not ready on: $port" && exit 1
}

install_mysql() {
    sudo service mysql start
    sudo mysql -uroot -proot -e "use mysql;alter user 'root'@'localhost' identified with mysql_native_password by '';"
    sudo mysql -e "create database dev;"
    sudo mysql -e "create database dev2;"
    check_port 3306
}

install_postgres() {
    sudo service postgresql start
    sudo chmod 777 /etc/postgresql/*/main/pg_hba.conf
    sudo sed -i "s?local.*all.*postgres.*peer?local   all             postgres                                trust?" /etc/postgresql/*/main/pg_hba.conf
    sudo sed -i "s?host.*all.*all.*32.*scram-sha-256?host    all             all             127.0.0.1/32            trust?" /etc/postgresql/*/main/pg_hba.conf
    sudo sed -i "s?host.*all.*all.*128.*scram-sha-256?host    all             all             ::1/128                 trust?" /etc/postgresql/*/main/pg_hba.conf
    cat /etc/postgresql/*/main/pg_hba.conf
    sudo service postgresql restart
    psql -c "create user runner superuser;" -U postgres
    sudo service postgresql restart
    psql -c 'create database test;' -U postgres
}

install_etcd() {
    docker run -d \
        -p 3379:2379 \
        -p 3380:2380 \
        --name etcd_3_5_7 \
        quay.io/coreos/etcd:v3.5.7 \
        /usr/local/bin/etcd --data-dir=/etcd-data --name node1 \
        --listen-client-urls http://0.0.0.0:2379 \
        --advertise-client-urls http://0.0.0.0:3379 \
        --listen-peer-urls http://0.0.0.0:2380 \
        --initial-advertise-peer-urls http://0.0.0.0:2380 \
        --initial-cluster node1=http://0.0.0.0:2380
    check_port 3379
    check_port 3380
}

install_keydb() {
    echo "deb https://download.keydb.dev/open-source-dist $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/keydb.list
    sudo wget -O /etc/apt/trusted.gpg.d/keydb.gpg https://download.keydb.dev/open-source-dist/keyring.gpg
    sudo .github/scripts/apt_install.sh keydb
    keydb-server --storage-provider flash /tmp/ --port 6378 --bind 127.0.0.1 --daemonize yes
    keydb-server --port 6377 --bind 127.0.0.1 --daemonize yes
    check_port 6377
    check_port 6378
}

install_minio() {
    docker run -d -p 9000:9000 -p 9001:9001 -e "MINIO_ROOT_USER=testUser" -e "MINIO_ROOT_PASSWORD=testUserPassword" quay.io/minio/minio:RELEASE.2022-01-25T19-56-04Z server /data --console-address ":9001"
    go install github.com/minio/mc@RELEASE.2022-01-07T06-01-38Z && mc alias set local http://127.0.0.1:9000 testUser testUserPassword && mc mb local/testbucket
}

install_fdb() {
    wget -O /home/travis/.m2/foundationdb-clients_6.3.23-1_amd64.deb https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-clients_6.3.23-1_amd64.deb
    wget -O /home/travis/.m2/foundationdb-server_6.3.23-1_amd64.deb https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-server_6.3.23-1_amd64.deb
    sudo dpkg -i /home/travis/.m2/foundationdb-clients_6.3.23-1_amd64.deb /home/travis/.m2/foundationdb-server_6.3.23-1_amd64.deb
    check_port 4500
}

install_gluster() {
    sudo systemctl start glusterd.service
    mkdir -p /tmp/gluster/gv0
    sudo hostname jfstest
    sudo gluster volume create gv0 jfstest:/tmp/gluster/gv0 force
    sudo gluster volume start gv0
    sudo gluster volume info gv0
}

install_litmus() {
    wget -O /home/travis/.m2/litmus-0.13.tar.gz http://www.webdav.org/neon/litmus/litmus-0.13.tar.gz
    tar -zxvf /home/travis/.m2/litmus-0.13.tar.gz -C /home/travis/.m2/
    cd /home/travis/.m2/litmus-0.13/ && ./configure && make && cd -
}

install_webdav() {
    wget -O /home/travis/.m2/rclone-v1.57.0-linux-amd64.zip --no-check-certificate https://downloads.rclone.org/v1.57.0/rclone-v1.57.0-linux-amd64.zip
    unzip /home/travis/.m2/rclone-v1.57.0-linux-amd64.zip -d /home/travis/.m2/
    nohup /home/travis/.m2/rclone-v1.57.0-linux-amd64/rclone serve webdav local --addr 127.0.0.1:9007 >>rclone.log 2>&1 &
}

prepare_db() {
    case "$TEST" in
    "test.meta.core")
        retry install_tikv
        install_mysql
        ;;
    "test.meta.non-core")
        install_postgres
        install_etcd
        install_keydb
        ;;
    "test.cmd")
        install_minio
        install_litmus
        ;;
    "test.fdb")
        install_fdb
        ;;
    "test.pkg")
        install_mysql
        retry install_tikv
        install_minio
        install_gluster
        install_webdav
        docker run -d --name sftp -p 2222:22 juicedata/ci-sftp
        docker run -d --name samba -p 4445:445 -e "USER=samba" -e "PASS=secret" dockurr/samba
        install_etcd
        .github/scripts/setup-hdfs.sh
        ;;
    *)
        echo "Test: $TEST is not valid" && exit 1
        ;;
    esac
}

prepare_db


================================================
FILE: .github/scripts/pysdk/bench.py
================================================
import os
import random
import sys
import time
import argparse
import threading
import hashlib

sys.path.append('.')
from sdk.python.juicefs.juicefs import juicefs

def print_stats(stats, interval):
    while not stats['stop']:
        time.sleep(interval)
        elapsed_time = time.time() - stats['start_time']
        iops = stats['ops'] / elapsed_time
        print(f"IOPS: {iops:.2f}")

def seq_write(filename, client: juicefs.Client, protocol, block_size, buffering, run_time, file_size):
    stats = {'bytes': 0, 'ops': 0, 'start_time': time.time(), 'stop': False}
    stats_thread = threading.Thread(target=print_stats, args=(stats, 2))
    stats_thread.start()

    def perform_seq_writes(f):
        while time.time() - stats['start_time'] < run_time and stats['bytes'] < file_size:
            data = os.urandom(block_size)  
            f.write(data)
            stats['bytes'] += block_size
            stats['ops'] += 1

    try:
        if protocol == 'pysdk':
            with client.open(filename, 'wb', buffering=buffering) as f:
                perform_seq_writes(f)
        else:
            with open(f'/tmp/jfs/{filename}', 'wb') as f:
                perform_seq_writes(f)
    finally:
        stats['stop'] = True
        stats_thread.join()

def random_write(filename, client: juicefs.Client, protocol, buffering, block_size, run_time, file_size, seed):
    random.seed(seed)
    stats = {'bytes': 0, 'ops': 0, 'start_time': time.time(), 'stop': False}
    stats_thread = threading.Thread(target=print_stats, args=(stats, 2))
    stats_thread.start()

    write_records = []

    def perform_random_writes(f):
        while time.time() - stats['start_time'] < run_time and stats['bytes'] < file_size:
            offset = random.randint(0, file_size - block_size)
            data = os.urandom(block_size)  
            f.seek(offset)
            f.write(data)
            stats['bytes'] += block_size
            stats['ops'] += 1

            f.seek(offset)
            read_data = f.read(block_size)
            if hashlib.md5(read_data).hexdigest() != hashlib.md5(data).hexdigest():
                print(f"data inconsistency: offset {offset}")
                return False
    try:
        if protocol == 'pysdk':
            with client.open(filename, 'w+b', buffering=buffering) as f:
                perform_random_writes(f)
        else:
            with open(f'/tmp/jfs/{filename}', 'w+b') as f:
                perform_random_writes(f)
    finally:
        stats['stop'] = True
        stats_thread.join()

def seq_read(filename, client: juicefs.Client, protocol, block_size, buffering):
    stats = {'bytes': 0, 'ops': 0, 'start_time': time.time(), 'stop': False}
    stats_thread = threading.Thread(target=print_stats, args=(stats, 2))
    stats_thread.start()

    def perform_seq_reads(f):
        while True:
            buffer = f.read(block_size)
            if not buffer:
                break
            stats['bytes'] += len(buffer)
            stats['ops'] += 1

    try:
        if protocol == 'pysdk':
            with client.open(filename, 'rb', buffering=buffering) as f:
                perform_seq_reads(f)
        else:
            with open(f'/tmp/jfs/{filename}', 'rb') as f:
                perform_seq_reads(f)
    finally:
        stats['stop'] = True
        stats_thread.join()

def random_read(filename, client: juicefs.Client, protocol, buffering, block_size, seed, count):
    random.seed(seed)
    stats = {'bytes': 0, 'ops': 0, 'start_time': time.time(), 'stop': False}
    stats_thread = threading.Thread(target=print_stats, args=(stats, 2))
    stats_thread.start()

    def perform_random_reads(f):
        f.seek(0, 2)
        file_size = f.tell()
        for _ in range(count):
            length = random.randint(1, block_size)
            offset = random.randint(0, file_size - length)
            f.seek(offset)
            buffer = f.read(length)
            stats['bytes'] += len(buffer)
            stats['ops'] += 1

    try:
        if protocol == 'pysdk':
            with client.open(filename, 'rb', buffering=buffering) as f:
                perform_random_reads(f)
        else:
            with open(f'/tmp/jfs/{filename}', 'rb') as f:
                perform_random_reads(f)
    finally:
        stats['stop'] = True
        stats_thread.join()

def clean_page_cache():
    with open('/proc/sys/vm/drop_caches', 'w') as f:
        f.write('3')
        f.flush()

if __name__ == "__main__":
    parser = argparse.ArgumentParser('benchmark on pysdk')
    parser.add_argument('operation', type=str, help='operation: [random_read|seq_read|random_write|seq_write]')
    parser.add_argument('filename', type=str, help='file name')
    parser.add_argument('--seed', type=int, default=0, help='seed of random read/write')
    parser.add_argument('--count', type=int, default=1000, help='count of random read')
    parser.add_argument('--buffer-size', type=int, default=300, help='buffer size')
    parser.add_argument('--block-size', type=int, default=128*1024, help='block size')
    parser.add_argument('--buffering', type=int, default=2*1024*1024, help='buffering')
    parser.add_argument('--run-time', type=int, default=10, help='run time in seconds')
    parser.add_argument('--file-size', type=int, default=1024*1024*1024, help='file size in bytes')
    parser.add_argument('-p', '--protocol', type=str, default='pysdk', help='protocol: [fuse|pysdk]')
    args = parser.parse_args()

    if args.protocol == 'pysdk':
        meta_url=os.environ.get('META_URL', 'redis://localhost')
        client = juicefs.Client("test-volume", meta=meta_url, access_log="/tmp/access.log")
    else:
        client = None
    start=time.time()
    if args.operation == 'seq_read':
        seq_read(client=client, filename=args.filename, protocol=args.protocol, block_size=args.block_size, buffering=args.buffering)
    elif args.operation == 'random_read':
        random_read(client=client, filename=args.filename, protocol=args.protocol, block_size=args.block_size, buffering=args.buffering, seed=args.seed, count=args.count)
    cold_read=time.time()-start
    clean_page_cache()
    start=time.time()
    if args.operation == 'seq_read':
        seq_read(client=client, filename=args.filename, protocol=args.protocol, block_size=args.block_size, buffering=args.buffering)
        hot_read=time.time()-start
        print(f"{cold_read:.2f} {hot_read:.2f} ")
    elif args.operation == 'random_read':
        random_read(client=client, filename=args.filename, protocol=args.protocol, block_size=args.block_size, buffering=args.buffering, seed=args.seed, count=args.count)
        hot_read=time.time()-start
        print(f"{cold_read:.2f} {hot_read:.2f} ")
    elif args.operation == 'seq_write':
        seq_write(client=client, filename=args.filename, protocol=args.protocol, block_size=args.block_size, buffering=args.buffering, run_time=args.run_time, file_size=args.file_size)
    elif args.operation == 'random_write':
        random_write(client=client, filename=args.filename, protocol=args.protocol, buffering=args.buffering, block_size=args.block_size, run_time=args.run_time, file_size=args.file_size, seed=args.seed)
    else:
        raise ValueError(f"Unsupported operation: {args.operation}")

================================================
FILE: .github/scripts/pysdk/pysdk_test.py
================================================
import errno
import fractions
import unittest
import os
import pwd
from os.path import dirname
import sys
import time
sys.path.append('.')
from sdk.python.juicefs.juicefs import juicefs
from bench import seq_write, random_write, seq_read, random_read

TESTFN='/test'
TESTFILE='/test/file'
os.makedirs('/tmp/jfsCache0', exist_ok=True)
meta_url=os.environ.get('META_URL', 'redis://localhost')


class FileTests(unittest.TestCase):
    def setUp(self):
        self.v = juicefs.Client("test-volume", meta=meta_url, access_log="/tmp/access.log")
        if not self.v.exists(TESTFN):
            self.v.mkdir(TESTFN)

    def tearDown(self):
        self.v.rmr(TESTFN)

    def create_file(self, filename, content=b'content'):
        with self.v.open(filename, "xb", 0) as fp:
            fp.write(content)

    def test_read(self):
        with self.v.open(TESTFILE, "w+b") as fobj:
            fobj.write(b"spam")
            fobj.flush()
            fd = fobj.fileno()
            fobj.seek(0,0)
            s = fobj.read(4)
            self.assertEqual(type(s), bytes)
            self.assertEqual(s, b"spam")

    def test_write(self):
        fd = self.v.open(TESTFILE, 'wb')
        self.assertRaises(TypeError, os.write, fd, "beans")
        fd.write(b"bacon\n")
        fd.close()
        with self.v.open(TESTFILE, "rb") as fobj:
            self.assertEqual(fobj.read().splitlines(), [b"bacon"])


class UtimeTests(FileTests):
    def setUp(self):
        super().setUp()
        self.fname = os.path.join(TESTFN, "f1")
        if not self.v.exists(self.fname):
            self.create_file(self.fname)

    def _test_utime(self, set_time, filename=None):
        if not filename:
            filename = self.fname
        atime = 1.0   # 1.0 seconds
        mtime = 4.0   # 4.0 seconds
        set_time(filename, (atime, mtime))
        st = self.v.stat(filename)
        self.assertEqual(st.st_atime, atime)
        self.assertEqual(st.st_mtime, mtime)

    def test_utime(self):
        def set_time(filename, times):
            self.v.utime(filename, times)
        self._test_utime(set_time)

    def test_utime_by_times(self):
        self.test_utime()


class MakedirTests(FileTests):
    def test_makedir(self):
        base = TESTFN
        path = os.path.join(base, 'dir1', 'dir2', 'dir3')
        self.v.makedirs(path)             # Should work
        path = os.path.join(base, 'dir1', 'dir2', 'dir3', 'dir4')
        self.v.makedirs(path)
        self.assertRaises(OSError, self.v.makedirs, os.curdir)
        path = os.path.join(base, 'dir1', 'dir2', 'dir3', 'dir4', 'dir5', os.curdir)
        path = os.path.join(base, 'dir1', os.curdir, 'dir2', 'dir3', 'dir4',
                            'dir5', 'dir6')
        self.v.makedirs(path)


class ChownFileTests(FileTests):
    def test_chown_uid_gid_arguments_must_be_index(self):
        stat = self.v.stat(TESTFN)
        uid = stat.st_uid
        gid = stat.st_gid
        for value in (-1.0, -1j, fractions.Fraction(-2, 2)):
            self.assertRaises(TypeError, self.v.chown, TESTFN, value, gid)
            self.assertRaises(TypeError, self.v.chown, TESTFN, uid, value)
        self.assertIsNone(self.v.chown(TESTFN, uid, gid))

    def test_chown_with_root(self):
        try:
            all_users = [u.pw_uid for u in pwd.getpwall()]
        except (AttributeError):
            all_users = []
        uid_1, uid_2 = all_users[:2]
        gid = self.v.stat(TESTFN).st_gid
        self.v.chown(TESTFN, uid_1, gid)
        uid = self.v.stat(TESTFN).st_uid
        self.assertEqual(uid, uid_1)
        self.v.chown(TESTFN, uid_2, gid)
        uid = self.v.stat(TESTFN).st_uid
        self.assertEqual(uid, uid_2)


class LinkTests(FileTests):
    def setUp(self):
        super().setUp()
        self.file1 = os.path.join(TESTFN, "1")
        self.file2 = os.path.join(TESTFN, "2")

    def are_files_same(self, file1, file2):
        stat1 = self.v.lstat(file1)
        stat2 = self.v.lstat(file2)
        return stat1.st_ino  == stat2.st_ino and stat1.st_dev == stat2.st_dev

    def _test_link(self, file1, file2):
        self.create_file(file1)

        try:
            self.v.link(file1, file2)
        except PermissionError as e:
            self.skipTest('os.link(): %s' % e)
        self.assertTrue(self.are_files_same(file1, file2))

    def test_link(self):
        self._test_link(self.file1, self.file2)


class SummaryTests(FileTests):
    # /test/dir1/file
    #      /dir2
    #      /file
    def setUp(self):
        super().setUp()
        self.create_file(TESTFILE)
        self.v.mkdir(TESTFN + '/dir1')
        self.create_file(TESTFN + '/dir1/file')
        self.v.mkdir(TESTFN + '/dir2')

    def test_summary(self):
        res = self.v.summary(TESTFILE, depth=258, entries=2)
        self.assertTrue(normalize(res)==normalize({"Path": "file", "Type": 2, "Files":1, "Dirs":0, "Size":4096}))
        res = self.v.summary(TESTFN)
        self.assertTrue(normalize(res)==normalize({"Path": "test", "Type": 2, "Files":2, "Dirs":3, "Size":20480}))
        res = self.v.summary(TESTFN, depth=257, entries=1)
        self.assertTrue(normalize(res)==normalize({"Path": "test", "Type": 2, "Files":2, "Dirs":3, "Size":20480, "Children":[
            {"Path": "dir1", "Type": 2, "Files":1, "Dirs":1, "Size":8192},{'Path': '...', 'Type': 1, 'Size': 8192, 'Files': 1, 'Dirs': 1}]}))
        res = self.v.summary(TESTFN, depth=258, entries=1)
        self.assertTrue(normalize(res)==normalize(
            {
                "Path": "test", "Type": 2, "Files":2, "Dirs":3, "Size":20480, "Children":
                [
                    {"Path": "dir1", "Type": 2, "Files":1, "Dirs":1, "Size":8192, "Children": [
                        {"Path": "dir1/file", "Type": 1, "Size": 4096, "Files": 1, "Dirs": 0}
                    ]
                     },{'Path': '...', 'Type': 1, 'Size': 8192, 'Files': 1, 'Dirs': 1}
                ]}
        ))
        res = self.v.summary(TESTFN, depth=259, entries=4)
        self.assertTrue(normalize(res)==normalize(
            {
                "Path": "test", "Type": 2, "Files":2, "Dirs":3, "Size":20480, "Children":
                [
                    {
                        "Path": "dir1", "Type": 2, "Files":1, "Dirs":1, "Size":8192, "Children":
                        [{"Path": "dir1/file", "Type": 1, "Size": 4096, "Files": 1, "Dirs": 0}]
                    },{
                    'Path': 'file', 'Type': 1, 'Size': 4096, 'Files': 1, 'Dirs': 0
                },{
                    'Path': 'dir2', 'Type': 2, 'Size': 4096, 'Files': 0, 'Dirs': 1
                }
                ]}
        ))


class QuotaTests(FileTests):
    def test_quota(self):
        # /test/dir1/file
        #      /dir2
        #      /file
        self.create_file(TESTFILE)
        self.v.mkdir(TESTFN + '/dir1')
        self.create_file(TESTFN + '/dir1/file')
        self.v.mkdir(TESTFN + '/dir2')

        # set quota
        self.v.set_quota(path=TESTFN, capacity=1024*1024*1024, inodes=1000, create=True)
        res = self.v.get_quota(path=TESTFN)
        self.assertTrue(normalize(res)==normalize({"/test": {"MaxSpace": 1024*1024*1024, "MaxInodes": 1000, "UsedSpace": 0, "UsedInodes": 3}}))

        res = self.v.list_quota()
        self.assertTrue(normalize(res)==normalize({"/test": {"MaxSpace": 1024*1024*1024, "MaxInodes": 1000, "UsedSpace": 0, "UsedInodes": 3}}))

        self.v.set_quota(path=TESTFN+"/dir1",  capacity=1024*1024*1024, inodes=10000, create=True, strict=True)
        res = self.v.list_quota()
        self.assertTrue(normalize(res)==normalize({"/test": {"MaxSpace": 1024*1024*1024, "MaxInodes": 1000, "UsedSpace": 0, "UsedInodes": 3}, "/test/dir1": {"MaxSpace": 1024*1024*1024, "MaxInodes": 10000, "UsedSpace": 4096, "UsedInodes": 1}}))

        # check quota
        self.v.check_quota(path=TESTFN, strict=True, repair=True)

        # unset quota
        self.v.del_quota(path=TESTFN)
        res = self.v.get_quota(path=TESTFN)
        self.assertTrue(res=={})


def normalize(d):
    if isinstance(d, dict):
        if "Children" in d:
            d["Children"].sort(key=lambda x: x["Path"])
        return {k: normalize(v) for k, v in d.items()}
    elif isinstance(d, list):
        return sorted((normalize(x) for x in d), key=lambda x: x.get("Path", ""))
    else:
        return d


class NonLocalSymlinkTests(FileTests):
    def test_directory_link_nonlocal(self):
        src = os.path.join(TESTFN, 'some_link')
        self.v.symlink('/some_dir', src)
        assert self.v.readlink(src) == '../some_dir'


class ExtendedAttributeTests(FileTests):
    def _check_xattrs_str(self, s, getxattr, setxattr, removexattr, listxattr, **kwargs):
        fn = TESTFN + '_xattr'
        if self.v.exists(fn):
            self.v.unlink(fn)
        self.create_file(fn)

        #        with self.assertRaises(OSError) as cm:
        #            self.v.getxattr(fn, s("user.test"), **kwargs)
        #        self.assertEqual(cm.exception.errno, errno.ENODATA)

        init_xattr = self.v.listxattr(fn)
        self.assertIsInstance(init_xattr, list)

        self.v.setxattr(fn, s("user.test"), b"a", **kwargs)
        xattr = set(init_xattr)
        xattr.add("user.test")
        self.assertEqual(set(self.v.listxattr(fn)), xattr)
        self.assertEqual(self.v.getxattr(fn, b"user.test", **kwargs), b"a")
        self.v.setxattr(fn, s("user.test"), b"hello", os.XATTR_REPLACE, **kwargs)
        self.assertEqual(self.v.getxattr(fn, b"user.test", **kwargs), b"hello")

        with self.assertRaises(OSError) as cm:
            self.v.setxattr(fn, s("user.test"), b"bye", os.XATTR_CREATE, **kwargs)
        self.assertEqual(cm.exception.errno, errno.EEXIST)

        #        with self.assertRaises(OSError) as cm:
        #            self.v.setxattr(fn, s("user.test2"), b"bye", os.XATTR_REPLACE, **kwargs)
        #        self.assertEqual(cm.exception.errno, errno.ENODATA)

        self.v.setxattr(fn, s("user.test2"), b"foo", os.XATTR_CREATE, **kwargs)
        xattr.add("user.test2")
        self.assertEqual(set(self.v.listxattr(fn)), xattr)
        self.v.removexattr(fn, s("user.test"), **kwargs)

        with self.assertRaises(OSError) as cm:
            self.v.getxattr(fn, s("user.test"), **kwargs)
        self.assertEqual(cm.exception.errno, errno.ENODATA)

        xattr.remove("user.test")
        self.assertEqual(set(self.v.listxattr(fn)), xattr)
        self.assertEqual(self.v.getxattr(fn, s("user.test2"), **kwargs), b"foo")
        self.v.setxattr(fn, s("user.test"), b"a"*1024, **kwargs)
        self.assertEqual(self.v.getxattr(fn, s("user.test"), **kwargs), b"a"*1024)
        self.v.removexattr(fn, s("user.test"), **kwargs)
        many = sorted("user.test{}".format(i) for i in range(100))
        for thing in many:
            self.v.setxattr(fn, thing, b"x", **kwargs)
        self.assertEqual(set(self.v.listxattr(fn)), set(init_xattr) | set(many))

    def _check_xattrs(self, *args, **kwargs):
        self._check_xattrs_str(str, *args, **kwargs)
        self.v.unlink(TESTFN + '_xattr')

        self._check_xattrs_str(os.fsencode, *args, **kwargs)
        self.v.unlink(TESTFN + '_xattr')

    def test_simple(self):
        self._check_xattrs(self.v.getxattr, self.v.setxattr, self.v.removexattr,
                           self.v.listxattr)

    def test_fds(self):
        def getxattr(path, *args):
            with self.v.open(path, "rb") as fp:
                return self.v.getxattr(fp.fileno(), *args)
        def setxattr(path, *args):
            with self.v.open(path, "wb", 0) as fp:
                self.v.setxattr(fp.fileno(), *args)
        def removexattr(path, *args):
            with self.v.open(path, "wb", 0) as fp:
                self.v.removexattr(fp.fileno(), *args)
        def listxattr(path, *args):
            with self.v.open(path, "rb") as fp:
                return self.v.listxattr(fp.fileno(), *args)
        self._check_xattrs(getxattr, setxattr, removexattr, listxattr)


class BenchTests(FileTests):
    test_file = TESTFILE + '_bench'
    block_size = 128 * 1024  # 128KB
    buffer_size = 300
    buffering = 2 * 1024 * 1024
    run_time = 30
    file_size = 100 * 1024 * 1024
    seed = 20
    count = 200

    def test_seq_write(self):
        print('test_seq_write')
        seq_write(
            filename=self.test_file,
            client=self.v,
            protocol='pysdk',
            block_size=self.block_size,
            buffering=self.buffering,
            run_time=self.run_time,
            file_size=self.file_size
        )
        self.assertTrue(self.v.exists(self.test_file))
        stat = self.v.stat(self.test_file)
        self.assertGreater(stat.st_size, 0)

    def test_random_write(self):
        print('test_random_write')
        random_write(
            filename=self.test_file,
            client=self.v,
            protocol='pysdk',
            buffering=self.buffering,
            block_size=self.block_size,
            run_time=self.run_time,
            file_size=self.file_size,
            seed=self.seed
        )
        self.assertTrue(self.v.exists(self.test_file))
        stat = self.v.stat(self.test_file)
        self.assertGreater(stat.st_size, 0)

    def test_seq_read(self):
        print('test_seq_read')
        with self.v.open(self.test_file, 'wb') as f:
            f.write(os.urandom(self.file_size))

        seq_read(
            filename=self.test_file,
            client=self.v,
            protocol='pysdk',
            block_size=self.block_size,
            buffering=self.buffering
        )

    def test_random_read(self):
        print('test_random_read')
        with self.v.open(self.test_file, 'wb') as f:
            f.write(os.urandom(self.file_size))

        random_read(
            filename=self.test_file,
            client=self.v,
            protocol='pysdk',
            buffering=self.buffering,
            block_size=self.block_size,
            seed=self.seed,
            count=self.count
        )


class ClientParamsTests(FileTests):
    testfile = TESTFN + '/testfile'

    def test_readonly_param(self):
        v = juicefs.Client(
            "test-volume-ro",
            meta=meta_url,
            read_only=True
        )
        with self.assertRaises(OSError):
            v.open(self.testfile, 'w')

    def test_cache_params(self):
        v = juicefs.Client(
            "test-volume-cache",
            meta=meta_url,
            cache_dir="/tmp/jfs_test_cache",
            cache_size="100M",
            cache_partial_only=False
        )

        size_mb = 48
        test_data = os.urandom(size_mb * 1024 * 1024)
        with v.open(self.testfile, 'wb') as f:
            f.write(test_data)

        with v.open(self.testfile, 'rb') as f:
            read_data = f.read()
        self.assertEqual(read_data, test_data)

        cache_dir = "/tmp/jfs_test_cache"
        cache_size = 0
        for root, dirs, files in os.walk(cache_dir):
            for file in files:
                cache_size += os.path.getsize(os.path.join(root, file))
        self.assertGreaterEqual(cache_size, size_mb * 1024 * 1024/2)

    def test_io_limits(self):
        v = juicefs.Client(
            "test-volume-limited",
            meta=meta_url,
            upload_limit="1M",
            download_limit="1M"
        )

        test_data = b"x" * (10 * 1024 * 1024)  # 10MB
        start_time = time.time()
        with v.open(self.testfile, 'wb') as f:
            f.write(test_data)
        write_time = time.time() - start_time

        self.assertGreaterEqual(write_time, 10.0)


class CloneTests(FileTests):
    def setUp(self):
        super().setUp()
        self.source = TESTFN + '/source'
        self.target = TESTFN + '/target'
        self.test_data = b"Hello JuiceFS!" * 1024

        with self.v.open(self.source, 'wb') as f:
            f.write(self.test_data)

    def test_basic_clone(self):
        self.v.clone(self.source, self.target)

        self.assertTrue(self.v.exists(self.target))

        with self.v.open(self.target, 'rb') as f:
            cloned_data = f.read()
        self.assertEqual(cloned_data, self.test_data)

        source_stat = self.v.stat(self.source)
        target_stat = self.v.stat(self.target)
        self.assertEqual(source_stat.st_size, target_stat.st_size)

    def test_clone_with_preserve(self):
        self.v.chmod(self.source, 0o644)

        self.v.clone(self.source, self.target, preserve=True)
        source_stat = self.v.stat(self.source)
        target_stat = self.v.stat(self.target)
        self.assertEqual(source_stat.st_mode, target_stat.st_mode)


class WarmupTests(unittest.TestCase):
    @classmethod
    def setUpClass(self):
        self.v = juicefs.Client(
            "test-warmup",
            meta=meta_url,
            cache_dir="/tmp/jfs_test_warmup",
            cache_size="1000M",
            cache_partial_only=True
        )
        if self.v.exists(TESTFN):
            self.v.rmr(TESTFN)
        self.v.mkdir(TESTFN)
        self.test_files = [
            TESTFN + '/file1',
            TESTFN + '/file2'
        ]
        size_mb = 50
        test_data = os.urandom(size_mb * 1024 * 1024)
        for file in self.test_files:
            with self.v.open(file, 'wb') as f:
                f.write(test_data)

    @classmethod
    def tearDownClass(self):
        if self.v.exists(TESTFN):
            self.v.warmup(self.test_files, isEvict=True)
            self.v.rmr(TESTFN)

    def test_basic_warmup(self):
        result = self.v.warmup(self.test_files, numthreads=4)
        self.assertIn('FileCount', result)
        self.assertEqual(result['FileCount'], 2)
        self.assertIn('SliceCount', result)
        self.assertIn('TotalBytes', result)
        self.assertIn('MissBytes', result)
        #        self.assertIn('Locations', result)
        cache_dir = "/tmp/jfs_test_warmup"
        size_mb = 100
        cache_size = 0
        time.sleep(2)
        for root, dirs, files in os.walk(cache_dir):
            for file in files:
                cache_size += os.path.getsize(os.path.join(root, file))
        self.assertGreaterEqual(cache_size, size_mb * 1024 * 1024)

    def test_warmup_check(self):
        self.v.warmup(self.test_files)
        result = self.v.warmup(self.test_files, isCheck=True)
        self.assertEqual(result['MissBytes'], 0)
        self.assertTrue(any('jfs_test_warmup' in path for path in result['Locations']),
                        msg=f"'jfs_test_warmup' not found in {result['Locations']}")

    def test_warmup_evict(self):
        self.v.warmup(self.test_files)
        result = self.v.warmup(self.test_files, isEvict=True)
        time.sleep(2)
        cache_dir = "/tmp/jfs_test_warmup"
        size_mb = 1
        cache_size = 0
        for root, dirs, files in os.walk(cache_dir):
            for file in files:
                cache_size += os.path.getsize(os.path.join(root, file))
        self.assertLessEqual(cache_size, size_mb * 1024 * 1024)
        result = self.v.warmup(self.test_files, isCheck=True)
        self.assertEqual(result['MissBytes'], result['TotalBytes'])


class InfoTests(FileTests):
    def test_file_info(self):
        self.test_dir = TESTFN + '/infotest'
        self.test_file = self.test_dir + '/testfile'
        self.v.makedirs(self.test_dir)
        with self.v.open(self.test_file, 'w') as f:
            f.write("test content")

        info = self.v.info(self.test_dir,recursive=True,strict=True)
        self.assertIn('Length', info)
        self.assertEqual(info['Files'], 1)
        self.assertEqual(info['Dirs'], 1)


if __name__ == '__main__':
    unittest.main()


================================================
FILE: .github/scripts/random_read_write.py
================================================
import random
import os

def random_write(path1, path2, count=1000):
    if not os.path.exists(path1):
        os.system(f'touch {path1}')
    if not os.path.exists(path2):
        os.system(f'touch {path2}')
    with open(path1, 'r+b') as f1, open(path2, 'r+b') as f2:
        print(f1.seek(0, 2))
        for i in range(1, count):
            # Get the size of the file
            # size = os.path.getsize(path1)
            size = f1.seek(0, 2)
            # Generate a random position within the file that is not at the end
            pos = random.randint(0, size)
            f1.seek(pos, 0)
            f2.seek(pos, 0)
            # Generate random data
            length = random.randint(1, 1024*1024*5)
            data = os.urandom(length)
            # data = b"abcdefg"
            length = len(data)
            # Write data to the files
            f1.write(data)
            f2.write(data)
            f1.flush()
            f2.flush()
            assert f1.seek(0, 2) == pos+max(length, size-pos)
            assert f1.seek(0, 2) == f2.seek(0, 2)
            print("Wrote %d bytes at position %d" % (length, pos))

def random_read(path1, path2):
    with open(path1, 'rb') as f1, open(path2, 'rb') as f2:
        size = f1.seek(0, 2)
        pos = random.randint(0, size)
        f1.seek(pos)
        f2.seek(pos)
        len = random.randint(1, 1024*1024)
        assert f1.read(len) == f2.read(len)
        print("Read %d bytes at position %d" % (len, pos))

def read_all(path1, path2):
    with open(path1, 'rb') as f1, open(path2, 'rb') as f2:
        assert f1.read() == f2.read()
        print("Read all bytes")
    
if __name__ == '__main__':
    path1 = os.environ.get('PATH1', '/tmp/test1')
    path2 = os.environ.get('PATH2', '/tmp/test2')
    print(f'path1: {path1}, path2: {path2}')
    if os.path.exists(path1):
        os.remove(path1)
    if os.path.exists(path2):
        os.remove(path2)
    for i in range(10):
        random_write(path1, path2, count=100)

    for i in range(1000):
        random_read(path1, path2)

    read_all(path1, path2)

================================================
FILE: .github/scripts/save_benchmark.sh
================================================
#/bin/bash -e

mount_jfs(){
    mkdir -p /root/.juicefs
    wget -q s.juicefs.com/static/Linux/mount -O /root/.juicefs/jfsmount 
    chmod +x /root/.juicefs/jfsmount
    curl -s -L https://juicefs.com/static/juicefs -o /usr/local/bin/juicefs && sudo chmod +x /usr/local/bin/juicefs
    juicefs auth ci-coverage --access-key $AWS_ACEESS_KEY --secret-key $AWS_SECRET_KEY --token $AWS_ACCESS_TOKEN --encrypt-keys
    juicefs mount ci-coverage --subdir juicefs/ci-benchmark/ --allow-other /ci-benchmark
}  

save_benchmark(){
    while [[ $# -gt 0 ]]; do
        key="$1"
        case $key in
            --name)
                name="$2"
                shift
                ;;
            --result)
                result="$2"
                shift
                ;;
            --meta)
                meta="$2"
                shift
                ;;
            --storage)
                storage="$2"
                shift
                ;;
            --extra)
                extra="$2"
                shift
                ;;
            *)
                # Unknown option
                ;;
        esac
        shift
    done
    [[ -z $name ]] && echo "name is required" && exit 1
    [[ -z $result ]] && echo "result is required" && exit 1
    [[ -z $meta ]] && echo "meta is required" && exit 1
    [[ -z $storage ]] && storage='unknown'

    version=$(./juicefs -V | cut -b 17- | sed 's/:/-/g')
    created_date=$(date +"%Y-%m-%d")
    cat <<EOF > result.json
    {
        "workflow": "$GITHUB_WORKFLOW",
        "name": "$name",
        "result": "$result",
        "meta": "$meta",
        "storage": "$storage",
        "extra": "$extra",
        "version": "$version",
        "created_date": "$created_date",
        "github_repo": "$GITHUB_REPOSITORY",
        "github_ref_name": "$GITHUB_REF_NAME",
        "github_run_id": "$GITHUB_RUN_ID",
        "github_sha": "$GITHUB_SHA",
        "workflow_url": "https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID",
    }
EOF
    cat result.json
    if [[ "$GITHUB_EVENT_NAME" == "schedule" || "$GITHUB_EVENT_NAME" == "workflow_dispatch"   ]]; then
        mount_jfs
        echo "save result.json to /ci-benchmark/$GITHUB_WORKFLOW/$name/$created_date/$meta-$storage.json"
        mkdir -p /ci-benchmark/$GITHUB_WORKFLOW/$name/$created_date/
        cp result.json /ci-benchmark/$GITHUB_WORKFLOW/$name/$created_date/$meta-$storage.json
    fi
}

save_benchmark $@


================================================
FILE: .github/scripts/setup-hdfs.sh
================================================
#!/bin/bash

#  JuiceFS, Copyright 2021 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
sudo apt-get update
sudo apt-get install openjdk-8-jdk -y

HADOOP_VERSION="2.10.2"
wget -q https://dlcdn.apache.org/hadoop/common/hadoop-2.10.2/hadoop-2.10.2.tar.gz
mkdir ~/app
tar -zxf hadoop-${HADOOP_VERSION}.tar.gz -C ~/app

sudo tee -a ~/.bashrc <<EOF
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export JRE_HOME=\${JAVA_HOME}/jre
export CLASSPATH=.:\${JAVA_HOME}/lib:\${JRE_HOME}/lib
export PATH=\${PATH}:\${JAVA_HOME}/bin

export HADOOP_HOME=~/app/hadoop-${HADOOP_VERSION}
export HADOOP_CONF_DIR=\${HADOOP_HOME}/etc/hadoop
export PATH=\$PATH:\${HADOOP_HOME}/bin:\${HADOOP_HOME}/sbin
EOF

source ~/.bashrc
echo $HADOOP_HOME
echo $HADOOP_CONF_DIR
echo $PATH

ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa -q
cat ~/.ssh/id_rsa.pub  >> ~/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
echo "StrictHostKeyChecking no" >> ~/.ssh/config

sed -i 's/${JAVA_HOME}/\/usr\/lib\/jvm\/java-8-openjdk-amd64/g' ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/hadoop-env.sh

sudo tee ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/core-site.xml <<EOF
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://localhost:8020</value>
        </property>

        <property>
            <name>hadoop.tmp.dir</name>
            <value>${HOME}/apps/tmp</value>
        </property>
    </configuration>
EOF

sudo tee ~/app/hadoop-${HADOOP_VERSION}/etc/hadoop/hdfs-site.xml <<EOF
    <configuration>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
    </configuration>
EOF

cd ~/app/hadoop-${HADOOP_VERSION}/bin
./hdfs namenode -format
cd ~/app/hadoop-${HADOOP_VERSION}/sbin
./start-dfs.sh

for i in {1..3} ; do
  ProcNumber=$( jps |grep -w DataNode|wc -l)
  if [ ${ProcNumber} -lt 1 ];then
    echo "current java process:"
    jps
    echo "The DataNode is not running, Retry for the $i time..."
    ./start-dfs.sh
  fi
done

echo "hello world" > /tmp/testfile
cd ~/app/hadoop-${HADOOP_VERSION}/bin
./hdfs dfs -put /tmp/testfile /
./hdfs dfs -rm /testfile
./hdfs dfs -chmod 777 /

echo "hdfs started successfully"


================================================
FILE: .github/scripts/ssh/Dockerfile
================================================
FROM ubuntu:latest
RUN apt update && apt install  openssh-server sudo -y
RUN groupadd juicedata && useradd -ms /bin/bash -g juicedata juicedata -u 1024
RUN mkdir /var/jfs
RUN mkdir -p /home/juicedata/.ssh
COPY id_rsa.pub /home/juicedata/.ssh/authorized_keys
RUN chown juicedata:juicedata /home/juicedata/.ssh/authorized_keys && chmod 600 /home/juicedata/.ssh/authorized_keys
RUN service ssh start
EXPOSE 22
CMD ["/usr/sbin/sshd","-D"]

================================================
FILE: .github/scripts/ssh/docker-compose.yml
================================================
version: '2'
services:
  worker1:
    image: juicedata/ssh
    container_name: worker1
    restart: unless-stopped
    networks:
      static-network:
        ipv4_address: 172.20.0.2
    
  worker2:
    image: juicedata/ssh
    container_name: worker2
    restart: unless-stopped
    networks:
      static-network:
        ipv4_address: 172.20.0.3
  
networks:
  static-network:
    ipam:
      config:
        - subnet: 172.20.0.0/16


================================================
FILE: .github/scripts/start_meta_engine.sh
================================================
#!/bin/bash -e
REDIS_CSC_QUERY="client-cache=true&client-cache-size=500&client-cache-expire=60s&client-cache-preload=100"

retry() {
    local retries=5
    local delay=3
    for i in $(seq 1 $retries); do
        set +e
        ( set -e; "$@" )
        exit=$?
        set -e
        if [ $exit == 0 ]; then
            echo "run $@ succceed"
            return $exit
        elif [ $i ==  $retries ]; then
            echo "Retry failed after $i attempts."
            exit $exit
        else
            echo "Retry in $delay seconds..."
            sleep $delay
        fi
    done
}

install_tikv(){
    [[ ! -d tcli ]] && git clone https://github.com/c4pt0r/tcli
    make -C tcli && sudo cp tcli/bin/tcli /usr/local/bin
    # retry because of: https://github.com/pingcap/tiup/issues/2057
    echo 'head -1' > /tmp/head.txt
    if lsof -i:2379 && pgrep pd-server && tcli -pd 127.0.0.1:2379 < /tmp/head.txt; then
        echo "TiKV is already running and healthy"
        return 0
    fi
    user=$(whoami)
    echo user is $user
    if [[ "$user" == "root" ]]; then
        curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sudo sh
        export PATH=/root/.tiup/bin:$PATH
        tiup=/root/.tiup/bin/tiup
    elif [[ "$user" == "runner" ]]; then
        curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh
        export PATH=/home/runner/.tiup/bin:$PATH
        tiup=/home/runner/.tiup/bin/tiup
    else
        echo "Unknown user $user"
        exit 1
    fi
    echo tiup is $tiup
    echo $(whoami) $(pwd)
    $tiup playground --mode tikv-slim > tikv.log 2>&1  &
    pid=$!
    timeout=60
    count=0
    while true; do
        echo 'head -1' > /tmp/head.txt
        lsof -i:2379 && pgrep pd-server && tcli -pd 127.0.0.1:2379 < /tmp/head.txt && exit_code=0 || exit_code=$?
        if [ $exit_code -eq 0 ]; then
            echo "TiDB is running."
            exit 0
        fi
        sleep 1
        count=$((count+1))
        if [ $count -eq $timeout ]; then
            echo "TiDB failed to start within $timeout seconds."
            kill -9 $pid || true
            exit 1
        fi
    done
}

install_tidb(){
    user=$(whoami)
    echo user is $user
    if [[ "$user" == "root" ]]; then
        curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sudo sh
        tiup=/root/.tiup/bin/tiup
    elif [[ "$user" == "runner" ]]; then
        curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh
        tiup=/home/runner/.tiup/bin/tiup
    else
        echo "Unknown user $user"
        exit 1
    fi
    echo tiup is $tiup
    
    $tiup playground 5.4.0 > tidb.log 2>&1  &
    pid=$!
    timeout=60
    count=0
    while true; do
        lsof -i:4000 && pgrep pd-server && mysql -h127.0.0.1 -P4000 -uroot -e "select version();" && exit_code=0 || exit_code=$?
        if [ $exit_code -eq 0 ]; then
            echo "TiDB is running."
            exit 0
        fi
        sleep 1
        count=$((count+1))
        if [ $count -eq $timeout ]; then
            echo "TiDB failed to start within $timeout seconds."
            kill -9 $pid || true
            exit 1
        fi
    done
}

start_meta_engine(){
    meta=$1
    storage=$2
    if [ "$meta" == "mysql" ]; then
        sudo /etc/init.d/mysql start
    elif [ "$meta" == "redis" ]; then
        sudo .github/scripts/apt_install.sh  redis-tools redis-server
    elif [ "$meta" == "tikv" ]; then
        retry install_tikv
    elif [ "$meta" == "badger" ]; then
        sudo go get github.com/dgraph-io/badger/v3
    elif [ "$meta" == "mariadb" ]; then
        if lsof -i:3306; then
            echo "mariadb is already running"
        else
            docker run -p 127.0.0.1:3306:3306  --name mdb -e MARIADB_ROOT_PASSWORD=root -d mariadb:latest
            sleep 10
        fi
    elif [ "$meta" == "tidb" ]; then
        retry install_tidb
        mysql -h127.0.0.1 -P4000 -uroot -e "set global tidb_enable_noop_functions=1;"
    elif [ "$meta" == "etcd" ]; then
        sudo .github/scripts/apt_install.sh etcd
    elif [ "$meta" == "fdb" ]; then
        if lsof -i:4500; then
            echo "fdb is already running"
        else  
            docker run --name fdb --rm -d -p 4500:4500 foundationdb/foundationdb:6.3.23
            sleep 5
            docker exec fdb fdbcli --exec "configure new single memory"
            echo "docker:docker@127.0.0.1:4500" > /home/runner/fdb.cluster
            fdbcli -C /home/runner/fdb.cluster --exec "status"
        fi
    elif [ "$meta" == "ob" ]; then
        docker rm obstandalone --force || echo "remove obstandalone failed"
        docker run -p 2881:2881 --name obstandalone -e MINI_MODE=1 -d oceanbase/oceanbase-ce
        sleep 60
        mysql -h127.0.0.1 -P2881 -uroot -e "ALTER SYSTEM SET _ob_enable_prepared_statement=TRUE;"
    elif [ "$meta" == "postgres" ]; then
        echo "start postgres"
        lsof -i:5432 || true
        if lsof -i:5432; then
            echo "postgres is already running"
        else
            # default max_connections is 100.
            docker run --name postgresql \
                -e POSTGRES_USER=postgres \
                -e POSTGRES_PASSWORD=postgres \
                -p 5432:5432 \
                -v /tmp/postgresql:/var/lib/postgresql \
                -d postgres \
                -N 300
            sleep 10
            docker exec -i postgresql psql -U postgres -c "SHOW max_connections;"
        fi
    fi
    
    if [ "$storage" == "minio" ]; then
        if ! docker ps | grep "minio/minio"; then
            docker run -d -p 9000:9000 --name minio \
                -e "MINIO_ACCESS_KEY=minioadmin" \
                -e "MINIO_SECRET_KEY=minioadmin" \
                -v /tmp/data:/data \
                -v /tmp/config:/root/.minio \
                minio/minio server /data
            sleep 3s
        fi
        [ ! -x mc ] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc && chmod +x mc
        ./mc alias set myminio http://localhost:9000 minioadmin minioadmin || ./mc alias set myminio http://127.0.0.1:9000 minioadmin minioadmin
    elif [ "$storage" == "gluster" ]; then
        dpkg -s glusterfs-server || .github/scripts/apt_install.sh glusterfs-server
        systemctl start glusterd.service
    elif [ "$meta" != "postgres" ] && [ "$storage" == "postgres" ]; then
        echo "start postgres"
        if lsof -i:5432; then
            echo "postgres is already running"
        else
            docker run --name postgresql \
                -e POSTGRES_USER=postgres \
                -e POSTGRES_PASSWORD=postgres \
                -p 5432:5432 \
                -v /tmp/data:/var/lib/postgresql/data \
                -d postgres
            sleep 10
        fi
    elif [ "$meta" != "mysql" ] && [ "$storage" == "mysql" ]; then
        echo "start mysql"
        sudo /etc/init.d/mysql start
    fi
}

get_meta_url(){
    meta=$1
    if [ "$meta" == "postgres" ]; then
        meta_url="postgres://postgres:postgres@127.0.0.1:5432/test?sslmode=disable"
    elif [ "$meta" == "mysql" ]; then
        meta_url="mysql://root:root@(127.0.0.1)/test?max_open_conns=30"
    elif [ "$meta" == "redis" ]; then
        meta_url="redis://127.0.0.1:6379/1?${REDIS_CSC_QUERY}"
    elif [ "$meta" == "sqlite3" ]; then
        meta_url="sqlite3://test.db"
    elif [ "$meta" == "tikv" ]; then
        meta_url="tikv://127.0.0.1:2379/test"
    elif [ "$meta" == "badger" ]; then
        meta_url="badger:///tmp/test"
    elif [ "$meta" == "mariadb" ]; then
        meta_url="mysql://root:root@(127.0.0.1)/test?max_open_conns=30"
    elif [ "$meta" == "tidb" ]; then
        meta_url="mysql://root:@(127.0.0.1:4000)/test"
    elif [ "$meta" == "etcd" ]; then
        meta_url="etcd://localhost:2379/test"
    elif [ "$meta" == "fdb" ]; then
        meta_url="fdb:///home/runner/fdb.cluster?prefix=jfs"
    elif [ "$meta" == "ob" ]; then
        meta_url="mysql://root:@\\(127.0.0.1:2881\\)/test"
    else
        echo >&2 "<FATAL>: meta $meta is not supported"
        meta_url=""
        return 1
    fi
    echo $meta_url
    return 0
}

get_meta_url2(){
    meta=$1
    if [ "$meta" == "postgres" ]; then
        meta_url="postgres://postgres:postgres@127.0.0.1:5432/test2?sslmode=disable"
    elif [ "$meta" == "mysql" ]; then
        meta_url="mysql://root:root@(127.0.0.1)/test2?max_open_conns=30"
    elif [ "$meta" == "redis" ]; then
        meta_url="redis://127.0.0.1:6379/2?${REDIS_CSC_QUERY}"
    elif [ "$meta" == "sqlite3" ]; then
        meta_url="sqlite3://test2.db"
    elif [ "$meta" == "tikv" ]; then
        meta_url="tikv://127.0.0.1:2379/jfs2"
    elif [ "$meta" == "badger" ]; then
        meta_url="badger:///tmp/test2"
    elif [ "$meta" == "mariadb" ]; then
        meta_url="mysql://root:root@(127.0.0.1)/test2?max_open_conns=30"
    elif [ "$meta" == "tidb" ]; then
        meta_url="mysql://root:@(127.0.0.1:4000)/test2"
    elif [ "$meta" == "etcd" ]; then
        meta_url="etcd://localhost:2379/test2"
    elif [ "$meta" == "fdb" ]; then
        meta_url="fdb:///home/runner/fdb.cluster?prefix=jfs2"
    elif [ "$meta" == "ob" ]; then
        meta_url="mysql://root:@\\(127.0.0.1:2881\\)/test2"
    else
        echo >&2 "<FATAL>: meta $meta is not supported"
        meta_url=""
        return 1
    fi
    echo $meta_url
    return 0
}

create_database(){
    meta_url=$1
    db_name=$(basename $meta_url | awk -F? '{print $1}')
    if [[ "$meta_url" == mysql* ]]; then
        user=$(echo $meta_url |  awk -F/ '{print $3}' | awk -F@ '{print $1}' | awk -F: '{print $1}')
        password=$(echo $meta_url |  awk -F/ '{print $3}' | awk -F@ '{print $1}' | awk -F: '{print $2}')
        test -n "$password" && password="-p$password" || password=""
        host=$(basename $(dirname $meta_url) | awk -F@ '{print $2}'| sed 's/(//g' | sed 's/)//g' | awk -F: '{print $1}')
        port=$(basename $(dirname $meta_url) | awk -F@ '{print $2}'| sed 's/(//g' | sed 's/)//g' | awk -F: '{print $2}')
        test -z "$port" && port="3306"
        echo user=$user, password=$password, host=$host, port=$port, db_name=$db_name
        if [ "$#" -eq 2 ]; then
            echo isolation_level=$2
            mysql -u$user $password -h $host -P $port -e "set global transaction isolation level $2;"
            mysql -u$user $password -h $host -P $port -e "show variables like '%isolation%;'"
        fi
        mysql -u$user $password -h $host -P $port -e "drop database if exists $db_name; create database $db_name;"
        elif [[ "$meta_url" == postgres* ]]; then
            export PGPASSWORD="postgres"
            printf "\set AUTOCOMMIT on\ndrop database if exists $db_name; create database $db_name; " |  psql -U postgres -h localhost
        if [ "$#" -eq 2 ]; then
            echo isolation_level=$2
            printf "\set AUTOCOMMIT on\nALTER DATABASE $db_name SET DEFAULT_TRANSACTION_ISOLATION TO '$2';" |  psql -U postgres -h localhost
        fi
    fi
}


================================================
FILE: .github/scripts/sync/sync.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$ENCRYPT" ]] && ENCRYPT=false
[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
FORMAT_OPTIONS=""
if [ "$ENCRYPT" == "true" ]; then
    export JFS_RSA_PASSPHRASE=the-passwd-for-rsa
    openssl genrsa -aes256 -passout pass:$JFS_RSA_PASSPHRASE -out my-priv-key.pem 2048
    FORMAT_OPTIONS="--encrypt-rsa-key my-priv-key.pem"
fi

generate_source_dir(){
    rm -rf jfs_source
    git clone https://github.com/juicedata/juicefs.git jfs_source --depth 1
    chmod 777 jfs_source
    mkdir jfs_source/empty_dir
    dd if=/dev/urandom of=jfs_source/file bs=5M count=1
    chmod 777 jfs_source/file
    ln -sf file jfs_source/symlink_to_file
    ln -f jfs_source/file jfs_source/hard_link_to_file
    id -u juicefs  && sudo userdel juicefs
    sudo useradd -u 1101 juicefs
    sudo -u juicefs touch jfs_source/file2
    ln -s ../cmd jfs_source/pkg/symlink_to_cmd
}

generate_source_dir

generate_fsrand(){
    seed=$(date +%s)
    python3 .github/scripts/fsrand.py -a -c 2000 -s $seed  fsrand
}

test_sync_with_mount_point(){
    do_sync_with_mount_point 
    do_sync_with_mount_point --list-threads 10 --list-depth 5
    do_sync_with_mount_point --dirs --update --perms --check-all 
    do_sync_with_mount_point --dirs --update --perms --check-all --list-threads 10 --list-depth 5
}

test_sync_without_mount_point(){
    do_sync_without_mount_point 
    do_sync_without_mount_point --list-threads 10 --list-depth 5
    do_sync_without_mount_point --dirs --update --perms --check-all 
    do_sync_without_mount_point --dirs --update --perms --check-all --list-threads 10 --list-depth 5
}

do_sync_without_mount_point(){
    prepare_test
    options=$@
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    meta_url=$META_URL ./juicefs sync jfs_source/ jfs://meta_url/jfs_source/ $options --links

    ./juicefs mount -d $META_URL /jfs
    if [[ ! "$options" =~ "--dirs" ]]; then
        find jfs_source -type d -empty -delete
    fi
    find /jfs/jfs_source -type f -name ".*.tmp*" -delete
    diff -ur --no-dereference  jfs_source/ /jfs/jfs_source
}

do_sync_with_mount_point(){
    prepare_test
    options=$@
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs sync jfs_source/ /jfs/jfs_source/ $options --links

    if [[ ! "$options" =~ "--dirs" ]]; then
        find jfs_source -type d -empty -delete
    fi
    find /jfs/jfs_source -type f -name ".*.tmp*" -delete
    diff -ur --no-dereference jfs_source/ /jfs/jfs_source/
}

test_sync_with_loop_link(){
    prepare_test
    options="--dirs --update --perms --check-all --list-threads 10 --list-depth 5"
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount -d $META_URL /jfs
    ln -s looplink jfs_source/looplink
    ./juicefs sync jfs_source/ /jfs/jfs_source/ $options  2>&1 | tee err.log || true
    grep -i "failed to handle 1 objects" err.log || (echo "grep failed" && exit 1)
    rm -rf jfs_source/looplink
}

test_sync_with_deep_link(){
    prepare_test
    options="--dirs --update --perms --check-all --list-threads 10 --list-depth 5"
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount -d $META_URL /jfs
    touch jfs_source/symlink_1
    for i in {1..41}; do
        ln -s symlink_$i jfs_source/symlink_$((i+1))
    done
    ./juicefs sync jfs_source/ /jfs/jfs_source/ $options  2>&1 | tee err.log || true
    grep -i "failed to handle 1 objects" err.log || (echo "grep failed" && exit 1)
    rm -rf jfs_source/symlink_*
}

skip_test_sync_fsrand_with_mount_point(){
    generate_fsrand
    do_test_sync_fsrand_with_mount_point 
    do_test_sync_fsrand_with_mount_point --list-threads 10 --list-depth 5
    do_test_sync_fsrand_with_mount_point --dirs --update --perms --check-all 
    do_test_sync_fsrand_with_mount_point --dirs --update --perms --check-all --list-threads 10 --list-depth 5
}

do_test_sync_fsrand_with_mount_point(){
    prepare_test
    options=$@
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount -d $META_URL /jfs
    ./juicefs sync fsrand/ /jfs/fsrand/ $options --links

    if [[ ! "$options" =~ "--dirs" ]]; then
        find jfs_source -type d -empty -delete
    fi
    diff -ur --no-dereference fsrand/ /jfs/fsrand/
}

test_sync_include_exclude_option(){
    prepare_test
    ./juicefs format --trash-days 0 $FORMAT_OPTIONS $META_URL myjfs
    ./juicefs mount $META_URL /jfs -d
    ./juicefs sync jfs_source/ /jfs/
    for source_dir in "/jfs/" "jfs_source/" ; do 
        while IFS=, read -r jfs_option rsync_option status; do
            printf '\n%s, %s, %s\n' "$jfs_option" "$rsync_option" "$status"
            status=$(echo $status| xargs)
            if [[ -z "$status" || "$status" = "disable" ]]; then 
                continue
            fi
            if [ "$source_dir" == "/jfs/" ]; then 
                jfs_option="--exclude .stats --exclude .config $jfs_option " 
                rsync_option="--exclude .stats --exclude .config $rsync_option " 
            fi
            rm rsync_dir/ -rf && mkdir rsync_dir
            set -o noglob
            rsync -a $source_dir rsync_dir/ $rsync_option
            rm jfs_sync_dir/ -rf && mkdir jfs_sync_dir/
            ./juicefs sync $source_dir jfs_sync_dir/ $jfs_option --list-threads 2
            set -u noglob
            printf 'juicefs sync %s %s %s\n' "$source_dir"  "jfs_sync_dir/" "$jfs_option" 
            printf 'rsync %s %s %s\n' "$source_dir" "rsync_dir/"  "$rsync_option" 
            printf 'diff between juicefs sync and rsync:\n'
            diff -ur jfs_sync_dir rsync_dir
        done < .github/workflows/resources/sync-options.txt
    done
}

test_sync_with_time(){
    prepare_test
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount $META_URL /jfs -d
    rm -rf data/
    mkdir data
    echo "old" > data/file1
    echo "old" > data/file2
    echo "old" > data/file3
    sleep 1
    start_time=$(date "+%Y-%m-%d %H:%M:%S")
    sleep 1
    echo "new" > data/file2
    sleep 1
    mid_time=$(date "+%Y-%m-%d %H:%M:%S")
    sleep 1
    echo "new" > data/file3
    sleep 1
    end_time=$(date "+%Y-%m-%d %H:%M:%S")
    mkdir -p sync_dst1 sync_dst2
    ./juicefs sync --start-time "$start_time" data/ /jfs/sync_dst1/
    [ "$(cat /jfs/sync_dst1/file1 2>/dev/null)" = "" ] || (echo "file1 should not exist" && exit 1)
    [ "$(cat /jfs/sync_dst1/file2)" = "new" ] || (echo "file2 should be new" && exit 1)
    [ "$(cat /jfs/sync_dst1/file3)" = "new" ] || (echo "file3 should be new" && exit 1)
    ./juicefs sync --start-time "$start_time" --end-time "$mid_time" data/ /jfs/sync_dst2/
    [ "$(cat /jfs/sync_dst2/file1 2>/dev/null)" = "" ] || (echo "file1 should not exist" && exit 1)
    [ "$(cat /jfs/sync_dst2/file2)" = "new" ] || (echo "file2 should be new" && exit 1)
    [ "$(cat /jfs/sync_dst2/file3 2>/dev/null)" = "" ] || (echo "file3 should not exist" && exit 1)
}

test_sync_check_change()
{
    prepare_test
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount $META_URL /jfs -d
    rm -rf data/
    mkdir data
    nohup bash -c 'for i in `seq 1 1000000`; do echo $i >> data/echo; done' > /dev/null 2>&1 &
    pid=$!
    sleep 0.5
    ./juicefs sync --check-change data/ /jfs/data/ 2>&1 | grep "changed during sync" || (echo "should detect file changes during sync" && exit 1 )
    kill $pid || true
}

test_ignore_existing()
{
    prepare_test
    rm -rf /tmp/src_dir /tmp/rsync_dir /tmp/jfs_sync_dir
    mkdir -p /tmp/src_dir/d1
    mkdir -p /tmp/jfs_sync_dir/d1
    echo abc > /tmp/src_dir/file1
    echo 1234 > /tmp/jfs_sync_dir/file1
    echo abcde > /tmp/src_dir/d1/d1file1
    echo 123456 > /tmp/jfs_sync_dir/d1/d1file1
    cp -rf /tmp/jfs_sync_dir/ /tmp/rsync_dir
    
    mkdir /tmp/src_dir/no-exist-dir
    echo 1111 > /tmp/src_dir/no-exist-dir/f1
    echo 123456 > /tmp/src_dir/d1/no-exist-file

    ./juicefs sync /tmp/src_dir /tmp/jfs_sync_dir --existing
    rsync -r /tmp/src_dir/ /tmp/rsync_dir --existing --size-only
    diff -ur /tmp/jfs_sync_dir /tmp/rsync_dir
    
    rm -rf /tmp/src_dir /tmp/rsync_dir
    mkdir -p /tmp/src_dir/d1
    mkdir -p /tmp/jfs_sync_dir/d1
    echo abc > /tmp/src_dir/file1
    echo 1234 > /tmp/jfs_sync_dir/file1
    echo abcde > /tmp/src_dir/d1/d1file1
    echo 123456 > /tmp/jfs_sync_dir/d1/d1file1
    echo abc > /tmp/src_dir/file2
    echo abcde > /tmp/src_dir/d1/d1file2
    cp -rf /tmp/jfs_sync_dir/ /tmp/rsync_dir
    
    ./juicefs sync /tmp/src_dir /tmp/jfs_sync_dir --ignore-existing 
    rsync -r /tmp/src_dir/ /tmp/rsync_dir --ignore-existing --size-only
    diff -ur /tmp/jfs_sync_dir /tmp/rsync_dir
}
test_file_head(){
    # issue link: https://github.com/juicedata/juicefs/issues/2125
    ./juicefs format $META_URL $FORMAT_OPTIONS myjfs
    ./juicefs mount $META_URL /jfs -d
    mkdir /jfs/jfs_source/
    [[ ! -d jfs_source ]] && git clone https://github.com/juicedata/juicefs.git jfs_source
    ./juicefs sync jfs_source/ /jfs/jfs_source/  --update --perms --check-all --bwlimit=81920 --dirs --threads=30 --list-threads=3 --debug
    echo "test" > jfs_source/test_file
    mkdir -p jfs_source/test_dir
    ./juicefs sync jfs_source/ /jfs/jfs_source/  --update --perms --check-all --bwlimit=81920 --dirs --threads=30 --list-threads=2 --debug
    find /jfs/jfs_source -type f -name ".*.tmp*" -delete
    diff -ur jfs_source/ /jfs/jfs_source
}


source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/sync/sync_cluster.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh
[[ -z "$CI" ]] && CI=false
[[ -z "$META" ]] && META=redis
[[ -z "$KEY_TYPE" ]] && KEY_TYPE=ed25519
[[ -z "$FILE_COUNT" ]] && FILE_COUNT=600
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)
dpkg -s gawk || .github/scripts/apt_install.sh gawk
start_minio(){
    if ! docker ps | grep "minio/minio"; then
        docker run -d -p 9000:9000 --name minio \
                -e "MINIO_ACCESS_KEY=minioadmin" \
                -e "MINIO_SECRET_KEY=minioadmin" \
                -v /tmp/data:/data \
                -v /tmp/config:/root/.minio \
                minio/minio server /data
        sleep 3s
    fi
    [ ! -x mc ] && wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc && chmod +x mc
    ./mc alias set myminio http://localhost:9000 minioadmin minioadmin || ./mc alias set myminio http://127.0.0.1:9000 minioadmin minioadmin
}
start_minio
start_worker(){
    if getent group juicedata ; then groupdel -f juicedata; echo delete juicedata group; fi
    if getent passwd juicedata ; then rm -rf /home/juicedata && userdel -f juicedata; echo delete juicedata user; fi
    groupadd juicedata && useradd -ms /bin/bash -g juicedata juicedata -u 1024
    if [ "$CI" != "true" ] && [ -f ~/.ssh/id_rsa ]; then
        echo "ssh key already exists, don't overwrite it in non ci environment"
    else
        echo "generating ssh key with type $KEY_TYPE"
        yes |sudo -u juicedata ssh-keygen -t $KEY_TYPE -C "default" -f /home/juicedata/.ssh/id_rsa -q -N ""
        chmod 600 /home/juicedata/.ssh/id_rsa
    fi
    cp -f /home/juicedata/.ssh/id_rsa.pub .github/scripts/ssh/id_rsa.pub
    docker build -t juicedata/ssh -f .github/scripts/ssh/Dockerfile .github/scripts/ssh
    docker rm worker1 worker2 -f
    docker compose -f .github/scripts/ssh/docker-compose.yml up -d
    sleep 3s
    sudo -u juicedata ssh -o BatchMode=yes -o StrictHostKeyChecking=no juicedata@172.20.0.2 exit
    sudo -u juicedata ssh -o BatchMode=yes -o StrictHostKeyChecking=no juicedata@172.20.0.3 exit
}
start_worker

sed -i 's/bind 127.0.0.1 ::1/bind 0.0.0.0 ::1/g' /etc/redis/redis.conf
systemctl restart redis
META_URL=$(echo $META_URL | sed 's/127\.0\.0\.1/172.20.0.1/g')
# github runner 22.04 will set /home/runner to 750, which make juicefs binary not accessed by other users.
chmod 755 /home/runner/

test_sync_without_mount_point(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    file_count=$FILE_COUNT
    mkdir -p /jfs/data
    for i in $(seq 1 $file_count); do
        dd if=/dev/urandom of=/jfs/data/file$i bs=1M count=1 status=none
    done
    dd if=/dev/urandom of=/jfs/data/file$file_count bs=1M count=1024
    (./mc rb myminio/data1 > /dev/null 2>&1 --force || true) && ./mc mb myminio/data1
    sudo -u juicedata meta_url=$META_URL ./juicefs sync -v jfs://meta_url/data/ minio://minioadmin:minioadmin@172.20.0.1:9000/data1/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-change \
         >sync.log 2>&1
    # diff data/ /jfs/data1/
    check_sync_log $file_count
    ./mc rm -r --force myminio/data1
}

test_sync_without_mount_point2(){
    prepare_test
    file_count=$FILE_COUNT
    rm -rf data/
    mkdir -p data/
    for i in $(seq 1 $file_count); do
        dd if=/dev/urandom of=data/file$i bs=1M count=1 status=none
    done
    dd if=/dev/urandom of=data/file$file_count bs=1M count=1024
    (./mc rb myminio/data > /dev/null 2>&1 --force || true) && ./mc mb myminio/data
    ./mc cp -r data myminio/data
    
    # (./mc rb myminio/data1 > /dev/null 2>&1 --force || true) && ./mc mb myminio/data1
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync -v minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-change \
         >sync.log 2>&1
    set +o pipefail
    check_sync_log $file_count
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync -v  minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 --start-time 2020-01-01 \
         --list-threads 10 --list-depth 5 --check-all \
         >sync.log 2>&1
    set +o pipefail
    ./juicefs mount -d $META_URL /jfs
    diff data/ /jfs/data/
    current_time=$(date -d "1 minute ago" "+%Y-%m-%d %H:%M:%S")
    for i in $(seq 1 $file_count); do
        dd if=/dev/urandom of=data/file$i bs=1M count=2 status=none
    done
    dd if=/dev/urandom of=data/file$file_count bs=1M count=10
    ./mc cp -r data myminio/data
    sleep 2
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync  minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 --start-time "$current_time" \
         --list-threads 10 --list-depth 5 --update \
         >sync.log 2>&1
    set +o pipefail
    diff data/ /jfs/data/
    ./mc rm -r --force myminio/data
    rm -rf data
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal or ERROR in sync.log" && exit 1 || true
}   

test_sync_delete_src_and_update(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    file_count=$FILE_COUNT
    rm -rf data
    mkdir -p data
    for i in $(seq 1 $file_count); do
        echo "test-$i" > data/test-$i
    done
    ./mc cp -r data myminio/data
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync  minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --dirs --check-change \
         >sync.log 2>&1
    set +o pipefail
    diff data/ /jfs/data/
    rm sync.log
    for i in $(seq 1 $file_count); do
        echo "test-update-$i" > data/test-$i
    done
    ./mc cp -r data myminio/data
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --delete-src --update --dirs --check-change \
         >sync.log 2>&1
    set +o pipefail
    diff data/ /jfs/data/
    set -o pipefail
    sudo -u juicedata meta_url=$META_URL ./juicefs sync  minio://minioadmin:minioadmin@172.20.0.1:9000/data/ jfs://meta_url/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --delete-src --dirs --check-change \
         >sync.log 2>&1
    set +o pipefail
    if ./mc ls myminio/data/ | grep -q .; then
        echo "Error: MinIO bucket /data is not empty"
        exit 1
    fi
    diff data/ /jfs/data/
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal or ERROR in sync.log" && exit 1 || true
}

test_sync_delete_dst(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    file_count=$FILE_COUNT
    rm -rf data
    mkdir -p /jfs/data
    for i in $(seq 1 $file_count); do
        dd if=/dev/urandom of=/jfs/data/file$i bs=1M count=1 status=none
    done
    dd if=/dev/urandom of=/jfs/data/file$file_count bs=1M count=1024
    echo "retain" > /jfs/data/retain
    chmod -R 777 /jfs/data
    rm -rf empty && mkdir empty
    sudo -u juicedata meta_url=$META_URL ./juicefs sync --delete-dst --match-full-path --exclude='retain' --include='*' \
         ./empty/ jfs://meta_url/data/  --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-new \
         >sync.log 2>&1
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal in sync.log" && exit 1 || true
    [ ! -f /jfs/data/retain ] && echo "Error: retain file was incorrectly deleted" && exit 1 || true
}

test_sync_with_random_test(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs/test || true
    mkdir /jfs/test2 || true
    current_time=$(date -d "1 minute ago" "+%Y-%m-%d %H:%M:%S")
    ./random-test runOp -baseDir /jfs/test -files 100000 -ops 1000000 -threads 50 -dirSize 100 -duration 30s -createOp 30,uniform \
    -deleteOp 5,end --linkOp 10,uniform --symlinkOp 20,uniform --setXattrOp 10,uniform --truncateOp 10,uniform
    chmod -R 777 /jfs/test
    chmod -R 777 /jfs/test2
    sudo -u juicedata meta_url=$META_URL ./juicefs sync jfs://meta_url/test/ jfs://meta_url/test2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-new --links --dirs --start-time "$current_time" \
         >sync.log 2>&1
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal in sync.log" && exit 1 || true
    sudo -u juicedata meta_url=$META_URL ./juicefs sync --delete-src --match-full-path jfs://meta_url/test/ jfs://meta_url/test2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-all --links --start-time 2199-12-30 \
         >sync.log 2>&1
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal in sync.log" && exit 1 || true 
    sudo -u juicedata meta_url=$META_URL ./juicefs sync --delete-src --match-full-path jfs://meta_url/test/ jfs://meta_url/test2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 --dirs \
         --list-threads 10 --list-depth 5 --check-all --links --start-time "$current_time" \
         >sync.log 2>&1
    grep "panic:\|<FATAL>\|ERROR" sync.log && echo "panic or fatal in sync.log" && exit 1 || true
    [ -z "$(ls -A /jfs/test)" ] || exit 1
    rm -rf empty || mkdir empty
    sudo -u juicedata meta_url=$META_URL ./juicefs sync --delete-dst --match-full-path  --include='*' \
         ./empty/ jfs://meta_url/test2/ --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-change --dirs --links --start-time "$current_time" \
         >sync.log 2>&1
    grep "panic:\|<FATAL>" sync.log && echo "panic or fatal in sync.log" && exit 1 || true
    [ -z "$(ls -A /jfs/test2)" ] || exit 1
}

test_sync_files_from_file(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs/test || true
    mkdir /jfs/test2 || true
    ./random-test runOp -baseDir /jfs/test -files 50000 -ops 500000 -threads 50 -dirSize 100 -duration 30s -createOp 30,uniform \
    -deleteOp 5,end --linkOp 10,uniform --symlinkOp 20,uniform --setXattrOp 10,uniform --truncateOp 10,uniform
    chmod -R 777 /jfs/test
    chmod -R 777 /jfs/test2
    ls /jfs/test > files | tee files
    sudo -u juicedata meta_url=$META_URL ./juicefs sync jfs://meta_url/test/ jfs://meta_url/test2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-all --check-change --links --dirs --files-from files \
         >sync.log 2>&1
    grep "panic\|<FATAL>\|ERROR" sync.log && echo "panic or fatal or error in sync.log" && exit 1 || true
}

test_sync_chown_perms(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    mkdir /jfs/data
    for i in $(seq 1 $FILE_COUNT); do
        mkdir /jfs/data/test$i
        dd if=/dev/urandom of=/jfs/data/test$i/file$i bs=1M count=1 status=none
    done
    sudo chown 1000:1000 /jfs/data -R
    sudo chmod -R 777 /jfs/data
    sudo -u juicedata meta_url=$META_URL ./juicefs sync jfs://meta_url/data/ jfs://meta_url/data2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-all --links --dirs --perms \
         >sync.log 2>&1
    grep "panic\|<FATAL>\|ERROR" sync.log && echo "panic or fatal or error in sync.log" && exit 1 || true
    diff /jfs/data/ /jfs/data2/
}

skip_test_sync_between_oss(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    mkdir -p /jfs/test
    file_count=$FILE_COUNT
    for i in $(seq 1 $file_count); do
        dd if=/dev/urandom of=/jfs/file$i bs=1M count=1 status=none
    done
    start_gateway
    sudo -u juicedata ./juicefs sync -v minio://minioadmin:minioadmin@172.20.0.1:9005/myjfs/ \
         minio://minioadmin:minioadmin@172.20.0.1:9000/myjfs/ \
        --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
        --list-threads 10 --list-depth 5 \
        > sync.log 2>&1
    count1=$(./mc ls myminio/myjfs/test -r | wc -l)
    count2=$(./mc ls juicegw/myjfs/test -r | awk '$4=="5MiB"' | wc -l)
    if [ "$count1" != "$count2" ]; then
        echo "count not equal, $count1, $count2"
        exit 1
    fi
    check_sync_log $file_count
}

test_sync_worker_down(){
    prepare_test
    ./juicefs mount -d $META_URL /jfs
    file_count=$FILE_COUNT 
    mkdir -p /jfs/data
    for i in $(seq 1 $file_count); do
        echo "test-$i" > /jfs/data/test-$i
    done
    docker stop worker1
    sudo -u juicedata meta_url=$META_URL ./juicefs sync jfs://meta_url/data/ jfs://meta_url/data2/ \
         --manager-addr 172.20.0.1:8081 --worker juicedata@172.20.0.2,juicedata@172.20.0.3 \
         --list-threads 10 --list-depth 5 --check-new \
         >sync.log 2>&1
    diff /jfs/data/ /jfs/data2/
    docker start worker1
}

check_sync_log(){
    grep "panic:\|<FATAL>" sync.log && echo "panic or fatal in sync.log" && exit 1 || true
    file_count=$1
    if tail -1 sync.log | grep -q "close session"; then
      file_copied=$(tail -n 3 sync.log | head -n 1  | sed 's/.*copied: \([0-9]*\).*/\1/' )
    else
      file_copied=$(tail -1 sync.log  | sed 's/.*copied: \([0-9]*\).*/\1/' )
    fi
    if [ "$file_copied" != "$file_count" ]; then
        echo "file_copied not equal, $file_copied, $file_count"
        exit 1
    fi
    count2=$(cat sync.log | grep 172.20.0.2 | grep "receive stats" | gawk '{sum += gensub(/.*Copied:([0-9]+).*/, "\\1", "g");} END {print sum;}')
    [ -z "$count2" ] && count2=0
    count3=$(cat sync.log | grep 172.20.0.3 | grep "receive stats" | gawk '{sum += gensub(/.*Copied:([0-9]+).*/, "\\1", "g");} END {print sum;}')
    [ -z "$count3" ] && count3=0
    count1=$((file_count - count2 - count3))
    echo "count1, $count1, count2, $count2, count3, $count3"
    min_count=10
    # check if count1 is less than min_count
    if [ "$count1" -lt "$min_count" ] || [ "$count2" -lt "$min_count" ] || [ "$count3" -lt "$min_count" ]; then
        echo "count is less than min_count, $count1, $count2, $count3, $min_count"
        exit 1
    fi
}

prepare_test(){
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs
    rm -rf /var/jfsCache/myjfs
    (./mc rb myminio/myjfs > /dev/null 2>&1 --force || true) && ./mc mb myminio/myjfs
    ./juicefs format $META_URL myjfs --storage minio --access-key minioadmin --secret-key minioadmin --bucket http://172.20.0.1:9000/myjfs
}
start_gateway(){
    lsof -i :9005 | awk 'NR!=1 {print $2}' | xargs -r kill -9
    MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin ./juicefs gateway $META_URL 172.20.0.1:9005 &
    ./mc alias set juicegw http://172.20.0.1:9005 minioadmin minioadmin --api S3v4
}


source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/sync/sync_fsrand.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh
[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META
META_URL=$(get_meta_url $META)

[[ -z "$SEED" ]] && SEED=$(date +%s)
[[ -z "$DERANDOMIZE" ]] && DERANDOMIZE=false
[[ -z "$MAX_EXAMPLE" ]] && MAX_EXAMPLE=100
[[ -z "$GOCOVERDIR" ]] && GOCOVERDIR=/tmp/cover
[[ -z "$USER" ]] && USER=root
if [ ! -d "$GOCOVERDIR" ]; then
    mkdir -p $GOCOVERDIR
fi
trap "echo random seed is $SEED" EXIT
SOURCE_DIR1=/tmp/fsrand1/
SOURCE_DIR2=/tmp/fsrand2/
DEST_DIR1=/tmp/jfs/fsrand1/
DEST_DIR2=/tmp/jfs/fsrand2/

rm $SOURCE_DIR1 -rf && sudo -u $USER mkdir $SOURCE_DIR1
rm $SOURCE_DIR2 -rf && sudo -u $USER mkdir $SOURCE_DIR2
EXCLUDE_RULES="utime"
PROFILE=generate EXCLUDE_RULES=$EXCLUDE_RULES MAX_EXAMPLE=$MAX_EXAMPLE SEED=$SEED ROOT_DIR1=$SOURCE_DIR1 ROOT_DIR2=$SOURCE_DIR2 python3 .github/scripts/hypo/fs.py || true
prepare_test()
{
    umount_jfs /tmp/jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs || true
    rm -rf /var/jfsCache/myjfs || true
    ./juicefs format $META_URL myjfs
}

test_cmp_cp(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync.log || true
    do_copy $sync_option
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_cmp_cp_without_perms(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --check-all --links --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync.log || true
    do_copy $sync_option
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_cmp_cp_without_links(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --check-all --perms --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync.log || true
    do_copy $sync_option
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_no_mount_point(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync1.log || true
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync -v $SOURCE_DIR1 jfs://meta_url/fsrand2/ $sync_option 2>&1| tee sync2.log || true
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_inplace(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option1="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sync_option2="--dirs --perms --check-all --links --list-threads 10 --list-depth 5 --inplace"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync -v $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option1 2>&1| tee sync1.log || true
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync -v $SOURCE_DIR1 jfs://meta_url/fsrand2/ $sync_option2 2>&1| tee sync2.log || true
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_list_threads(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option1="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sync_option2="--dirs --perms --check-all --links"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option1 2>&1| tee sync1.log || true
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR2 $sync_option2 2>&1| tee sync2.log || true
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_update(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync.log || true
    do_copy $sync_option
    check_diff $DEST_DIR1 $DEST_DIR2
    
    sudo -u $USER PROFILE=generate EXCLUDE_RULES=$EXCLUDE_RULES MAX_EXAMPLE=$MAX_EXAMPLE SEED=$SEED ROOT_DIR1=$SOURCE_DIR1 ROOT_DIR2=$SOURCE_DIR2 python3 .github/scripts/hypo/fs.py || true
    # chmod 777 $SOURCE_DIR1
    # chmod 777 $SOURCE_DIR2
    do_copy $sync_option
    for i in {1..5}; do
        sync_option+=" --update --delete-dst"
        echo sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option
        sudo -u $USER GOCOVERDIR=$GOCOVERDIR meta_url=$META_URL ./juicefs sync $SOURCE_DIR1 jfs://meta_url/fsrand1/ $sync_option 2>&1| tee sync.log || true
        if grep -q "Failed to delete" sync.log; then
            echo "failed to delete, retry sync"
        else
            echo "sync delete success"
            break
        fi
    done
    diff -ur --no-dereference $DEST_DIR1 $DEST_DIR2
}

test_files_from(){
    prepare_test
    ./juicefs mount $META_URL /tmp/jfs -d
    sync_option1="--dirs --perms --check-all --links --list-threads 10 --list-depth 5"
    sync_option2="--dirs --perms --check-all --links --list-threads 10 --list-depth 5 --files-from files"
    ls -A "$SOURCE_DIR1" | while read file; do 
      full_path="$SOURCE_DIR1/$file"
      if [ -L "$full_path" ] && [ ! -e "$full_path" ]; then
        rm "$full_path"
      else
        echo "$file"
      fi 
    done > files
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option1 2>&1| tee sync1.log || true
    SOURCE_PERM=$(sudo stat -c "%a" "$DEST_DIR1")
    SOURCE_OWNER=$(sudo stat -c "%U" "$DEST_DIR1")
    SOURCE_GROUP=$(sudo stat -c "%G" "$DEST_DIR1")
    sudo mkdir -p $DEST_DIR2
    sudo chmod $SOURCE_PERM $DEST_DIR2
    sudo chown $SOURCE_OWNER:$SOURCE_GROUP $DEST_DIR2
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR2 $sync_option2 2>&1| tee sync2.log || true
    check_diff $DEST_DIR1 $DEST_DIR2
}

test_check_change(){
    prepare_test
   ./juicefs mount $META_URL /tmp/jfs -d
    sync_option="--dirs --check-change --links --perms --list-threads 10 --list-depth 5"
    sudo -u $USER GOCOVERDIR=$GOCOVERDIR ./juicefs sync -v $SOURCE_DIR1 $DEST_DIR1 $sync_option 2>&1| tee sync.log || true
    do_copy $sync_option
    check_diff $DEST_DIR1 $DEST_DIR2
}

do_copy(){
    local sync_option=$@
    local preserve="timestamps"
    local no_preserve=""
    if [[ "$sync_option" =~ "--perms" ]]; then
        preserve+=",mode,ownership"
    else
        no_preserve+="mode,ownership"
    fi
    if [[ "$sync_option" =~ "--links" ]]; then
       preserve+=",links"
    fi
    local cp_option="--recursive --preserve=$preserve"
    if [[ -n "$no_preserve" ]]; then
        cp_option+=" --no-preserve=$no_preserve"
    fi
    if [[ "$sync_option" =~ "--links" ]]; then
        cp_option+=" --no-dereference"
    else
        cp_option+=" --dereference"
    fi
    rm -rf $DEST_DIR2 
    sudo -u $USER cp  $SOURCE_DIR1 $DEST_DIR2 $cp_option || true
    echo sudo -u $USER cp  $SOURCE_DIR1 $DEST_DIR2 $cp_option
}

check_diff(){
    local dir1=$1
    local dir2=$2
    diff -ur --no-dereference $dir1 $dir2
    pushd . && diff <(cd $dir1 && find . -printf "%p:%m:%u:%g:%y\n" | sort) <(cd $dir2 && find . -printf "%p:%m:%u:%g:%y\n" | sort) && popd
    if [ $? -ne 0 ]; then
        echo "permission or owner or group not equal"
        exit 1
    fi
    # pushd . && diff <(cd $dir1 && find . ! -type d -printf "%p:%.23T+\n" | sort) <(cd $dir2 && find . ! -type d -printf "%p:%.23T+\n" | sort) && popd
    # if [ $? -ne 0 ]; then
    #     echo "mtime not equal"
    #     exit 1
    # fi
    # TODO: uncomment this after xattr is supported
    # pushd . && diff <(cd $dir1 && find . -exec getfattr -dm- {} + | sort) <(cd $dir2 && find . -exec getfattr -dm- {} + | sort) && popd
    # if [ $? -ne 0 ]; then
    #     echo "xattr not equal"
    #     exit 1
    # fi
    echo "check diff success"
}


source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/sync/sync_minio.sh
================================================
#!/bin/bash -e
source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
source .github/scripts/start_meta_engine.sh
start_meta_engine $META minio
META_URL=$(get_meta_url $META)

test_sync_small_files(){
    prepare_test
    ./juicefs mdtest $META_URL /test --dirs 10 --depth 3 --files 5 --threads 10
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ --list-threads 100 --list-depth 10
    count1=$(./mc ls -r juicegw/myjfs/ | wc -l)
    count2=$(./mc ls -r myminio/myjfs/ | wc -l)
    [ $count1 -eq $count2 ]
}

test_sync_big_file_with_jfs(){
    prepare_test
    [[ ! -f "/tmp/bigfile" ]] && dd if=/dev/urandom of=/tmp/bigfile bs=1M count=1024
    ./mc cp /tmp/bigfile myminio/myjfs/bigfile
    export dst_jfs=$META_URL 
    timeout 10 ./juicefs sync minio://minioadmin:minioadmin@localhost:9000/myjfs/bigfile jfs://dst_jfs/bigfile --threads=64 --force-update
    cmp /tmp/bigfile /jfs/bigfile
}

test_sync_big_file(){
    prepare_test
    dd if=/dev/urandom of=/tmp/bigfile bs=1M count=1024
    cp /tmp/bigfile /jfs/bigfile
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    ./mc cp myminio/myjfs/bigfile /tmp/bigfile2
    cmp /tmp/bigfile /tmp/bigfile2
}

test_sync_with_limit(){
    prepare_test
    ./juicefs mdtest $META_URL /test --dirs 10 --depth 2 --files 5 --threads 10
    ./juicefs sync --limit 1000 minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    count=$(./mc ls myminio/myjfs -r | wc -l)
    echo count is $count
    [ $count -eq 1000 ]
}
test_sync_with_existing(){
    prepare_test
    echo abc > /jfs/abc
    ./juicefs sync --existing minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc find myminio/myjfs/abc && echo "myminio/myjfs/abc should not exist" && exit 1 || true
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    ./mc find myminio/myjfs/abc
}
test_sync_with_update(){
    prepare_test
    echo abc > /jfs/abc
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    echo def > def
    ./mc cp def myminio/myjfs/abc
    ./juicefs sync --update minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc cat myminio/myjfs/abc | grep def || (echo "content should be def" && exit 1)
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc cat myminio/myjfs/abc | grep def || (echo "content should be def" && exit 1)
    ./juicefs sync --force-update minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    ./mc cat myminio/myjfs/abc | grep abc || (echo "content should be abc" && exit 1)
    echo hijk > hijk
    ./mc cp hijk myminio/myjfs/abc
    ./juicefs sync --update minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc cat myminio/myjfs/abc | grep hijk || (echo "content should be hijk" && exit 1)
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc cat myminio/myjfs/abc | grep abc || (echo "content should be abc" && exit 1)
}

test_sync_hard_link(){
    prepare_test
    echo abc > /jfs/abc
    ln /jfs/abc /jfs/def
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/ 
    ./mc cat myminio/myjfs/def | grep abc || (echo "content should be abc" && exit 1)
    echo abcd > /jfs/abc
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    ./mc cat myminio/myjfs/def | grep abcd || (echo "content should be abcd" && exit 1)
}

test_sync_external_link(){
    prepare_test
    touch hello
    ln -s $(realpath hello) /jfs/hello
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    [ -z $(./mc cat myminio/myjfs/hello) ]
}

# list object should be skipped when encountering a loop symlink
test_sync_loop_symlink(){
    prepare_test
    touch hello
    ln -s hello /jfs/hello
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    rm -rf /jfs/hello
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
}

test_sync_deep_symlink(){
    prepare_test
    cd /jfs
    echo hello > hello
    ln -s hello symlink_1
    for i in {1..40}; do
        ln -s symlink_$i symlink_$((i+1))
    done
    cat symlink_40 | grep hello
    cat symlink_41 && echo "cat symlink_41 fail" && exit 1 || true
    cd -
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/ minio://minioadmin:minioadmin@localhost:9000/myjfs/
    for i in {1..40}; do
        ./mc cat myminio/myjfs/symlink_$i | grep "^hello$"
    done
}

test_sync_list_object_symlink(){
    prepare_test
    cd /jfs
    mkdir dir1
    mkdir -p dir2/src_dir
    echo abc > dir2/src_dir/afile
    ln -s ./../dir2/src_dir dir1/symlink_dir
    cd -
    ./juicefs sync minio://minioadmin:minioadmin@localhost:9005/myjfs/dir1/ minio://minioadmin:minioadmin@localhost:9000/myjfs/dir3/
    ./mc cat myminio/myjfs/dir3/symlink_dir/afile | grep abc || (echo "content should be abc" && exit 1)
}

prepare_test(){
    umount_jfs /jfs $META_URL
    python3 .github/scripts/flush_meta.py $META_URL
    rm -rf /var/jfs/myjfs
    rm -rf /var/jfsCache/myjfs
    (./mc rb myminio/myjfs > /dev/null 2>&1 --force || true) && ./mc mb myminio/myjfs
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL /jfs
    lsof -i :9005 | awk 'NR!=1 {print $2}' | xargs -r kill -9 || true
    MINIO_ROOT_USER=minioadmin MINIO_ROOT_PASSWORD=minioadmin ./juicefs gateway $META_URL localhost:9005 &
    wait_gateway_ready
    ./mc alias set juicegw http://localhost:9005 minioadmin minioadmin --api S3v4
}

wait_gateway_ready(){
    timeout=30
    for i in $(seq 1 $timeout); do
        if [[ -z $(lsof -i :9005) ]]; then
            echo "$i Waiting for port 9005 to be ready..."
            sleep 1
        else
            echo "gateway is now ready on port 9005"
            break
        fi
    done
    if [[ -z $(lsof -i :9005) ]]; then
        echo "gateway is not ready after $timeout seconds"
        exit 1
    fi
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/test-mac/mac_commands.sh
================================================
#!/bin/bash -e

source .github/scripts/common/common.sh
source .github/scripts/test-mac/start_meta_engine.sh


[[ -z "$META" ]] && META=redis
start_meta_engine $META
META_URL=$(get_meta_url $META)
user=$(whoami)
mount_point="/Users/$user/jfs"
HEARTBEAT_INTERVAL=3
HEARTBEAT_SLEEP=3
DIR_QUOTA_FLUSH_INTERVAL=4
VOLUME_QUOTA_FLUSH_INTERVAL=2

wget https://dl.min.io/client/mc/release/darwin-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z -O mc
chmod +x mc
export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=admin123
export MINIO_REFRESH_IAM_INTERVAL=10s

[[ ! -f my-priv-key.pem ]] && openssl genrsa -out my-priv-key.pem -aes256  -passout pass:12345678 2048


skip_test_modify_acl_config()
{
    prepare_test
    ./juicefs format $META_URL myjfs --trash-days 0
    ./juicefs mount -d $META_URL $mount_point
    touch $mount_point/test
    sudo chmod +a "$user allow read,write" $mount_point/test && echo "setfacl should failed" && exit 1
    ./juicefs config $META_URL --enable-acl=true
    ./juicefs umount $mount_point
    sleep 2
    ./juicefs mount -d $META_URL $mount_point
    sudo chmod +a "$user allow read,write" $mount_point/test
    ./juicefs config $META_URL --enable-acl
    umount_jfs $mount_point $META_URL
    ./juicefs mount -d $META_URL $mount_point
    sudo chmod +a "$user allow read,write" $mount_point/test
    ./juicefs config $META_URL --enable-acl=false && echo "should not disable acl" && exit 1 || true 
    ./juicefs config $META_URL | grep EnableACL | grep "true" || (echo "EnableACL should be true" && exit 1) 
}

test_clone_with_jfs_source()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point
    [[ ! -d $mount_point/juicefs ]] && git clone https://github.com/juicedata/juicefs.git $mount_point/juicefs --depth 1
    do_clone true
    do_clone false
}

do_clone()
{
    is_preserve=$1
    rm -rf $mount_point/juicefs1
    rm -rf $mount_point/juicefs2
    [[ "$is_preserve" == "true" ]] && preserve="-p" || preserve=""
    cp -r $preserve $mount_point/juicefs $mount_point/juicefs1
    ./juicefs clone $mount_point/juicefs $mount_point/juicefs2 --preserve
    diff -r $mount_point/juicefs1 $mount_point/juicefs2
    cd $mount_point/juicefs1/ && find . -exec stat -f "%p %u %g %N" {} \; | sort >/tmp/log1 && cd -
    cd $mount_point/juicefs2/ && find . -exec stat -f "%p %u %g %N" {} \; | sort >/tmp/log2 && cd -
    diff /tmp/log1 /tmp/log2
}

check_debug_file(){
   files=("system-info.log" "juicefs.log" "config.txt" "stats.txt" "stats.5s.txt" "pprof")
   debug_dir="debug"
   if [ ! -d "$debug_dir" ]; then
    echo "error:no debug dir"
    exit 1
   fi
   all_files_exist=true
   for file in "${files[@]}"; do
     exist=`find "$debug_dir" -name $file | wc -l`
     if [ "$exist" == 0 ]; then
        echo "no $file"
        all_files_exist=false
     fi
   done
   if [ "$all_files_exist" = true ]; then
    echo "pass"
   else
    exit 1
   fi
}

test_debug_juicefs(){
    ./juicefs format $META_URL myjfs 
    ./juicefs mount -d $META_URL $mount_point
    dd if=/dev/urandom of=$mount_point/bigfile bs=1M count=128
    ./juicefs debug $mount_point/
    check_debug_file
    ./juicefs rmr $mount_point/bigfile
}

test_sync_dir_stat()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point
    ./juicefs mdtest $META_URL /d --depth 15 --dirs 2 --files 100 --threads 10 & 
    pid=$!
    sleep 10
    kill -9 $pid
    pkill -P "$pid" 2>/dev/null || true
    ./juicefs info -r $mount_point/d
    ./juicefs info -r $mount_point/d --strict 
    ./juicefs fsck $META_URL --path /d --sync-dir-stat --repair -r
    ./juicefs info -r $mount_point/d | tee info1.log
    ./juicefs info -r $mount_point/d --strict | tee info2.log
    diff info1.log info2.log
    rm info*.log
    ./juicefs fsck $META_URL --path / --sync-dir-stat --repair -r
    ./juicefs info -r $mount_point | tee info1.log
    ./juicefs info -r $mount_point --strict | tee info2.log
    diff info1.log info2.log
}

test_gc_trash_slices(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point
    PATH1=/tmp/test PATH2=$mount_point/test python3 .github/scripts/random_read_write.py 
    ./juicefs status --more $META_URL
    ./juicefs config $META_URL --trash-days 0 --yes
    ./juicefs gc $META_URL 
    ./juicefs gc $META_URL --delete
    ./juicefs status --more $META_URL
}

test_update_non_fuse_option(){
    prepare_test
    JFS_RSA_PASSPHRASE=12345678 ./juicefs format $META_URL myjfs --encrypt-rsa-key my-priv-key.pem
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL $mount_point
    echo abc | tee $mount_point/test
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL $mount_point --read-only
    echo abc | tee $mount_point/test && (echo "should not write read-only file system" && exit 1) || true
    JFS_RSA_PASSPHRASE=12345678 ./juicefs mount -d $META_URL $mount_point 
    echo abc | tee $mount_point/test
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l | tr -d ' ')
    [[ $count -ne 2 ]] && echo "mount process count should be 2, count=$count" && exit 1 || true
    umount $mount_point
    sleep 2
    ps -ef | grep juicefs | grep mount | grep -v grep || true
    count=$(ps -ef | grep juicefs | grep mount | grep -v grep | wc -l | tr -d ' ')
    [[ $count -ne 0 ]] && echo "mount process count should be 0, count=$count" && exit 1 || true
}

test_info_big_file(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point
    dd if=/dev/zero of=$mount_point/bigfile bs=1M count=4096
    ./juicefs info $mount_point/bigfile
    ./juicefs rmr $mount_point/bigfile
    df -h $mount_point
}

test_list_large_dir()
{
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point
    local files_count=100000
    if [[ "$META_URL" == redis://* ]]; then
        files_count=130000
    fi
    ./juicefs mdtest $META_URL /test --depth 0 --dirs 1 --files $files_count --threads 1
    du $mount_point/test & du_pid=$!
    sleep 2
    kill -INT $du_pid || true
    wait $du_pid || true
    if ! [ -d "$mount_point/test" ]; then
        echo >&2 "<FATAL>: directory $mount_point/test is not accessible after ls interruption"
        exit 1
    fi
}

test_total_inodes(){
    prepare_test
    ./juicefs format $META_URL myjfs --inodes 1000
    ./juicefs mount -d $META_URL $mount_point --heartbeat $HEARTBEAT_INTERVAL
    set +x
    for i in {1..1000}; do
        echo $i | tee $mount_point/test$i > /dev/null
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee $mount_point/test1001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
    grep "No space left on device" error.log
    ./juicefs config $META_URL --inodes 2000
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    set +x
    for i in {1001..2000}; do
        echo $i | tee $mount_point/test$i > /dev/null || (df -i $mount_point && ls $mount_point/ -l | wc -l  && exit 1)
    done
    set -x
    sleep $VOLUME_QUOTA_FLUSH_INTERVAL
    echo a | tee $mount_point/test2001 2>error.log && echo "write should fail on out of inodes" && exit 1 || true
}

test_remove_and_restore(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p $mount_point/d
    ./juicefs quota set $META_URL --path /d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=$mount_point/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    echo a | tee -a $mount_point/d/test2 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep -i "Disc quota exceeded" error.log || (echo "grep failed" && exit 1)

    echo "remove test1" && rm -rf $mount_point/d/test1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "0%" ]] && echo "used should be 0%" && exit 1 || true

    trash_dir=$(ls $mount_point/.trash)
    sudo ./juicefs restore $META_URL $trash_dir --put-back
    ./juicefs quota get $META_URL --path /d 2>&1 | tee quota.log
    used=$(cat quota.log | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    echo a | tee -a $mount_point/d/test2 2>error.log && echo "write should fail on out of space" && exit 1 || true
    grep -i "Disc quota exceeded" error.log || (echo "grep failed" && exit 1)

    echo "remove test1" && rm -rf $mount_point/d/test1
    dd if=/dev/zero of=$mount_point/d/test2 bs=1M count=1
    trash_dir=$(ls $mount_point/.trash)
    sudo ./juicefs restore $META_URL $trash_dir --put-back 2>&1 | tee restore.log
    grep "disc quota exceeded" restore.log || (echo "check restore log failed" && exit 1)
}

test_dir_capacity(){
    prepare_test
    ./juicefs format $META_URL myjfs
    ./juicefs mount -d $META_URL $mount_point --heartbeat $HEARTBEAT_INTERVAL
    mkdir -p $mount_point/d
    ./juicefs quota set $META_URL --path /d --capacity 1
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=$mount_point/d/test1 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota get $META_URL --path /d
    used=$(./juicefs quota get $META_URL --path /d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "100%" ]] && echo "used should be 100%" && exit 1 || true
    echo a | tee -a $mount_point/d/test2 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep -i "Disc quota exceeded" error.log || (echo "grep failed" && exit 1)

    ./juicefs quota set $META_URL --path /d --capacity 2
    sleep $((HEARTBEAT_INTERVAL+HEARTBEAT_SLEEP))
    dd if=/dev/zero of=$mount_point/d/test2 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    echo a | tee -a $mount_point/d/test3 2>error.log && echo "echo should fail on out of space" && exit 1 || true
    grep -i "Disc quota exceeded" error.log || (echo "grep failed" && exit 1)
    rm -rf $mount_point/d/test1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    used=$(./juicefs quota get $META_URL --path /d 2>&1 | grep "/d" | awk -F'|' '{print $5}'  | tr -d '[:space:]')
    [[ $used != "50%" ]] && echo "used should be 50%" && exit 1 || true
    dd if=/dev/zero of=$mount_point/d/test3 bs=1G count=1
    sleep $DIR_QUOTA_FLUSH_INTERVAL
    ./juicefs quota check $META_URL --path /d --strict
}

kill_gateway() {
    port=$1
    lsof -i:$port || true
    lsof -t -i :$port | xargs -r kill -9 || true
}

trap 'kill_gateway 9001; kill_gateway 9002' EXIT

start_two_gateway()
{  
    kill_gateway 9001
    kill_gateway 9002
    prepare_test
    ./juicefs format $META_URL myjfs  --trash-days 0
    ./juicefs mount -d $META_URL $mount_point
    export MINIO_ROOT_USER=admin
    export MINIO_ROOT_PASSWORD=admin123
    ./juicefs gateway $META_URL 127.0.0.1:9001 --multi-buckets --keep-etag --object-tag -background
    sleep 1
    ./juicefs gateway $META_URL 127.0.0.1:9002 --multi-buckets --keep-etag --object-tag -background
    sleep 2
    ./mc alias set gateway1 http://127.0.0.1:9001 admin admin123
    ./mc alias set gateway2 http://127.0.0.1:9002 admin admin123
}

test_user_management()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    sleep 12
    user=$(./mc admin user list gateway2 | grep user1) || true
    if [ -z "$user" ]
    then
      echo "user synchronization error"
      exit 1
    fi
    ./mc mb gateway1/test1
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "By default, the user has no read and write permission"
      exit 1
    fi
    ./mc admin policy set gateway1 readwrite user=user1
    if ./mc cp mc gateway1_user1/test1/file1
    then 
      echo "readwrite policy can read and write objects" 
    else
      echo "set readwrite policy fail"
      exit 1
    fi
    ./mc cp gateway2/test1/file1 .
    compare_md5sum file1 mc  
    ./mc admin user disable gateway1 user1
    ./mc admin user remove gateway2 user1
    sleep 12
    user=$(./mc admin user list gateway1 | grep user1) || true
    if [ ! -z "$user" ]
    then
      echo "remove user user1 fail"
      echo $user
      exit 1
    fi
}

test_group_management()
{
    prepare_test
    start_two_gateway
    ./mc admin user add gateway1 user1 admin123
    ./mc admin user add gateway1 user2 admin123
    ./mc admin user add gateway1 user3 admin123
    ./mc admin group add gateway1 testcents user1 user2 user3
    result=$(./mc admin group info gateway1 testcents | grep Members |awk '{print $2}') || true
    if [ "$result" != "user1,user2,user3" ]
    then
      echo "error,result is '$result'"
      exit 1
    fi
    ./mc admin policy set gateway1 readwrite group=testcents
    sleep 5
    ./mc alias set gateway1_user1 http://127.0.0.1:9001 user1 admin123
    ./mc mb gateway1/test1
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "readwrite policy can read write"
    else
      echo "the readwrite group has no read and write permission"
      exit 1
    fi
    ./mc admin policy set gateway1 readonly group=testcents
    sleep 5
    if ./mc cp mc gateway1_user1/test1/file1
    then
      echo "readonly group policy can not write"
      exit 1
    else
      echo "the readonly group has no write permission"
    fi

    ./mc admin group remove gateway1 testcents user1 user2 user3 
    ./mc admin group remove gateway1 testcents
}

source .github/scripts/common/run_test.sh && run_test $@


================================================
FILE: .github/scripts/test-mac/start_meta_engine.sh
================================================
#!/bin/bash -e

REDIS_CSC_QUERY="client-cache=true&client-cache-size=500&client-cache-expire=60s&client-cache-preload=100"

# Helper function to install packages via Homebrew
brew_install() {
    if ! brew list "$1" &>/dev/null; then
        echo "Installing $1..."
        brew install "$1"
    fi
}

start_redis() {
    if pgrep redis-server >/dev/null; then
        echo "Redis is already running"
        return 0
    fi

    if brew services start redis 2>/dev/null; then
        echo "Redis started via brew services"
    elif [ -f /usr/local/bin/redis-server ]; then
        echo "Starting Redis directly..."
        /usr/local/bin/redis-server /usr/local/etc/redis.conf &
    else
        echo "Failed to start Redis"
        return 1
    fi

    sleep 2
    if ! pgrep redis-server >/dev/null; then
        echo "Redis failed to start"
        return 1
    fi
}

clean_minio() {
    if command -v mc >/dev/null; then
        mc ls local/ 2>/dev/null | awk '{print $5}' | while read -r bucket; do
            if [ -n "$bucket" ]; then
                echo "Cleaning bucket: $bucket"
                mc rb --force local/"$bucket" 2>/dev/null || true
            fi
        done
    fi
}

start_minio() {
    if ! command -v minio >/dev/null; then
        brew_install minio/stable/minio
    fi
    
    if ! command -v mc >/dev/null; then
        brew_install minio/stable/mc
    fi

    clean_minio
    
    if ! pgrep minio >/dev/null; then
        mkdir -p /tmp/data
        rm -rf /tmp/data/*
        minio server /tmp/data --console-address :9001 &
        sleep 3
    fi

    mc alias set local http://127.0.0.1:9000 minioadmin minioadmin || true
    
    mc mb local/jfs || true
    mc mb local/test || true
}

start_meta_engine() {
    local meta=$1
    local storage=$2

    case "$meta" in
        redis)
            brew_install redis
            if ! start_redis; then
                echo >&2 "Failed to start Redis"
                return 1
            fi
            ;;
        sqlite3)
            brew_install sqlite3
            echo "SQLite3 ready to use"
            ;;
        *)
            echo >&2 "<FATAL>: Unsupported meta engine: $meta"
            return 1
            ;;
    esac

    if [ "$storage" = "minio" ]; then
        if ! start_minio; then
            echo >&2 "Failed to start MinIO"
            return 1
        fi
    fi
}

get_meta_url() {
    case "$1" in
        redis) echo "redis://127.0.0.1:6379/1?${REDIS_CSC_QUERY}" ;;
        sqlite3) echo "sqlite3://test.db" ;;
        *)     echo >&2 "<FATAL>: Unsupported meta: $1"; return 1 ;;
    esac
}

get_meta_url2() {
    case "$1" in
        redis) echo "redis://127.0.0.1:6379/2?${REDIS_CSC_QUERY}" ;;
        sqlite3) echo "sqlite3://test2.db" ;;
        *)     echo >&2 "<FATAL>: Unsupported meta: $1"; return 1 ;;
    esac
}

retry() {
    local retries=5
    local delay=3
    local exit=0

    for i in $(seq 1 "$retries"); do
        if "$@"; then
            return 0
        else
            exit=$?
            if [ "$i" -eq "$retries" ]; then
                return "$exit"
            fi
            sleep "$delay"
        fi
    done
}

================================================
FILE: .github/scripts/testVersionCompatible.py
================================================
import subprocess
try:
    __import__("hypothesis")
except ImportError:
    subprocess.check_call(["pip", "install", "hypothesis"])
from datetime import datetime
import json
import os
from pickle import FALSE
import platform
import shutil
import sys
from termios import TIOCPKT_DOSTOP
import time
import unittest
from xmlrpc.client import boolean
import hypothesis
from hypothesis.stateful import rule, precondition, RuleBasedStateMachine
from hypothesis import Phase, Verbosity, assume, strategies as st
from hypothesis import seed
from packaging import version
import subprocess
try:
    __import__("minio")
except ImportError:
    subprocess.check_call(["pip", "install", "minio"])
from minio import Minio
import uuid
from utils import *
from fsrand import *
from cmptree import *
import random

@seed(random.randint(10000, 1000000))
@hypothesis.settings(
    verbosity=Verbosity.debug, 
    max_examples=100, 
    stateful_step_count=30, 
    deadline=None, 
    report_multiple_bugs=False, 
    phases=[Phase.explicit, Phase.reuse, Phase.generate, Phase.target, Phase.shrink, Phase.explain])
class JuicefsMachine(RuleBasedStateMachine):
    MIN_CLIENT_VERSIONS = ['0.0.1', '0.0.17','1.0.0-beta1', '1.0.0-rc1']
    MAX_CLIENT_VERSIONS = ['1.2.0', '2.0.0']
    JFS_BINS = ['./'+os.environ.get('OLD_JFS_BIN'), './'+os.environ.get('NEW_JFS_BIN')]
    meta_dict = {'redis':'redis://localhost/1', 'mysql':'mysql://root:root@(127.0.0.1)/test', 'postgres':'postgres://postgres:postgres@127.0.0.1:5432/test?sslmode=disable', \
        'tikv':'tikv://127.0.0.1:2379', 'badger':'badger://badger-data', 'mariadb': 'mysql://root:root@(127.0.0.1)/test', \
            'sqlite3': 'sqlite3://test.db', 'fdb':'fdb:///home/runner/fdb.cluster?prefix=jfs'}
    META_URL = meta_dict[os.environ.get('META')]
    STORAGE = os.environ.get('STORAGE')
    MOUNT_POINT = '/tmp/sync-test/'
    VOLUME_NAME = 'test-volume'
    # valid_file_name = st.text(st.characters(max_codepoint=1000, blacklist_categories=('Cc', 'Cs')), min_size=2).map(lambda s: s.strip()).filter(lambda s: len(s) > 0)

    def __init__(self):
        super(JuicefsMachine, self).__init__()
        print(f"seed is: {self._hypothesis_internal_use_seed}")
        self.run_id = uuid.uuid4().hex
        print(f'\ninit with run_id: {self.run_id}')
        with open(os.path.expanduser('~/command.log'), 'a') as f:
            f.write(f'init with run_id: {self.run_id}\n')
        self.formatted = False
        self.mounted = False
        # mount at least once, see ref: https://github.com/juicedata/juicefs/issues/2717
        self.mounted_by = []
        self.formatted_by = ''
        self.dumped_by = ''
        if JuicefsMachine.META_URL.startswith('badger://'):
            # change url for each run
            JuicefsMachine.META_URL = f'badger://badger-{uuid.uuid4().hex}'
        if JuicefsMachine.STORAGE == 'minio':
            run_cmd(f'mc alias set myminio http://localhost:9000 minioadmin minioadmin')
        if os.path.isfile('dump.json'):
            os.remove('dump.json')
        os.environ['PGPASSWORD'] = 'postgres'

    @rule(
          juicefs=st.sampled_from(JFS_BINS),
          block_size=st.integers(min_value=1, max_value=4096*10), 
          capacity=st.integers(min_value=0, max_value=1024),
          inodes=st.integers(min_value=1024*1024, max_value=1024*1024*1024),
          compress=st.sampled_from(['lz4', 'zstd', 'none']),
          shards=st.integers(min_value=0, max_value=1),
          storage=st.just(STORAGE), 
          encrypt_rsa_key = st.booleans(), 
          encrypt_algo = st.sampled_from(['aes256gcm-rsa','chacha20-rsa']),
          trash_days=st.integers(min_value=0, max_value=10000), 
          hash_prefix=st.booleans(), 
          force = st.booleans(), 
          no_update = st.booleans()
          )
    def format(self, juicefs, block_size, capacity, inodes, compress, shards, storage, encrypt_rsa_key, encrypt_algo, trash_days, hash_prefix, force, no_update):
        assume (self.greater_than_version_formatted(juicefs))
        print('start format')
        options = [juicefs, 'format',  JuicefsMachine.META_URL, JuicefsMachine.VOLUME_NAME]
        if not self.formatted:
            options.extend(['--block-size', str(block_size)])
            options.extend(['--compress', compress])
            options.extend(['--shards', str(shards)])
            options.extend(['--storage', storage])
            if hash_prefix and run_cmd(f'{juicefs} format --help | grep hash-prefix') == 0:
                options.append('--hash-prefix')
        options.extend(['--capacity', str(capacity)])
        options.extend(['--inodes', str(inodes)])
        if run_cmd(f'{juicefs} format --help | grep trash-days') == 0:
            options.extend(['--trash-days', str(trash_days)])
        
        if force:
            options.append('--force')
        if no_update:
            options.append('--no-update')
        if encrypt_rsa_key:
            if not os.path.exists('my-priv-key.pem'):
                subprocess.check_call('openssl genrsa -out my-priv-key.pem -aes256  -passout pass:12345678 2048'.split())
            os.environ['JFS_RSA_PASSPHRASE'] = '12345678'
            options.extend(['--encrypt-rsa-key', 'my-priv-key.pem'])
            if run_cmd(f'{juicefs} format --help | grep encrypt-algo') == 0:
                options.extend(['--encrypt-algo', encrypt_algo])
        
        if storage == 'minio':
            bucket = 'http://localhost:9000/testbucket'
            options.extend(['--bucket', bucket])
            options.extend(['--access-key', 'minioadmin'])
            options.extend(['--secret-key', 'minioadmin'])
            if self.formatted and version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-rc1'):
                # use the latest version to change secret-key because rc1 has a bug for secret-key
                options[0] = JuicefsMachine.JFS_BINS[1]
        elif storage == 'file':
            bucket = os.path.expanduser('~/.juicefs/local/')
            options.extend(['--bucket', bucket])
        elif storage == 'mysql':
            bucket = '(localhost:3306)/testbucket'
            options.extend(['--bucket', bucket])
            options.extend(['--access-key', 'root'])
            options.extend(['--secret-key', 'root'])
        elif storage == 'postgres':
            bucket = 'localhost:5432/testbucket?sslmode=disable'
            options.extend(['--bucket', bucket])
            options.extend(['--access-key', 'postgres'])
            options.extend(['--secret-key', 'postgres'])
        else:
            print(f'storage is {storage}')
            raise Exception(f'storage value error: {storage}')

        if not self.formatted:
            if os.path.exists(JuicefsMachine.MOUNT_POINT) and os.path.exists(JuicefsMachine.MOUNT_POINT+'.accesslog'):
                run_cmd('umount %s'%JuicefsMachine.MOUNT_POINT)
                print(f'umount {JuicefsMachine.MOUNT_POINT} succeed')
            clear_storage(storage, bucket, JuicefsMachine.VOLUME_NAME)
            flush_meta(JuicefsMachine.META_URL)
        print(f'format options: {" ".join(options)}' )
        run_jfs_cmd(options)
        self.formatted = True
        self.formatted_by = juicefs
        print('format succeed')


    @rule(
        juicefs=st.sampled_from(JFS_BINS),
        capacity=st.integers(min_value=0, max_value=1024), 
        inodes=st.integers(min_value=1024*1024, max_value=1024*1024*1024),
        change_bucket=st.booleans(), 
        change_aksk=st.booleans(), 
        encrypt_secret = st.booleans(), 
        trash_days =  st.integers(min_value=0, max_value=10000),
        min_client_version = st.sampled_from(MIN_CLIENT_VERSIONS), 
        max_client_version = st.sampled_from(MAX_CLIENT_VERSIONS), 
        force = st.booleans(),
    )
    @precondition(lambda self: self.formatted)
    def config(self, juicefs, capacity, inodes, change_bucket, change_aksk, encrypt_secret, trash_days, min_client_version, max_client_version, force):
        assume (self.greater_than_version_formatted(juicefs))
        assume(run_cmd(f'{juicefs} --help | grep config') == 0)
        print('start config')
        options = [juicefs, 'config', JuicefsMachine.META_URL]
        options.extend(['--trash-days', str(trash_days)])
        options.extend(['--capacity', str(capacity)])
        options.extend(['--inodes', str(inodes)])
        assert version.parse(min_client_version) <= version.parse(max_client_version)
        if run_cmd(f'{juicefs} config --help | grep min-client-version') == 0:
            options.extend(['--min-client-version', min_client_version])
        if run_cmd(f'{juicefs} config --help | grep max-client-version') == 0:
            options.extend(['--max-client-version', max_client_version])
        storage = get_storage(juicefs, JuicefsMachine.META_URL)
        
        if change_bucket:
            if storage == 'file':
                options.extend(['--bucket', os.path.expanduser('~/.juicefs/local2')])
            elif storage == 'minio': 
                c = Minio('localhost:9000', access_key='minioadmin', secret_key='minioadmin', secure=False)
                if not c.bucket_exists('testbucket2'):
                    run_cmd('mc mb myminio/testbucket2')
                    # assert c.bucket_exists('testbucket2')
                options.extend(['--bucket', 'http://localhost:9000/testbucket2'])
            elif storage == 'mysql':
                create_mysql_db('mysql://root:root@(localhost:3306)/testbucket2')
                options.extend(['--bucket', '(localhost:3306)/testbucket2'])
            elif storage == 'postgres':
                create_postgres_db('postgres://postgres:postgres@localhost:5432/testbucket2?sslmode=disable')
                options.extend(['--bucket', 'localhost:5432/testbucket2?sslmode=disable'])
        if change_aksk and storage == 'minio':
            output = subprocess.check_output('mc admin user list myminio'.split())
            if not output:
                run_cmd('mc admin user add myminio juicedata 12345678')
                run_cmd('mc admin policy attach myminio consoleAdmin --user juicedata')
            options.extend(['--access-key', 'juicedata'])
            options.extend(['--secret-key', '12345678'])
            if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-rc1'):
                # use the latest version to set secret-key because rc1 has a bug for secret-key
                options[0] = JuicefsMachine.JFS_BINS[1]
        if encrypt_secret and run_cmd(f'{juicefs} config --help | grep encrypt-secret') == 0:
            # 0.17.5 store the secret without encrypt, ref: https://github.com/juicedata/juicefs/issues/2721
            #if version.parse('-'.join(juicefs.split('-')[1:])) > version.parse('0.17.5'):
            options.append('--encrypt-secret')
        options.append('--force')
        run_jfs_cmd(options)
        if change_bucket:
            # change bucket back to avoid fsck fail.
            if storage == 'file':
                run_jfs_cmd([juicefs, 'config', JuicefsMachine.META_URL, '--bucket', os.path.expanduser('~/.juicefs/local')])
            elif storage == 'minio':
                run_jfs_cmd([juicefs, 'config', JuicefsMachine.META_URL, '--bucket', 'http://localhost:9000/testbucket'])
            elif storage == 'mysql':
                run_jfs_cmd([juicefs, 'config', JuicefsMachine.META_URL, '--bucket', '(localhost:3306)/testbucket'])
            elif storage == 'postgres':
                run_jfs_cmd([juicefs, 'config', JuicefsMachine.META_URL, '--bucket', 'localhost:5432/testbucket?sslmode=disable'])
        self.formatted_by = juicefs
        print('config succeed')


    @rule(juicefs=st.sampled_from(JFS_BINS))
    @precondition(lambda self: self.formatted )
    def status(self, juicefs):
        assume (self.greater_than_version_formatted(juicefs))
        print('start status')
        output = subprocess.run([juicefs, 'status', JuicefsMachine.META_URL], check=True, stdout=subprocess.PIPE).stdout.decode()
        if 'get timestamp too slow' in output: 
            # remove the first line caust it is tikv log message
            output = '\n'.join(output.split('\n')[1:])
        print(f'status output: {output}')
        try:
            uuid = json.loads(output.replace("'", '"'))['Setting']['UUID']
        except:
            raise Exception(f'parse uuid failed, output: {output}')
        assert len(uuid) != 0
        if self.mounted and not is_readonly(JuicefsMachine.MOUNT_POINT) and self.greater_than_version_mounted(juicefs):
            sessions = json.loads(output.replace("'", '"'))['Sessions']
            assert len(sessions) != 0 
        print('status succeed')


    @rule(juicefs=st.sampled_from(JFS_BINS), 
        no_syslog=st.booleans(),
        other_fuse_options=st.lists(st.sampled_from(['debug', 'allow_other', 'writeback_cache']), unique=True), 
        enable_xattr=st.booleans(),
        attr_cache=st.integers(min_value=1, max_value=10), 
        entry_cache=st.integers(min_value=1, max_value=10), 
        dir_entry_cache=st.integers(min_value=1, max_value=10), 
        get_timeout=st.integers(min_value=30, max_value=60), 
        put_timeout=st.integers(min_value=30, max_value=60), 
        io_retries=st.integers(min_value=5, max_value=15), 
        max_uploads=st.integers(min_value=5, max_value=100), 
        max_deletes=st.integers(min_value=5, max_value=100), 
        buffer_size=st.integers(min_value=100, max_value=1000), 
        upload_limit=st.integers(min_value=100, max_value=1000), 
        download_limit=st.integers(min_value=100, max_value=1000), 
        prefetch=st.integers(min_value=0, max_value=100), 
        writeback=st.just(False),
        upload_delay=st.sampled_from([0, 2]), 
        cache_dir=st.sampled_from(['cache1', 'cache2']),
        cache_size=st.integers(min_value=0, max_value=1024000), 
        free_space_ratio=st.floats(min_value=0.1, max_value=0.5), 
        cache_partial_only=st.booleans(),
        backup_meta=st.integers(min_value=300, max_value=1000),
        heartbeat=st.integers(min_value=5, max_value=12), 
        read_only=st.booleans(),
        no_bgjob=st.booleans(),
        open_cache=st.integers(min_value=0, max_value=100),
        sub_dir=st.sampled_from(['dir1', 'dir2']),
        metrics=st.sampled_from(['127.0.0.1:9567', '127.0.0.1:9568']), 
        consul=st.sampled_from(['127.0.0.1:8500', '127.0.0.1:8501']), 
    )
    @precondition(lambda self: self.formatted  )
    def mount(self, juicefs, no_syslog, other_fuse_options, enable_xattr, attr_cache, entry_cache, dir_entry_cache,
        get_timeout, put_timeout, io_retries, max_uploads, max_deletes, buffer_size, upload_limit, download_limit, prefetch, 
        writeback, upload_delay, cache_dir, cache_size, free_space_ratio, cache_partial_only, backup_meta, heartbeat, read_only,
        no_bgjob, open_cache, sub_dir, metrics, consul):
        assume (self.greater_than_version_formatted(juicefs))
        if JuicefsMachine.META_URL.startswith('badger://'):
            assume(not self.mounted)
        retry = 3
        while os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog') and retry > 0:
            os.system(f'umount {JuicefsMachine.MOUNT_POINT}')
            retry = retry - 1 
            time.sleep(1)
        if os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'):
            print(f'FATAL: umount {JuicefsMachine.MOUNT_POINT} failed.')
        assume(not os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start mount')
        options = [juicefs, 'mount', '-d',  JuicefsMachine.META_URL, JuicefsMachine.MOUNT_POINT]
        if no_syslog:
            options.append('--no-syslog')
        options.extend(['--log', os.path.expanduser(f'~/.juicefs/juicefs.log')])
        if other_fuse_options:
            options.extend(['-o', ','.join(other_fuse_options)])
        if 'allow_other' in other_fuse_options:
            if os.path.exists('/etc/fuse.conf'):
                # subprocess.check_call(['sudo', 'bash',  '-c', '"echo user_allow_other >>/etc/fuse.conf"' ])
                os.system('sudo bash -c "echo user_allow_other >>/etc/fuse.conf"')
                print('add user_allow_other to /etc/fuse.conf succeed')
        if enable_xattr:
            options.append('--enable-xattr')
        options.extend(['--attr-cache', str(attr_cache)])
        options.extend(['--entry-cache', str(entry_cache)])
        options.extend(['--dir-entry-cache', str(dir_entry_cache)])
        options.extend(['--get-timeout', str(get_timeout)])
        options.extend(['--put-timeout', str(put_timeout)])
        options.extend(['--io-retries', str(io_retries)])
        options.extend(['--max-uploads', str(max_uploads)])
        if run_cmd(f'{juicefs} mount --help | grep max-deletes') == 0:
            options.extend(['--max-deletes', str(max_deletes)])
        options.extend(['--buffer-size', str(buffer_size)])
        options.extend(['--upload-limit', str(upload_limit)])
        options.extend(['--download-limit', str(download_limit)])
        options.extend(['--prefetch', str(prefetch)])
        if writeback:
            options.append('--writeback')
        upload_delay = str(upload_delay)
        if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-beta2'):
            upload_delay = upload_delay + 's'
        options.extend(['--upload-delay', str(upload_delay)])
        options.extend(['--cache-dir', os.path.expanduser(f'~/.juicefs/{cache_dir}')])
        options.extend(['--cache-size', str(cache_size)])
        options.extend(['--free-space-ratio', str(free_space_ratio)])
        if cache_partial_only:
            options.append('--cache-partial-only')
        backup_meta = str(backup_meta)
        if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-beta2'):
            backup_meta = '1h0m0s'
        if run_cmd(f'{juicefs} mount --help | grep backup-meta') == 0:
            options.extend(['--backup-meta', backup_meta])
        if run_cmd(f'{juicefs} mount --help | grep heartbeat') == 0:
            options.extend(['--heartbeat', str(heartbeat)])
        if read_only:
            options.append('--read-only')
        if no_bgjob and run_cmd(f'{juicefs} mount --help | grep no-bgjob') == 0:
            options.append('--no-bgjob')

        options.extend(['--open-cache', str(open_cache)])
        print('TODO: subdir')
        # options.extend('--subdir', str(sub_dir))
        if not is_port_in_use( int(metrics.split(':')[1])):
            options.extend(['--metrics', str(metrics)])
        # if run_cmd(f'{juicefs} mount --help | grep consul') == 0:
        #     options.extend(['--consul', str(consul)])
        options.append('--no-usage-report')
        if os.path.exists(JuicefsMachine.MOUNT_POINT):
            run_cmd(f'stat {JuicefsMachine.MOUNT_POINT}')
        run_jfs_cmd(options)
        time.sleep(2)
        if platform.system() == 'Linux':
            inode = subprocess.check_output(f'stat -c %i {JuicefsMachine.MOUNT_POINT}'.split())
        elif platform.system() == 'Darwin':
            inode = subprocess.check_output(f'stat -f %i {JuicefsMachine.MOUNT_POINT}'.split())
        print(f'inode number: {inode}')
        assert(inode.decode()[:-1] == '1')
        output = subprocess.run([juicefs, 'status', JuicefsMachine.META_URL], check=True, stdout=subprocess.PIPE).stdout.decode()
        if 'get timestamp too slow' in output: 
            # remove the first line caust it is tikv log message
            output = '\n'.join(output.split('\n')[1:])
        print(f'status output: {output}')
        sessions = json.loads(output.replace("'", '"'))['Sessions']
        if not read_only: 
            assert len(sessions) != 0 
        self.mounted = True
        if not read_only:
            self.mounted_by.append(juicefs)
        print('mount succeed')

    @rule(juicefs=st.sampled_from(JFS_BINS), 
        file_name=st.just('file_to_info'), 
        data = st.binary())
    @precondition(lambda self: self.formatted and self.mounted )
    def info(self, juicefs, file_name, data):
        assume (self.greater_than_version_formatted(juicefs))
        assume (self.greater_than_version_mounted(juicefs))
        assume(not is_readonly(f'{JuicefsMachine.MOUNT_POINT}'))
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start info')
        path = JuicefsMachine.MOUNT_POINT+file_name
        write_data(JuicefsMachine.MOUNT_POINT, path, data)
        options = [juicefs, 'info', path]
        run_jfs_cmd(options)
        print('info succeed')

    @rule(juicefs=st.sampled_from(JFS_BINS), 
    file_name=st.just('file_to_rmr'))
    @precondition(lambda self: self.formatted and self.mounted )
    def rmr(self, juicefs, file_name):
        assume (self.greater_than_version_formatted(juicefs))
        assume (self.greater_than_version_mounted(juicefs))
        assume(not is_readonly(f'{JuicefsMachine.MOUNT_POINT}'))
        # ref: https://github.com/juicedata/juicefs/pull/2776
        assert(len(self.mounted_by) > 0)
        assume(version.parse('-'.join(self.mounted_by[-1].split('-')[1:])) >= version.parse('1.1.0-dev'))
        assume(version.parse('-'.join(juicefs.split('-')[1:])) >= version.parse('1.1.0-dev'))
        # TODO: should test upload delay.
        assume(get_upload_delay_seconds(JuicefsMachine.MOUNT_POINT) == 0)
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start rmr')
        path = f'{JuicefsMachine.MOUNT_POINT}{file_name}'
        write_block(JuicefsMachine.MOUNT_POINT, path, 1048576, 3)
        os.system(f'ls -l {path}')
        assert(os.path.exists(path))
        run_cmd(f'stat {path}')
        options = [juicefs, 'rmr', path]
        run_jfs_cmd(options)
        # TODO: should uncomment the assert
        # assert(not os.path.exists(path))
        print('rmr succeed')

    @rule(juicefs=st.sampled_from(JFS_BINS), 
    force=st.booleans())
    @precondition(lambda self: self.mounted)
    def umount(self, juicefs, force):
        assume (self.greater_than_version_formatted(juicefs))
        print('start umount')
        options = [juicefs, 'umount', JuicefsMachine.MOUNT_POINT]
        # don't force umount because it may not unmounted succeed.
        # if force:
        #    options.append('--force')
        run_jfs_cmd(options)
        self.mounted = False
        print('umount succeed')

    @rule(juicefs=st.sampled_from(JFS_BINS))
    @precondition(lambda self: self.formatted and not self.mounted)
    def destroy(self, juicefs):
        assume (self.greater_than_version_formatted(juicefs))
        assume(run_cmd(f'{juicefs} --help | grep destroy') == 0)
        print('start destroy')
        output = subprocess.run([juicefs, 'status', JuicefsMachine.META_URL], check=True, stdout=subprocess.PIPE).stdout.decode()
        if 'get timestamp too slow' in output: 
            # remove the first line caust it is tikv log message
            output = '\n'.join(output.split('\n')[1:]) 
        print(f'status output: {output}')
        uuid = json.loads(output.replace("'", '"'))['Setting']['UUID']
        print(f'uuid is: {uuid}')
        assert len(uuid) != 0
        options = [juicefs, 'destroy', JuicefsMachine.META_URL, uuid]
        options.append('--force')
        run_jfs_cmd(options)
        self.formatted = False
        self.mounted = False
        self.mounted_by = []
        self.formatted_by = ''
        print('destroy succeed')

    @rule(file_name=st.sampled_from(['myfile1', 'myfile2']), 
        data=st.binary() )
    @precondition(lambda self: self.mounted )
    def write_and_read(self, file_name, data):
        assume(not is_readonly(f'{JuicefsMachine.MOUNT_POINT}'))
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start write and read')
        path = JuicefsMachine.MOUNT_POINT+file_name
        write_data(JuicefsMachine.MOUNT_POINT, path, data)
        with open(path, "rb") as f:
            result = f.read()
        assert str(result) == str(data)
        print('write and read succeed')
    
    def write_rand_files(self, path, seed):
        count = 50
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)
        fsrand = FsRandomizer(path, count, seed)
        fsrand.stdout = sys.stdout
        fsrand.stderr = sys.stderr
        fsrand.verbose = False
        fsrand.randomize()

    # @rule()
    @precondition(lambda self: self.mounted )
    def write_rand_files_and_compare(self):
        start = time.time()
        assume(not is_readonly(f'{JuicefsMachine.MOUNT_POINT}'))
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        seed = int(time.time())
        self.write_rand_files(JuicefsMachine.MOUNT_POINT+'fsrand', seed)
        self.write_rand_files('/tmp/fsrand', seed)
        tcmp = TreeComparator(JuicefsMachine.MOUNT_POINT+'fsrand', '/tmp/fsrand')
        tcmp.compare()
        res = len(tcmp.left_only) + len(tcmp.right_only) + \
            len(tcmp.common_funny) + len(tcmp.funny_files) + len(tcmp.diff_files)
        if res > 0:
            raise Exception("compare failed")
        os.system(f"rm -rf {JuicefsMachine.MOUNT_POINT}/fsrand")
        os.system(f"rm -rf /tmp/fsrand")
        print('write_rand_files_and_compare execution time:', time.time()-start, 'seconds')

    @rule(juicefs = st.sampled_from(JFS_BINS))
    @precondition(lambda self: self.formatted )
    def dump(self, juicefs):
        assume (self.greater_than_version_formatted(juicefs))
        # check this because of: https://github.com/juicedata/juicefs/issues/2717
        assume(juicefs in self.mounted_by)
        print('start dump')
        run_jfs_cmd([juicefs, 'dump', JuicefsMachine.META_URL, 'dump.json'])
        self.dumped_by = juicefs
        print('dump succeed')

    @rule(juicefs = st.sampled_from(JFS_BINS))
    @precondition(lambda self: self.formatted and os.path.exists('dump.json'))
    def load(self, juicefs):
        assume (self.greater_than_version_formatted(juicefs))
        assume (self.greater_than_version_dumped(juicefs))
        print('start load')
        if os.path.exists(JuicefsMachine.MOUNT_POINT) and os.path.exists(JuicefsMachine.MOUNT_POINT+'.accesslog'):
            run_cmd('umount %s'%JuicefsMachine.MOUNT_POINT)
            print(f'umount {JuicefsMachine.MOUNT_POINT} succeed')
            self.mounted = False
        flush_meta(JuicefsMachine.META_URL)
        run_jfs_cmd([juicefs, 'load', JuicefsMachine.META_URL, 'dump.json'])
        print('load succeed')
        options = [juicefs, 'config', JuicefsMachine.META_URL]
        if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-rc1'):
            # use the latest version to change secret-key because rc1 has a bug for secret-key
            options[0] = JuicefsMachine.JFS_BINS[1]
        storage = get_storage(juicefs, JuicefsMachine.META_URL)
        if storage == 'minio':
            run_jfs_cmd([JuicefsMachine.JFS_BINS[1], 'config', JuicefsMachine.META_URL, '--access-key', 'minioadmin', '--secret-key', 'minioadmin'])
        elif storage == 'mysql':
            run_jfs_cmd([JuicefsMachine.JFS_BINS[1], 'config', JuicefsMachine.META_URL, '--access-key', 'root', '--secret-key', 'root'])
        elif storage == 'postgres':
            run_jfs_cmd([JuicefsMachine.JFS_BINS[1], 'config', JuicefsMachine.META_URL, '--access-key', 'postgres', '--secret-key', 'postgres'])
        
        os.remove('dump.json')

    @rule(juicefs=st.sampled_from(JFS_BINS))
    @precondition(lambda self: self.formatted)
    def fsck(self, juicefs):
        assume (self.greater_than_version_formatted(juicefs))
        assume(juicefs in self.mounted_by)
        print('start fsck')
        run_jfs_cmd([juicefs, 'fsck', JuicefsMachine.META_URL])
        print('fsck succeed')

    # @rule(juicefs=st.sampled_from(JFS_BINS),
    #  block_size=st.integers(min_value=1, max_value=32),
    #  big_file_size=st.integers(min_value=100, max_value=200),
    #  small_file_size=st.integers(min_value=1, max_value=256),
    #  small_file_count=st.integers(min_value=100, max_value=256), 
    #  threads=st.integers(min_value=1, max_value=100))
    @precondition(lambda self: self.mounted and False)
    def bench(self, juicefs, block_size, big_file_size, small_file_size, small_file_count, threads):
        assume (self.greater_than_version_formatted(juicefs))
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start bench')
        run_cmd(f'df | grep {JuicefsMachine.MOUNT_POINT}')
        options = [juicefs, 'bench', JuicefsMachine.MOUNT_POINT]
        options.extend(['--block-size', str(block_size)])
        options.extend(['--big-file-size', str(big_file_size)])
        options.extend(['--small-file-size', str(small_file_size)])
        options.extend(['--small-file-count', str(small_file_count)])
        options.extend(['--threads', str(threads)])
        output = run_jfs_cmd(options)
        summary = output.decode('utf8').split('\n')[2]
        expected = f'BlockSize: {block_size} MiB, BigFileSize: {big_file_size} MiB, SmallFileSize: {small_file_size} KiB, SmallFileCount: {small_file_count}, NumThreads: {threads}'
        assert summary == expected
        print('bench succeed')

    @rule(juicefs=st.sampled_from(JFS_BINS),
        threads=st.integers(min_value=1, max_value=100), 
        background = st.booleans(), 
        from_file = st.booleans(),
        directory = st.booleans() )
    @precondition(lambda self: self.mounted)
    def warmup(self, juicefs, threads, background, from_file, directory):
        assume (self.greater_than_version_formatted(juicefs))
        assume (self.greater_than_version_mounted(juicefs))
        assume(not is_readonly(f'{JuicefsMachine.MOUNT_POINT}'))
        assert(os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/.accesslog'))
        print('start warmup')
        clear_cache()
        options = [juicefs, 'warmup']
        options.extend(['--threads', str(threads)])
        if background:
            options.append('--background')
        if from_file:
            path_list = [JuicefsMachine.MOUNT_POINT+'file1', JuicefsMachine.MOUNT_POINT+'file2', JuicefsMachine.MOUNT_POINT+'file3']
            for filepath in path_list:
                if not os.path.exists(filepath):
                    write_block(JuicefsMachine.MOUNT_POINT, filepath, 4096, 100)
            with open('file.list', 'w') as f:
                for path in path_list:
                    f.write(path+'\n')
            time.sleep(get_upload_delay_seconds(f'{JuicefsMachine.MOUNT_POINT}')+1)
            while(get_stage_blocks(JuicefsMachine.MOUNT_POINT) != 0):
                print('sleep for stage')
                time.sleep(1)
            options.extend(['--file', 'file.list'])
        else:
            if directory:
                options.append(JuicefsMachine.MOUNT_POINT)
            else:
                write_block(JuicefsMachine.MOUNT_POINT, f'{JuicefsMachine.MOUNT_POINT}/file_to_warmup', 1048576, 100)
                assert os.path.exists(f'{JuicefsMachine.MOUNT_POINT}/file_to_warmup')
                options.append(f'{JuicefsMachine.MOUNT_POINT}/file_to_warmup')
                
        run_jfs_cmd(options)
        # print(output)
        print('warmup succeed')
        # assert output.decode('utf8').split('\n')[0].startswith('Warming up count: ')
        # assert output.decode('utf8').split('\n')[0].startswith('Warming up bytes: ')

    @rule(
        juicefs = st.sampled_from(JFS_BINS), 
        compact=st.booleans(), 
        delete=st.booleans(),
        threads=st.integers(min_value=1, max_value=100) )
    @precondition(lambda self: self.formatted)
    def gc(self, juicefs, compact, delete, threads):
        assume (self.greater_than_version_formatted(juicefs))
        assume(juicefs in self.mounted_by)
        print('start gc')
        options = [juicefs, 'gc', JuicefsMachine.META_URL]
        if compact:
            options.append('--compact')
        if delete:
            options.append('--delete')
        options.extend(['--threads', str(threads)])
        run_jfs_cmd(options)
        # print(output)
        print('gc succeed')


    @rule(juicefs=st.sampled_from(JFS_BINS), 
        get_timeout=st.integers(min_value=30, max_value=59), 
        put_timeout=st.integers(min_value=30, max_value=59), 
        io_retries=st.integers(min_value=5, max_value=15), 
        max_uploads=st.integers(min_value=1, max_value=100), 
        max_deletes=st.integers(min_value=1, max_value=100), 
        buffer_size=st.integers(min_value=100, max_value=1000), 
        upload_limit=st.integers(min_value=0, max_value=1000), 
        download_limit=st.integers(min_value=0, max_value=1000), 
        prefetch=st.integers(min_value=0, max_value=100), 
        writeback=st.just(False),
        upload_delay=st.sampled_from([0, 2]), 
        cache_dir=st.sampled_from(['cache1', 'cache2']),
        cache_size=st.integers(min_value=0, max_value=1024000), 
        free_space_ratio=st.floats(min_value=0.1, max_value=0.5), 
        cache_partial_only=st.booleans(),
        backup_meta=st.integers(min_value=300, max_value=1000),
        heartbeat=st.integers(min_value=5, max_value=30), 
        read_only=st.booleans(),
        no_bgjob=st.booleans(),
        open_cache=st.integers(min_value=0, max_value=100),
        attr_cache=st.integers(min_value=1, max_value=10), 
        entry_cache=st.integers(min_value=1, max_value=10), 
        dir_entry_cache=st.integers(min_value=1, max_value=10), 
        access_log=st.sampled_from(['accesslog1', 'accesslog2']),
        no_banner=st.booleans(),
        multi_buckets=st.booleans(), 
        keep_etag=st.booleans(),
        umask=st.sampled_from(['022', '755']), 
        metrics=st.sampled_from(['127.0.0.1:9567', '127.0.0.1:9568']), 
        consul=st.sampled_from(['127.0.0.1:8500', '127.0.0.1:8501']), 
        sub_dir=st.sampled_from(['dir1', 'dir2']),
        port=st.integers(min_value=9001, max_value=10000)
    )
    @precondition(lambda self: self.formatted and False)
    def gateway(self, juicefs, get_timeout, put_timeout, io_retries, max_uploads, max_deletes, buffer_size, upload_limit, 
        download_limit, prefetch, writeback, upload_delay, cache_dir, cache_size, free_space_ratio, cache_partial_only, 
        backup_meta,heartbeat, read_only, no_bgjob, open_cache, attr_cache, entry_cache, dir_entry_cache, access_log, 
        no_banner, multi_buckets, keep_etag, umask, metrics, consul, sub_dir, port):
        assume (self.greater_than_version_formatted(juicefs))
        assume(not is_port_in_use(port))
        if JuicefsMachine.META_URL.startswith('badger://'):
            assume(not self.mounted)
        print('start gateway')
        os.environ['MINIO_ROOT_USER'] = 'admin'
        os.environ['MINIO_ROOT_PASSWORD'] = '12345678'
        options = [juicefs, 'gateway', JuicefsMachine.META_URL, f'localhost:{port}']
        
        options.extend(['--attr-cache', str(attr_cache)])
        options.extend(['--entry-cache', str(entry_cache)])
        options.extend(['--dir-entry-cache', str(dir_entry_cache)])
        options.extend(['--get-timeout', str(get_timeout)])
        options.extend(['--put-timeout', str(put_timeout)])
        options.extend(['--io-retries', str(io_retries)])
        options.extend(['--max-uploads', str(max_uploads)])
        if run_cmd(f'{juicefs} gateway --help | grep max-deletes') == 0:
            options.extend(['--max-deletes', str(max_deletes)])
        options.extend(['--buffer-size', str(buffer_size)])
        options.extend(['--upload-limit', str(upload_limit)])
        options.extend(['--download-limit', str(download_limit)])
        options.extend(['--prefetch', str(prefetch)])
        if writeback:
            options.append('--writeback')
        upload_delay = str(upload_delay)
        if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-beta2'):
            upload_delay = upload_delay + 's'
        options.extend(['--upload-delay', upload_delay])
        options.extend(['--cache-dir', os.path.expanduser(f'~/.juicefs/{cache_dir}')])
        options.extend(['--access-log', os.path.expanduser(f'~/.juicefs/{access_log}')])
        options.extend(['--cache-size', str(cache_size)])
        options.extend(['--free-space-ratio', str(free_space_ratio)])
        if cache_partial_only:
            options.append('--cache-partial-only')
        backup_meta = str(backup_meta)
        if version.parse('-'.join(juicefs.split('-')[1:])) <= version.parse('1.0.0-beta2'):
            backup_meta = '1h0m0s'
        if run_cmd(f'{juicefs} gateway --help | grep backup-meta') == 0:
            options.extend(['--backup-meta', backup_meta])
        if run_cmd(f'{juicefs} gateway --help | grep heartbeat') == 0:
            options.extend(['--heartbeat', str(heartbeat)])
        if read_only:
            options.append('--read-only')
        if no_bgjob and run_cmd(f'{juicefs} gateway --help | grep no-bgjob') == 0:
            options.append('--no-bgjob')
        if no_banner:
            options.append('--no-banner')
        if multi_buckets and run_cmd(f'{juicefs} gateway --help | grep multi-buckets') == 0:
            options.append('--multi-buckets')
        if keep_etag and run_cmd(f'{juicefs} gateway --help | grep keep-etag') == 0:
            options.append('--keep-etag')
        if run_cmd(f'{juicefs} gateway --help | grep umask') == 0:
            options.extend(['--umask', umask])

        options.extend(['--open-cache', str(open_cache)])
        print(f'TODO: subdir:{sub_dir}')
        # options.extend('--subdir', str(sub_dir))
        if not is_port_in_use( int(metrics.split(':')[1])):
            options.extend(['--metrics', str(metrics)])
        # if run_cmd(f'{juicefs} mount --help | grep consul') == 0:
        #     options.extend(['--consul', str(consul)])
        options.append('--no-usage-report')

        proc=subprocess.Popen(options)
        time.sleep(2.0)
        subprocess.Popen.kill(proc)
        print('gateway succeed')


    @rule(juicefs = st.sampled_from(JFS_BINS), 
        port=st.integers(min_value=10001, max_value=11000)) 
    @precondition(lambda self: self.formatted and False)
    def webdav(self, juicefs, port):
        assume (self.greater_than_version_formatted(juicefs))
        assert version.parse('-'.join(juicefs.split('-')[1:])) >=  version.parse('-'.join(self.formatted_by.split('-')[1:]))
        assume (not is_port_in_use(port))
        if JuicefsMachine.META_URL.startswith('badger://'):
            assume(not self.mounted)
        print('start webdav')
        
        options = [juicefs, 'webdav', JuicefsMachine.META_URL, f'localhost:{port}']
        proc = subprocess.Popen(options)
        time.sleep(2.0)
        subprocess.Popen.kill(proc)
        print('webdav succeed')

    def greater_than_version_formatted(self, ver):
        print(f'ver is {ver}, formatted_by is {self.formatted_by}')
        if not self.formatted_by:
            return True
        return version.parse('-'.join(ver.split('-')[1:])) >=  version.parse('-'.join(self.formatted_by.split('-')[1:]))

    def greater_than_version_dumped(self, ver):
        if not self.dumped_by:
            return True
        return version.parse('-'.join(ver.split('-')[1:])) >=  version.parse('-'.join(self.dumped_by.split('-')[1:]))

    def greater_than_version_mounted(self, ver):
        for mounted_version in self.mounted_by:
            if version.parse('-'.join(ver.split('-')[1:])) <  version.parse('-'.join(mounted_version.split('-')[1:])):
                return False
        return True


TestJuiceFS = JuicefsMachine.TestCase

if __name__ == "__main__":
    unittest.main(failfast=True)

================================================
FILE: .github/scripts/upload_coverage_report.sh
================================================
#!/bin/bash

# 参数检查
if [ "$#" -ne 3 ]; then
  echo "Usage: $0 <coverage_file> <upload_path> <token>"
  exit 1
fi

COVERAGE_FILE=$1
UPLOAD_PATH=$2
TOKEN=$3
attempt=1
max_attempts=3

while [ $attempt -le $max_attempts ]; do
  response=$(curl -w '%{http_code}' -s -o /dev/null --form "file=@${COVERAGE_FILE}" "https://juicefs.com/upload-file-u80sdvuke/${UPLOAD_PATH}?token=${TOKEN}")
  if [ "$response" -eq 200 ]; then
    echo "Coverage Report: https://i.juicefs.io/ci-coverage/${UPLOAD_PATH}"
    break
  else
    echo "Upload attempt $attempt failed with status code $response. Retrying..."
    attempt=$((attempt + 1))
    sleep 5  # 等待5秒钟后重试
  fi
done

if [ "$response" -ne 200 ]; then
  echo "Upload failed after $max_attempts attempts with status code $response"
  exit 1
fi

================================================
FILE: .github/scripts/utils.py
================================================
import subprocess
try:
    __import__("minio")
except ImportError:
    subprocess.check_call(["pip", "install", "minio"])
import json
import os
from posixpath import expanduser
import shutil
import subprocess
import sys
import time
from urllib.parse import urlparse
from minio import Minio

def flush_meta(meta_url:str):
    print(f'start flush meta: {meta_url}')
    if meta_url.startswith('sqlite3://'):
        path = meta_url[len('sqlite3://'):]
        if os.path.isfile(path):
            os.remove(path)
            print(f'remove meta file {path} succeed')
    elif meta_url.startswith('badger://'):
        path = meta_url[len('badger://'):]
        if os.path.isdir(path):
            shutil.rmtree(path)
            print(f'remove badger dir {path} succeed')
    elif meta_url.startswith('redis://') or meta_url.startswith('tikv://'):
        default_port = {"redis": 6379, "tikv": 2379}
        parsed = urlparse(meta_url)
        protocol = parsed.scheme
        host = parsed.hostname
        port = parsed.port if parsed.port else default_port[protocol]
        db = parsed.path.lstrip('/').split('/')[0]
        assert db
        print(f'flushing {protocol}://{host}:{port}/{db}')
        if protocol == 'redis':
            run_cmd(f'redis-cli -h {host} -p {port} -n {db} flushdb')
        elif protocol == 'tikv':
            # TODO: should only flush the specified db
            run_cmd(f'echo "delall --yes" |tcli -pd {host}:{port}')
        else:
            raise Exception(f'{protocol} not supported')
        print(f'flush {protocol}://{host}:{port}/{db} succeed')
    elif meta_url.startswith('mysql://'):
        create_mysql_db(meta_url)
    elif meta_url.startswith('postgres://'): 
        create_postgres_db(meta_url)
    elif meta_url.startswith('fdb://'):
        # fdb:///home/runner/fdb.cluster?prefix=jfs2
        prefix = meta_url.split('?prefix=')[1] if '?prefix=' in meta_url else ""
        cluster_file = meta_url.split('fdb://')[1].split('?')[0]
        print(f'flushing fdb: cluster_file: {cluster_file}, prefix: {prefix}')
        run_cmd(f'echo "writemode on; clearrange {prefix} {prefix}\\xff" | fdbcli -C {cluster_file}')
        print(f'flush fdb succeed')
    else:
        raise Exception(f'{meta_url} not supported')
    print('flush meta succeed')

def create_mysql_db(meta_url):
    db_name = meta_url[8:].split('@')[1].split('/')[1].split('?')[0]
    user = meta_url[8:].split('@')[0].split(':')[0]
    password = meta_url[8:].split('@')[0].split(':')[1]
    if password: 
        password = f'-p{password}'
    host_port= meta_url[8:].split('@')[1].split('/')[0].replace('(', '').replace(')', '')
    if ':' in host_port:
        host = host_port.split(':')[0]
        port = host_port.split(':')[1]
    else:
        host = host_port
        port = '3306'
    run_cmd(f'mysql -u{user} {password} -h {host} -P {port} -e "drop database if exists {db_name}; create database {db_name};"')

def create_postgres_db(meta_url):
    os.environ['PGPASSWORD'] = 'postgres'
    db_name = meta_url[8:].split('@')[1].split('/')[1]
    if '?' in db_name:
        db_name = db_name.split('?')[0]
    run_cmd(f'printf "\set AUTOCOMMIT on\ndrop database if exists {db_name}; create database {db_name}; " |  psql -U postgres -h localhost')

def clear_storage(storage, bucket, volume):
    print('start clear storage')
    if storage == 'file':
        storage_dir = os.path.join(bucket, volume) 
        if os.path.exists(storage_dir):
            try:
                shutil.rmtree(storage_dir)
                print(f'remove cache dir {storage_dir} succeed')
            except OSError as e:
                print("Error: %s : %s" % (storage_dir, e.strerror))
    elif storage == 'minio':
        from urllib.parse import urlparse
        url = urlparse(bucket)
        c = Minio('localhost:9000', access_key='minioadmin', secret_key='minioadmin', secure=False)
        bucket_name = url.path[1:]
        while c.bucket_exists(bucket_name) and list(c.list_objects(bucket_name)) :
            print(f'try to remove bucket {url.path[1:]}')
            result = run_cmd(f'mc rm --recursive --force  myminio/{bucket_name}')
            if result != 0:
                raise Exception(f'remove {bucket_name} failed')
            if c.bucket_exists(url.path[1:]) and list(c.list_objects(bucket_name)):
                time.sleep(1)
        print(f'remove bucket {bucket_name} succeed')
        if c.bucket_exists(bucket_name):
            assert not list(c.list_objects(bucket_name))
    elif storage == 'mysql':
        db_name = bucket.split('/')[-1]
        run_cmd(f'mysql -uroot -proot -h localhost -P 3306 -e "drop database if exists {db_name};create database {db_name};"')
    elif storage == 'postgres':
        db_name = bucket.split('/')[1]
        if '?' in db_name:
            db_name = db_name.split('?')[0]
        run_cmd(f'printf "\set AUTOCOMMIT on\ndrop database if exists {db_name}; create database {db_name}; " |  psql -U postgres -h localhost')
    print('clear storage succeed')


def clear_cache():
    run_cmd('sudo rm -rf /var/jfsCache')
    run_cmd(f'sudo rm -rf {os.path.expanduser("~/.juicefs/cache")}')
    if sys.platform.startswith('linux') :
        os.system('sudo bash -c  "echo 3> /proc/sys/vm/drop_caches"')

def is_readonly(filesystem):
    if not os.path.exists(f'{filesystem}/.config'):
        return False
    with open(f'{filesystem}/.config') as f:
        config = json.load(f)
        return config['Meta']['ReadOnly']

def get_upload_delay_seconds(filesystem):
    if not os.path.exists(f'{filesystem}/.config'):
        return 0
    with open(f'{filesystem}/.config') as f:
        config = json.load(f)
        return config['Chunk']['UploadDelay']/1000000000
    
def get_stage_blocks(filesystem):
    try:
        ps = subprocess.Popen(('cat', f'{filesystem}/.stats'), stdout=subprocess.PIPE)
        output = subprocess.check_output(('grep', 'juicefs_staging_blocks'), stdin=ps.stdout)
        ps.wait()
        return int(output.decode().split()[1])
    except subprocess.CalledProcessError:
        print('get_stage_blocks: no juicefs_staging_blocks find')
        return 0

def write_data(filesystem, path, data):
    with open(path, "wb") as f:
        f.write(data)
    retry = get_upload_delay_seconds(filesystem) + 10
    while get_stage_blocks(filesystem) != 0 and retry > 0:
        print('sleep for stage')
        retry = retry - 1
        time.sleep(1)
    # assert get_stage_blocks(filesystem) == 0

def write_block(filesystem, filepath, bs, count):
    run_cmd(f'dd if=/dev/urandom of={filepath} bs={bs} count={count}')
    retry = get_upload_delay_seconds(filesystem) + 10
    while get_stage_blocks(filesystem) != 0 and retry > 0:
        print('sleep for stage')
        retry = retry - 1
        time.sleep(1)
    # assert get_stage_blocks(filesystem) == 0

def mdtest(filesystem, meta_url):
    juicefs_new = './'+os.environ.get('NEW_JFS_BIN')
    cwd = os.getcwd()
    if not os.path.exists(f'{filesystem}/{juicefs_new}'):
        run_cmd(f'ln -s {cwd}/{juicefs_new} {filesystem}/{juicefs_new}')
    os.chdir(filesystem)
    run_jfs_cmd(f'{juicefs_new} mdtest {meta_url} mdtest --dirs 5 --depth 2 --files 5 --threads 5 --write 8192'.split())
    os.chdir(cwd)
    time.sleep(get_upload_delay_seconds(filesystem)+1)
    retry = 5
    while get_stage_blocks(filesystem) != 0 and retry > 0:
        print('sleep for stage')
        retry = retry - 1
        time.sleep(1)
    assert os.path.exists(filesystem+'mdtest')

def run_jfs_cmd( options):
    # options.append('--debug')
    print('run_jfs_cmd:'+' '.join(options))
    with open(os.path.expanduser('~/command.log'), 'a') as f:
        f.write(' '.join(options).replace('/home/runner', '~'))
        f.write('\n')
    try:
        output = subprocess.run(options, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(f'<FATAL>: subprocess run error, return code: {e.returncode} , error message: {e.output.decode()}')
        raise Exception('subprocess run error')
    print(f'run_jfs_cmd return code: {output.returncode}, output: {output.stdout.decode()}')
    print('run_jfs_cmd succeed')
    return output.stdout.decode()

def run_cmd(command):
    print('run_cmd:'+command)
    if '|' in command or '"' in command:
        return os.system(command)
    try:
        output = subprocess.run(command.split(), check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print(f'<FATAL>: subprocess run error, return code: {e.returncode} , error message: {e.output.decode()}')
        return e.returncode
    if output.stdout:
        print(output.stdout.decode())
    print('run_cmd succeed')
    return output.returncode

def is_port_in_use(port: int) -> bool:
    import socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return s.connect_ex(('localhost', port)) == 0

def get_storage(juicefs, meta_url):
    output = subprocess.run([juicefs, 'status', meta_url], check=True, stdout=subprocess.PIPE).stdout.decode()
    if 'get timestamp too slow' in output: 
        # remove the first line caust it is tikv log message
        output = '\n'.join(output.split('\n')[1:])
    print(f'status output: {output}')
    storage = json.loads(output.replace("'", '"'))['Setting']['Storage']
    return storage

if __name__ == "__main__":
    run_jfs_cmd(['./juicefs-1.1.0-dev', 'rmr', '/tmp/sync-test/file_to_rmr', '--debug'])

================================================
FILE: .github/scripts/wins_fs_test.py
================================================
import os
import sys
import time
import shutil
import random
import string
import threading
import unittest
from pathlib import Path

class WindowsFSTest(unittest.TestCase):
    def setUp(self):
        self.test_dir = "Z:\\test_fs"
        self.ensure_clean_dir(self.test_dir)
        
    def tearDown(self):
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir, ignore_errors=True)
    
    def ensure_clean_dir(self, path):
        if os.path.exists(path):
            shutil.rmtree(path, ignore_errors=True)
        os.makedirs(path)
    
    def random_string(self, length=10):
        return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
    
    def test_basic_operations(self):
        test_file = os.path.join(self.test_dir, "test.txt")
        content = "Hello, Windows!"
        with open(test_file, 'w') as f:
            f.write(content)
        
        with open(test_file, 'r') as f:
            self.assertEqual(f.read(), content)
        
        new_file = os.path.join(self.test_dir, "new.txt")
        os.rename(test_file, new_file)
        self.assertTrue(os.path.exists(new_file))
        
        os.remove(new_file)
        self.assertFalse(os.path.exists(new_file))
    
    def test_rename_case_change(self):
        test_file = os.path.join(self.test_dir, "a")
        content = "Hello, Windows!"
        with open(test_file, 'w') as f:
            f.write(content)
        new_file_lower = os.path.join(self.test_dir, "A")
        os.rename(test_file, new_file_lower)
        self.assertTrue(os.path.exists(new_file_lower))
        new_file_upper = os.path.join(self.test_dir, "a")
        os.rename(new_file_lower, new_file_upper)
        self.assertTrue(os.path.exists(new_file_upper))
        os.remove(new_file_upper)

    def test_directory_operations(self):
        nested_dir = os.path.join(self.test_dir, "dir1", "dir2", "dir3")
        os.makedirs(nested_dir)
        
        self.assertTrue(os.path.exists(nested_dir))
        
        test_file = os.path.join(nested_dir, "test.txt")
        Path(test_file).touch()
        
        files = list(Path(self.test_dir).rglob("*"))
        self.assertTrue(len(files) > 0)
        
        new_dir = os.path.join(self.test_dir, "new_dir")
        shutil.move(os.path.join(self.test_dir, "dir1"), new_dir)
        self.assertTrue(os.path.exists(new_dir))
    
    def test_concurrent_operations(self):
        file_count = 10
        thread_count = 5
        
        def write_files(start_idx):
            for i in range(start_idx, start_idx + file_count):
                file_path = os.path.join(self.test_dir, f"concurrent_{i}.txt")
                with open(file_path, 'w') as f:
                    f.write(self.random_string(100))
        
        threads = []
        for i in range(thread_count):
            t = threading.Thread(target=write_files, args=(i * file_count,))
            threads.append(t)
            t.start()
        
        for t in threads:
            t.join()
        
        files = os.listdir(self.test_dir)
        self.assertEqual(len(files), file_count * thread_count)
    
    def test_special_characters(self):
        special_chars = [
            "test with spaces",
            "test_with_unicode_中文",
            "test_with_symbols_!@#$%",
            "test.with.multiple.dots"
        ]
        
        for name in special_chars:
            file_path = os.path.join(self.test_dir, name)
            with open(file_path, 'w') as f:
                f.write("test")
            self.assertTrue(os.path.exists(file_path))
            with open(file_path, 'r') as f:
                self.assertEqual(f.read(), "test")
    
    def test_large_files(self):
        large_file = os.path.join(self.test_dir, "large_file.dat")
        size_mb = 10
        chunk_size = 1024 * 1024  # 1MB
        
        with open(large_file, 'wb') as f:
            for _ in range(size_mb):
                f.write(os.urandom(chunk_size))
        
        self.assertEqual(os.path.getsize(large_file), size_mb * chunk_size)
        
        with open(large_file, 'rb') as f:
            chunks = 0
            while f.read(chunk_size):
                chunks += 1
        self.assertEqual(chunks, size_mb)
    
    def test_file_attributes(self):
        test_file = os.path.join(self.test_dir, "attrs.txt")
        with open(test_file, 'w') as f:
            f.write("test")
        
        
        os.system(f'attrib +R "{test_file}"')
        
        with self.assertRaises(PermissionError):
            with open(test_file, 'w') as f:
                f.write("new content")
        
        os.system(f'attrib -R "{test_file}"')
    @unittest.skip("Windows Do not support")
    def test_symlinks(self):
        source_dir_root = os.path.join(self.test_dir, "source")
        link_dir_root = os.path.join(self.test_dir, "links")
        os.makedirs(source_dir_root)
        os.makedirs(link_dir_root)
        
        source_file = os.path.join(source_dir_root, "source_file.txt")
        source_dir = os.path.join(source_dir_root, "source_dir")
        with open(source_file, 'w') as f:
            f.write("test content")
        os.makedirs(source_dir)
        
        link_file = os.path.join(link_dir_root, "link_file.txt")
        os.symlink(source_file, link_file)
        self.assertTrue(os.path.exists(link_file))
        with open(link_file, 'r') as f:
            self.assertEqual(f.read(), "test content")
            
        link_dir = os.path.join(link_dir_root, "link_dir")
        os.symlink(source_dir, link_dir, target_is_directory=True)
        self.assertTrue(os.path.exists(link_dir))
        
        link_test_file = os.path.join(link_dir, "test.txt")
        with open(link_test_file, 'w') as f:
            f.write("test through link")
        
        source_test_file = os.path.join(source_dir, "test.txt")
        self.assertTrue(os.path.exists(source_test_file))
        with open(source_test_file, 'r') as f:
            self.assertEqual(f.read(), "test through link")
        
        os.remove(source_file)
        self.assertFalse(os.path.exists(link_file))

    def test_long_paths(self):
        deep_dir = self.test_dir
        for i in range(10):  
            deep_dir = os.path.join(deep_dir, f"dir_{i}")
        
        os.makedirs(deep_dir, exist_ok=True)
        test_file = os.path.join(deep_dir, "test.txt")
        
        with open(test_file, 'w') as f:
            f.write("test")
        
        self.assertTrue(os.path.exists(test_file))

if __name__ == '__main__':
    unittest.main(verbosity=2)

================================================
FILE: .github/workflows/bash/rm_fs
================================================
gf01 growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 -f glseek20 -S 2 -d $TMPDIR
gf02 growfiles -W gf02 -b -e 1 -L 10 -i 100 -I p -S 2 -u -f gf03_ -d $TMPDIR
gf03 growfiles -W gf03 -b -e 1 -g 1 -i 1 -S 150 -u -f gf05_ -d $TMPDIR
gf04 growfiles -W gf04 -b -e 1 -g 4090 -i 500 -t 39000 -u -f gf06_ -d $TMPDIR
gf05 growfiles -W gf05 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -u -f gf07_ -d $TMPDIR
gf06 growfiles -W gf06 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -f g_rand10 -S 2 -d $TMPDIR
gf07 growfiles -W gf07 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -I p -f g_rand13 -S 2 -d $TMPDIR
gf08 growfiles -W gf08 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -f g_rand11 -S 2 -d $TMPDIR
gf09 growfiles -W gf09 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -I p -f g_rand12 -S 2 -d $TMPDIR
gf10 growfiles -W gf10 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I l -f g_lio14 -S 2 -d $TMPDIR
gf11 growfiles -W gf11 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I L -f g_lio15 -S 2 -d $TMPDIR
gf12 mkfifo $TMPDIR/gffifo17; growfiles -b -W gf12 -e 1 -u -i 0 -L 30 $TMPDIR/gffifo17
gf13 mkfifo $TMPDIR/gffifo18; growfiles -b -W gf13 -e 1 -u -i 0 -L 30 -I r -r 1-4096 $TMPDIR/gffifo18
gf14 growfiles -W gf14 -b -e 1 -u -i 0 -L 20 -w -l -C 1 -T 10 -f glseek19 -S 2 -d $TMPDIR
gf15 growfiles -W gf15 -b -e 1 -u -r 1-49600 -I r -u -i 0 -L 120 -f Lgfile1 -d $TMPDIR
gf16 growfiles -W gf16 -b -e 1 -i 0 -L 120 -u -g 4090 -T 101 -t 408990 -l -C 10 -c 1000 -S 10 -f Lgf02_ -d $TMPDIR
gf17 growfiles -W gf17 -b -e 1 -i 0 -L 120 -u -g 5000 -T 101 -t 499990 -l -C 10 -c 1000 -S 10 -f Lgf03_ -d $TMPDIR
gf18 growfiles -W gf18 -b -e 1 -i 0 -L 120 -w -u -r 10-5000 -I r -l -S 2 -f Lgf04_ -d $TMPDIR
gf19 growfiles -W gf19 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -o O_RDWR,O_CREAT,O_TRUNC -u -f gf08i_ -d $TMPDIR
gf20 growfiles -W gf20 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1-256000:512 -R 512-256000 -T 4 -f gfbigio-$$ -d $TMPDIR
gf21 growfiles -W gf21 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bld-$$ -d $TMPDIR
gf22 growfiles -W gf22 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bldf-$$ -d $TMPDIR
gf23 growfiles -W gf23 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 512-64000:1024 -R 1-384000 -T 4 -f gf-inf-$$ -d $TMPDIR
gf24 growfiles -W gf24 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -f gf-jbld-$$ -d $TMPDIR
gf25 growfiles -W gf25 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1024000-2048000:2048 -R 4095-2048000 -T 1 -f gf-large-gs-$$ -d $TMPDIR
gf26 growfiles -W gf26 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 128-32768:128 -R 512-64000 -T 4 -f gfsmallio-$$ -d $TMPDIR
gf27 growfiles -W gf27 -b -D 0 -w -g 8b -C 1 -b -i 1000 -u -f gfsparse-1-$$ -d $TMPDIR
gf28 growfiles -W gf28 -b -D 0 -w -g 16b -C 1 -b -i 1000 -u -f gfsparse-2-$$ -d $TMPDIR
gf29 growfiles -W gf29 -b -D 0 -r 1-4096 -R 0-33554432 -i 0 -L 60 -C 1 -u -f gfsparse-3-$$ -d $TMPDIR
gf30 growfiles -W gf30 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -o O_RDWR,O_CREAT,O_SYNC -g 20480 -T 10 -t 20480 -f gf-sync-$$ -d $TMPDIR
rwtest01 export LTPROOT; rwtest -N rwtest01 -c -q -i 60s  -f sync 10%25000:$TMPDIR/rw-sync-$$
rwtest02 export LTPROOT; rwtest -N rwtest02 -c -q -i 60s  -f buffered 10%25000:$TMPDIR/rw-buffered-$$
rwtest03 export LTPROOT; rwtest -N rwtest03 -c -q -i 60s -n 2  -f buffered -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-buff-$$
rwtest04 export LTPROOT; rwtest -N rwtest04 -c -q -i 60s -n 2  -f sync -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-sync-$$
rwtest05 export LTPROOT; rwtest -N rwtest05 -c -q -i 50 -T 64b 500b:$TMPDIR/rwtest01%f
iogen01 export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$
quota_remount_test01 quota_remount_test01.sh
isofs isofs.sh
fs_fill fs_fill

================================================
FILE: .github/workflows/bash/rm_list.sh
================================================
#!/bin/bash
LIST=`cat $1`

for LINE in $LIST; do
      # should remove empty line and comment line
      sed -i -e "\!^${LINE}.*!d" -e "\!^#!d" -e "\!^\s*\$!d" $2
done


================================================
FILE: .github/workflows/bash/rm_syscalls
================================================
alarm02 alarm02
alarm03 alarm03
alarm05 alarm05
alarm06 alarm06
alarm07 alarm07
bind01 bind01
bind02 bind02
bind03 bind03
bind04 bind04
bind05 bind05
bind06 bind06
bpf_prog05 bpf_prog05
cacheflush01 cacheflush01
chown01_16 chown01_16
chown02_16 chown02_16
chown03_16 chown03_16
chown04_16 chown04_16
chown05_16 chown05_16
clock_adjtime01 clock_adjtime01
clock_adjtime02 clock_adjtime02
clock_getres01 clock_getres01
clock_nanosleep01 clock_nanosleep01
clock_nanosleep02 clock_nanosleep02
clock_nanosleep03 clock_nanosleep03
clock_nanosleep04 clock_nanosleep04
clock_gettime01 clock_gettime01
clock_gettime02 clock_gettime02
clock_gettime03 clock_gettime03
clock_gettime04 clock_gettime04
leapsec01 leapsec01
clock_settime01 clock_settime01
clock_settime02 clock_settime02
clock_settime03 clock_settime03
close_range01 close_range01
close_range02 close_range02
fallocate06 fallocate06
fanotify01 fanotify01
fanotify02 fanotify02
fanotify03 fanotify03
fanotify04 fanotify04
fanotify05 fanotify05
fanotify06 fanotify06
fanotify07 fanotify07
fanotify08 fanotify08
fanotify09 fanotify09
fanotify10 fanotify10
fanotify11 fanotify11
fanotify12 fanotify12
fanotify13 fanotify13
fanotify14 fanotify14
fanotify15 fanotify15
fanotify16 fanotify16
fanotify17 fanotify17
fanotify18 fanotify18
fanotify19 fanotify19
fchown01_16 fchown01_16
fchown02_16 fchown02_16
fchown03_16 fchown03_16
fchown04_16 fchown04_16
fchown05_16 fchown05_16
fcntl06 fcntl06
fcntl06_64 fcntl06_64
fork01 fork01
fork02 fork02
fork03 fork03
fork04 fork04
fork05 fork05
fork06 fork06
fork07 fork07
fork08 fork08
fork09 fork09
fork10 fork10
fork11 fork11
fork13 fork13 -i 1000000
fork14 fork14
getegid01_16 getegid01_16
getegid02_16 getegid02_16
geteuid01_16 geteuid01_16
geteuid02_16 geteuid02_16
getgid01_16 getgid01_16
getgid03_16 getgid03_16
getgroups01_16 getgroups01_16
getgroups03_16 getgroups03_16
getresgid01_16 getresgid01_16
getresgid02_16 getresgid02_16
getresgid03_16 getresgid03_16
getresuid01_16 getresuid01_16
getresuid02_16 getresuid02_16
getresuid03_16 getresuid03_16
getrusage04 getrusage04
gettimeofday01 gettimeofday01
gettimeofday02 gettimeofday02
getuid01_16 getuid01_16
getuid03_16 getuid03_16
ioctl03      ioctl03
ioctl_sg01 ioctl_sg01
fanotify16 fanotify16
fanotify18 fanotify18
fanotify19 fanotify19
keyctl01 keyctl01
keyctl02 keyctl02
keyctl03 keyctl03
keyctl04 keyctl04
keyctl05 keyctl05
keyctl06 keyctl06
keyctl07 keyctl07
keyctl08 keyctl08
kill02 kill02
kill03 kill03
kill05 kill05
kill06 kill06
kill07 kill07
kill08 kill08
kill09 kill09
kill10 kill10
kill11 kill11
kill12 kill12
kill13 kill13
lchown01_16 lchown01_16
lchown02_16 lchown02_16
lchown03_16 lchown03_16
mbind02 mbind02
mbind03 mbind03
mbind04 mbind04
migrate_pages02 migrate_pages02
migrate_pages03 migrate_pages03
modify_ldt01 modify_ldt01
modify_ldt02 modify_ldt02
modify_ldt03 modify_ldt03
move_pages01 move_pages01
move_pages02 move_pages02
move_pages03 move_pages03
move_pages04 move_pages04
move_pages05 move_pages05
move_pages06 move_pages06
move_pages07 move_pages07
move_pages09 move_pages09
move_pages10 move_pages10
move_pages11 move_pages11
move_pages12 move_pages12
msgctl05 msgctl05
msgstress04 msgstress04
nanosleep01 nanosleep01
nanosleep02 nanosleep02
nanosleep04 nanosleep04
openat201 openat201
openat202 openat202
openat203 openat203
madvise06 madvise06
madvise09 madvise09
pselect01 pselect01
pselect01_64 pselect01_64
ptrace04 ptrace04
quotactl01 quotactl01
quotactl04 quotactl04
quotactl06 quotactl06
readdir21 readdir21
recvmsg03 recvmsg03
rt_sigaction01 rt_sigaction01
rt_sigaction02 rt_sigaction02
rt_sigaction03 rt_sigaction03
rt_sigprocmask01 rt_sigprocmask01
rt_sigprocmask02 rt_sigprocmask02
rt_sigqueueinfo01 rt_sigqueueinfo01
rt_sigsuspend01 rt_sigsuspend01
rt_sigtimedwait01 rt_sigtimedwait01
rt_tgsigqueueinfo01 rt_tgsigqueueinfo01
sbrk03 sbrk03
select02 select02
semctl08 semctl08
semctl09 semctl09
sendfile09_64 sendfile09_64
set_mempolicy01 set_mempolicy01
set_mempolicy02 set_mempolicy02
set_mempolicy03 set_mempolicy03
set_mempolicy04 set_mempolicy04
set_thread_area01 set_thread_area01
setfsgid01_16 setfsgid01_16
setfsgid02_16 setfsgid02_16
setfsgid03_16 setfsgid03_16
setfsuid01_16 setfsuid01_16
setfsuid02_16 setfsuid02_16
setfsuid03_16 setfsuid03_16
setfsuid04_16 setfsuid04_16
setgid01_16 setgid01_16
setgid02_16 setgid02_16
setgid03_16 setgid03_16
sgetmask01 sgetmask01
setgroups01_16 setgroups01_16
setgroups02_16 setgroups02_16
setgroups03_16 setgroups03_16
setgroups04_16 setgroups04_16
setregid01_16 setregid01_16
setregid02_16 setregid02_16
setregid03_16 setregid03_16
setregid04_16 setregid04_16
setresgid01_16 setresgid01_16
setresgid02_16 setresgid02_16
setresgid03_16 setresgid03_16
setresgid04_16 setresgid04_16
setresuid01_16 setresuid01_16
setresuid02_16 setresuid02_16
setresuid03_16 setresuid03_16
setresuid04_16 setresuid04_16
setresuid05_16 setresuid05_16
setreuid01_16 setreuid01_16
setreuid02_16 setreuid02_16
setreuid03_16 setreuid03_16
setreuid04_16 setreuid04_16
setreuid05_16 setreuid05_16
setreuid06_16 setreuid06_16
setreuid07_16 setreuid07_16
setsockopt06 setsockopt06
setsockopt07 setsockopt07
setuid01_16 setuid01_16
setuid03_16 setuid03_16
setuid04_16 setuid04_16
shmctl05 shmctl05
shmctl06 shmctl06
socketcall01 socketcall01
socketcall02 socketcall02
socketcall03 socketcall03
ssetmask01 ssetmask01
swapoff01 swapoff01
swapoff02 swapoff02
swapon01 swapon01
swapon02 swapon02
swapon03 swapon03
switch01 endian_switch01
sysinfo03 sysinfo03
syslog01 syslog01
syslog02 syslog02
syslog03 syslog03
syslog04 syslog04
syslog05 syslog05
syslog06 syslog06
syslog07 syslog07
syslog08 syslog08
syslog09 syslog09
syslog10 syslog10
syslog11 syslog11
syslog12 syslog12
times03 times03
timerfd04 timerfd04
timerfd_settime02 timerfd_settime02
perf_event_open02 perf_event_open02
statx07 statx07
io_uring02 io_uring02
ioctl_loop05 ioctl_loop05
# all local filesystems
chdir01 chdir01
copy_file_range01 copy_file_range01
fallocate04 fallocate04
fallocate05 fallocate05
fdatasync03 fdatasync03
fgetxattr01 fgetxattr01
fremovexattr01 fremovexattr01
fremovexattr02 fremovexattr02
fsconfig01 fsconfig01
fsetxattr01 fsetxattr01
fsmount01 fsmount01
fsmount02 fsmount02
fsopen01 fsopen01
fspick01 fspick01
fspick02 fspick02
fsync01 fsync01
fsync04 fsync04
lremovexattr01 lremovexattr01
move_mount01 move_mount01
move_mount02 move_mount02
msync04 msync04
open_tree01 open_tree01
open_tree02 open_tree02
preadv03 preadv03
preadv03_64 preadv03_64
preadv203 preadv203
preadv203_64 preadv203_64
pwritev03 pwritev03
pwritev03_64 pwritev03_64
setxattr01 setxattr01
statx04 statx04
sync01 sync01
sync_file_range02 sync_file_range02
syncfs01 syncfs01
utime03 utime03
writev03 writev03
# cross mount (may fail on multi-zones meta)
inotify03 inotify03
inotify07 inotify07
inotify08 inotify08
lchown03  lchown03
linkat02 linkat02
madvise01 madvise01
mknod07 mknod07
mknodat02 mknodat02
mmap16 mmap16
mount03 mount03
mount05 mount05
mount06 mount06
open12 open12
pivot_root01 pivot_root01
readahead02 readahead02
rename11 rename11
renameat01 renameat01
statx05 statx05
umount01 umount01
umount02 umount02
umount03 umount03
umount2_01 umount2_01
umount2_02 umount2_02
umount2_03 umount2_03
utime06 utime06
# not supported
ioctl_loop05 ioctl_loop05
fcntl17 fcntl17
fcntl17_64 fcntl17_64
setxattr03 setxattr03
getxattr05 getxattr05
# not stable
finit_module02 finit_module02
msgstress03 msgstress03
kill11 kill11
# failed after upgrade github runner from ubuntu 20.04 to 22.04
inotify02 inotify02
ioprio_set03 ioprio_set03

================================================
FILE: .github/workflows/cache.yml
================================================
name: "cache"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/cache.yml'
      - '**/cache.sh'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/cache.yml'
      - '**/cache.sh'
  schedule:
    - cron:  '30 20 * * *'
  workflow_dispatch:

jobs:
  cache:
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Remove unused software
        timeout-minutes: 3
        continue-on-error: true
        run: |
          echo "before remove unused software"
          sudo df -h
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          echo "after remove unused software"
          sudo df -h

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Test
        run: |
          sudo GOCOVERDIR=$(pwd)/cover .github/scripts/cache.sh

      - name: Check juicefs log
        if: always()
        run: |
          sudo .github/scripts/check_juicefs_log.sh
      
      - name: Check /tmp/juicefs.log
        if: always()
        run: |
          [[ -f /tmp/juicefs.log ]] && sudo tail -n 1000 /tmp/juicefs.log || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"  
      
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/cancel_outdate_runs.yml
================================================
name: cancel_outdate_runs
on:
  pull_request:
    branches:
      - main
      - release**

jobs:
  cancel-outdate-runs:
    if: github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3
        with :
          fetch-depth: 1

      - name: mount jfs dir
        timeout-minutes: 3
        run: |
          sudo mkdir -p /root/.juicefs
          sudo wget -q s.juicefs.com/static/Linux/mount -O /root/.juicefs/jfsmount 
          sudo chmod +x /root/.juicefs/jfsmount
          sudo curl -s -L https://juicefs.com/static/juicefs -o /usr/local/bin/juicefs && sudo chmod +x /usr/local/bin/juicefs
          sudo juicefs auth ci-coverage --access-key ${{ secrets.CI_COVERAGE_AWS_AK }} --secret-key ${{ secrets.CI_COVERAGE_AWS_SK }} --token ${{ secrets.CI_COVERAGE_AWS_TOKEN }} --encrypt-keys
          sudo juicefs mount ci-coverage --subdir juicefs/cancel-outdate-runs /jfs --allow-other
            
      - name: Get previous head_sha
        timeout-minutes: 1
        run: |
          echo get previous head sha from /jfs/${{ github.event.pull_request.number }}/head_sha
          previous_head_sha=/jfs/${{ github.event.pull_request.number }}/head_sha
          if [ ! -f ${previous_head_sha} ]; then
            echo "no previous head sha found"
            exit 0
          else
            previous_head_sha=$(cat ${previous_head_sha})
            echo "previous head sha is ${previous_head_sha}"
            echo "previous_head_sha=${previous_head_sha}" >> $GITHUB_ENV
          fi

      - name: Save head_sha 
        timeout-minutes: 1
        run: |
          mkdir -p /jfs/${{ github.event.pull_request.number }}
          echo ${{ github.event.pull_request.head.sha }} | tee /jfs/${{ github.event.pull_request.number }}/head_sha
          echo save head sha to /jfs/${{ github.event.pull_request.number }}/head_sha
      
      - name : Cancel Outdate Runs
        uses: ./.github/actions/cancel-outdate-runs
        with: 
          per_page: 8
          page: 1
          head_sha: ${{ env.previous_head_sha }}
          github_token: ${{ secrets.GITHUB_TOKEN }}

      - name: Wait Runs Cancelled
        run: |
          sleep 10s

      - name : Cancel Outdate Runs
        uses: ./.github/actions/cancel-outdate-runs
        with: 
          per_page: 8
          page: 1
          head_sha: ${{ env.previous_head_sha }}
          github_token: ${{ secrets.GITHUB_TOKEN }}

================================================
FILE: .github/workflows/chaos.yml
================================================
name: "chaos-test"

on:
  push:
    branches:
      - 'release-**'
      - 'main'
    paths:
      - '**/chaos.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/chaos.yml'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron:  '0 20 * * *'        

jobs:
  chaos-test:
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        # chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth", "redis-bandwidth", "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        chaos: ["minio-io", "minio-memory", "minio-cpu", "minio-bandwidth",  "redis-io", "redis-delay", "redis-memory", "redis-cpu", "juicefs-bandwidth", "juicefs-memory", "juicefs-cpu", "juicefs-delay"]
        # chaos: ["minio-io"]
    steps:        
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - uses: actions/setup-go@v3
        with:
          go-version: 'oldstable'
          cache: true

      - name: Build
        timeout-minutes: 10
        run: | 
          sudo .github/scripts/apt_install.sh musl-tools upx-ucl
          export STATIC=1
          make juicefs

      - name: Creating kind cluster
        uses: helm/kind-action@v1.5.0

      - name: Print cluster information
        run: |
          kubectl config view
          kubectl cluster-info
          kubectl get nodes
          kubectl get pods -n kube-system
          helm version
          kubectl version

      # - name: Build And Load CSI Docker Image
      #   run: |
      #     echo GITHUB_REF is $GITHUB_REF
      #     echo GITHUB_SHA is $GITHUB_SHA
      #     helm repo add juicefs https://juicedata.github.io/charts/
      #     helm repo update
      #     APP_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $3}')
      #     echo APP_VERSION is $APP_VERSION
      #     docker build --build-arg GITHUB_REF=$GITHUB_REF --build-arg GITHUB_SHA=$GITHUB_SHA -f .github/scripts/chaos/juicefs-csi-driver.Dockerfile -t juicedata/juicefs-csi-driver:v$APP_VERSION .
      #     kind load docker-image juicedata/juicefs-csi-driver:v$APP_VERSION --name chart-testing

      - name: Build And Load CSI Docker Image
        run: |
          version=`./juicefs version |awk '{print $3}' | cut -d '-' -f1`
          docker build -f .github/scripts/chaos/juicefs.Dockerfile -t juicedata/mount:ce-v${version} .
          helm repo add juicefs https://juicedata.github.io/charts/
          helm repo update
          kind load docker-image juicedata/mount:ce-v${version} --name chart-testing
          
      - name: Install JuiceFS CSI Driver
        run: |
          CHART_VERSION=$(helm search repo juicefs/juicefs-csi-driver --versions | grep juicefs | head -1 | awk -F" " '{print $2}')
          echo CHART_VERSION is $CHART_VERSION
          helm install juicefs-csi-driver juicefs/juicefs-csi-driver -n kube-system --version $CHART_VERSION
          kubectl -n kube-system get pods -l app.kubernetes.io/name=juicefs-csi-driver

      - name: Deploy redis
        run: |
          kubectl apply -f .github/scripts/chaos/redis.yaml

      - name: Deploy minio
        run: |
          rm -rf /data/minio-data/*
          kubectl apply -f .github/scripts/chaos/minio.yaml

      - name: Mount Juicefs 
        run: |
          version=`./juicefs version |awk '{print $3}' | cut -d '-' -f1`
          sed -i "s/mount:ci/mount:ce-v$version/" .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/sc.yaml
          kubectl apply -f .github/scripts/chaos/pvc.yaml

      - name: Start vdbenh
        run: |
          kubectl apply -f .github/scripts/chaos/dynamic.yaml

      - name: Install Chaos Mesh
        run: |
          helm version
          kubectl version
          helm repo add chaos-mesh https://charts.chaos-mesh.org
          kubectl create ns chaos-mesh
          helm install chaos-mesh chaos-mesh/chaos-mesh -n=chaos-mesh --version 2.5.1 \
            --set chaosDaemon.runtime=containerd \
            --set chaosDaemon.socketPath=/run/containerd/containerd.sock \
            --set controllerManager.replicaCount=1
          echo "wait pod status to running"
          for ((k=0; k<120; k++)); do
              kubectl get pods --namespace chaos-mesh -l app.kubernetes.io/instance=chaos-mesh > pods.status
              cat pods.status

              run_num=`grep Running pods.status | wc -l`
              pod_num=$((`cat pods.status | wc -l` - 1))
              if [ $run_num == $pod_num ]; then
                  break
              fi

              sleep 1
          done

      - name: Run chaos mesh action
        run: | 
          chaos=${{matrix.chaos}}
          sed -i "s/# - $chaos/- $chaos/g" .github/scripts/chaos/workflow.yaml 
          cat .github/scripts/chaos/workflow.yaml 
          kubectl apply -f .github/scripts/chaos/workflow.yaml 

      - name: Verify 
        run: |
          for i in {1..1200}; do 
            if kubectl get pods --all-namespaces | grep dynamic-ce | grep -i "Completed"; then
              echo "dynamic-ce is completed in $i seconds"
              break
            else
              if [ $((i % 10)) -eq 0 ]; then
                echo "dynamic-ce is not completed in $i seconds"
              fi
              sleep 1
            fi
          done
          kubectl get pods --all-namespaces
          apps=("dynamic-ce" "juicefs-csi-node" "juicefs-csi-controller" "juicefs-chart-testing-control-plane-pvc"  "redis" "minio")
          for app in ${apps[@]}; do
            echo app is $app
            kubectl get pods --all-namespaces | grep $app | grep -i "Running\|Completed"
            if [ $? != 0 ]; then
              echo status of $app is not expected.
              exit 1
            fi
          done
      
      - name: Check mount pod
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep juicefs-chart-testing-control-plane-pvc)
          echo POD_NAME is $POD_NAME
          for pod in $POD_NAME;do
            kubectl -n kube-system describe po $pod
            kubectl logs -n kube-system $pod > juicefs.log
            cat juicefs.log
            grep "<FATAL>:" juicefs.log | grep -v format.go && exit 1 || true
          done

      - name: Mount pod upgrade
        timeout-minutes: 5
        run: |
          chaos=${{matrix.chaos}}
          skip_conditions=("minio-io")
          if [[ "${skip_conditions[*]}" =~ "$chaos" ]]; then
            echo "skip mount pod upgrade"
            exit 0
          else
            CSI_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep juicefs-csi-node)  
            PVC_POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep juicefs-chart-testing-control-plane-pvc)
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME 2>&1 | tee upgrade.log
            sleep 5
            if ! grep "SUCCESS" upgrade.log;then exit -1;fi
            rm upgrade.log
            kubectl exec $CSI_POD_NAME -n kube-system -- juicefs-csi-driver upgrade $PVC_POD_NAME --restart 2>&1 | tee upgrade.log || true  
            sleep 5
          fi
          kubectl delete -f .github/scripts/chaos/workflow.yaml

      - name: Check csi controller log
        if: always()
        run: |
          kubectl describe pvc dynamic-ce
          kubectl -n kube-system get po -l app=juicefs-csi-controller
          kubectl -n kube-system logs juicefs-csi-controller-0 juicefs-plugin

      - name: Check csi node log
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep juicefs-csi-node)
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl -n kube-system logs $POD_NAME -c juicefs-plugin > csi_node.log
          cat csi_node.log
          # grep -i "error" csi_node.log && exit 1 || true

      - name: Check mount point pod
        if: always()
        run: |
          POD_NAME=$(kubectl get pods -n kube-system | grep juicefs-chart-testing-control-plane-pvc | grep Running | awk '{print $1}')
          echo POD_NAME is $POD_NAME
          for pod in $POD_NAME;do
            kubectl -n kube-system describe po $pod
            kubectl logs -n kube-system $pod > juicefs.log
            cat juicefs.log
            grep "<FATAL>:" juicefs.log | grep -v format.go && exit 1 || true
          done

      - name: Check vdbench log
        if: always()
        run: | 
          POD_NAME=$(kubectl get pods -n default -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep dynamic-ce )
          echo POD_NAME is $POD_NAME
          kubectl -n default describe po $POD_NAME
          kubectl logs -n default $POD_NAME > vdbench.log
          cat vdbench.log
          # grep -i "error" vdbench.log && exit 1 || true

      - name: Check Redis log
        if: always()
        run: | 
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep redis )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > redis.log
          cat redis.log
          # grep -i "error" redis.log && exit 1 || true

      - name: Check Minio log
        if: always()
        run: | 
          POD_NAME=$(kubectl get pods -n kube-system -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}'  | grep minio )
          echo POD_NAME is $POD_NAME
          kubectl -n kube-system describe po $POD_NAME
          kubectl logs -n kube-system $POD_NAME > minio.log
          cat minio.log
          # grep -i "error" minio.log && exit 1 || true

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [chaos-test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch' 
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"


================================================
FILE: .github/workflows/check-doc.yaml
================================================
name: Check document

on:
  push:
    branches: [main]
    paths:
      - 'README*.md'
      - 'docs/**'
      - 'package.json'
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - '.github/workflows/check-doc.yaml'
  pull_request:
    branches: [main]
    paths:
      - 'README*.md'
      - 'docs/**'
      - 'package.json'
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - '.github/workflows/check-doc.yaml'

jobs:
  check-doc:
    name: Check document
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18.x'
          cache: 'npm'
      - name: Install dependencies
        run: |
          npm ci
      - name: Lint Markdown files (markdownlint)
        run: |
          npm run markdown-lint
      - name: Lint Markdown files (autocorrect)
        uses: huacnlee/autocorrect-action@main
        with:
          args: --lint --no-diff-bg-color ./docs/
      - name: Check broken link (including broken anchor)
        run: |
          npm run check-broken-link


================================================
FILE: .github/workflows/codeql-analysis.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: [ main ]
    paths-ignore:
      - 'docs/**'
#  pull_request:
#    The branches below must be a subset of the branches above
#    branches: [ main ]
  schedule:
    - cron: '28 20 * * 0'
    
  workflow_dispatch:

jobs:
  analyze:
    name: Analyze
    timeout-minutes: 30
    runs-on: ubuntu-22.04
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ['java','go']
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
        # Learn more:
        # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed

    steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Set up Java
      uses: actions/setup-java@v3
      with:
        distribution: 'temurin'
        java-version: '8'

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v2
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main

    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    #- name: Autobuild
    #  uses: github/codeql-action/autobuild@v2

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl

    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
    #    and modify them (or add more) to build your code if your project
    #    uses a compiled language
    - if: matrix.language == 'go'
      name: Autobuild
      uses: github/codeql-action/autobuild@v2

    - if: matrix.language == 'java'
      name: build-java
      run: mvn clean package -Dmaven.test.skip=true
      working-directory: sdk/java/
    #- run: |
    #   make bootstrap
    #   make release

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v2

    - name: Send Slack Notification
      if: failure()
      uses: juicedata/slack-notify-action@main
      with:
        channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
        slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"


================================================
FILE: .github/workflows/command-win.yml
================================================
name: "command-win"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/command-win.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/command-win.yml'
  workflow_dispatch:
    inputs:
      debug_enabled:
        type: boolean
        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
        required: false
        default: false
  schedule:
    - cron: '0 17 * * 0'

jobs:
  command-win:
    runs-on: windows-2022
    env:
      Actions_Allow_Unsecure_Commands: true
    steps:
      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: '1.21'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Setup MSBuild.exe
        if: false
        uses: microsoft/setup-msbuild@v1.0.3

      - name: Change Winsdk Version
        if: false
        uses: GuillaumeFalourd/setup-windows10-sdk-action@v1
        with:
          sdk-version: 18362

      - name: Download WinFsp
        run: |
          choco install wget
          mkdir "C:\wfsp\"
          wget -O winfsp.msi https://github.com/winfsp/winfsp/releases/download/v2.0/winfsp-2.0.23075.msi
          copy winfsp.msi "C:\wfsp\"
          choco install 7zip -y

      - name: Install WinFsp
        run: |
          # call start-process to install winfsp.msi
          Start-Process -Wait -FilePath "C:\wfsp\winfsp.msi" -ArgumentList "/quiet /norestart"
          ls "C:\Program Files (x86)\WinFsp"
          ls "C:\Program Files (x86)\WinFsp\bin"

      - name: Set up Include Headers
        run: |
          mkdir "C:\WinFsp\inc\fuse"
          copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\
          dir "C:\WinFsp\inc\fuse"
          set CGO_CFLAGS=-IC:/WinFsp/inc/fuse
          go env
          go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse
          go env

      - name: Install Scoop
        run: |
          dir "C:\Program Files (x86)\WinFsp"
          Set-ExecutionPolicy RemoteSigned -scope CurrentUser
          iwr -useb 'https://raw.githubusercontent.com/scoopinstaller/install/master/install.ps1' -outfile 'install.ps1'
          .\install.ps1 -RunAsAdmin
          echo $env:USERNAME
          scoop
          $redisUrl = "https://github.com/tporadowski/redis/releases/download/v5.0.14.1/Redis-x64-5.0.14.1.zip"
          $redisRoot = Join-Path $env:USERPROFILE "scoop\apps\redis\current"
          New-Item -ItemType Directory -Force -Path $redisRoot | Out-Null
          Invoke-WebRequest -Uri $redisUrl -OutFile redis.zip
          Expand-Archive -Path redis.zip -DestinationPath $redisRoot -Force
          $redisCli = Join-Path $redisRoot "redis-cli.exe"
          if (-not (Test-Path $redisCli)) {
            throw "redis-cli.exe not found after downloading from $redisUrl"
          }
          $env:Path += ";$redisRoot"
          $redisRoot | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "Updated PATH: $env:Path"
          & $redisCli --version
          scoop install minio@2021-12-10T23-03-39Z
          scoop install runasti

      - name: Download winsw
        run: |
          wget https://github.com/winsw/winsw/releases/download/v2.12.0/WinSW-x64.exe -q --show-progress -O winsw.exe
          ls winsw.exe

      - name: Start Redis
        run: |
          copy winsw.exe redis-service.exe
          $redisRoot = Join-Path $env:USERPROFILE "scoop\apps\redis\current"
          $redisExe = Join-Path $redisRoot "redis-server.exe"
          if (-not (Test-Path $redisExe)) {
            throw "redis-server.exe not found: $redisExe"
          }
          @"
          <service>
            <id>redisredis</id>
            <name>redisredis</name>
            <description>redisredis</description>
            <executable>$redisExe</executable>
            <arguments>--bind 127.0.0.1 --port 6379 --save \"\" --appendonly no</arguments>
            <logmode>rotate</logmode>
          </service>
          "@ | Set-Content redis-service.xml -Encoding utf8
          .\redis-service.exe install
          net start redisredis
          Start-Sleep -Seconds 2
          $redisCli = Join-Path $redisRoot "redis-cli.exe"
          & $redisCli -h 127.0.0.1 -p 6379 ping

      - name: Download MinGW
        run: |
          wget https://github.com/niXman/mingw-builds-binaries/releases/download/14.2.0-rt_v12-rev1/x86_64-14.2.0-release-win32-seh-msvcrt-rt_v12-rev1.7z -q --show-progress -O mingw.7z
          7z.exe x mingw.7z -oC:\mingw64
          ls C:\mingw64\bin

      - name: Install Git
        run: |
          if (-not (Get-Command git -ErrorAction SilentlyContinue)) {
              Write-Host "Installing Git..."
              $gitInstaller = "$env:TEMP\Git-Installer.exe"
              Invoke-WebRequest -Uri "https://github.com/git-for-windows/git/releases/download/v2.44.0.windows.1/Git-2.44.0-64-bit.exe" -OutFile $gitInstaller
              
              Start-Process -Wait -FilePath $gitInstaller -ArgumentList "/VERYSILENT", "/NORESTART", "/NOCANCEL", "/SP-", "/CLOSEAPPLICATIONS", "/RESTARTAPPLICATIONS", "/COMPONENTS=""icons,ext\reg\shellhere,assoc,assoc_sh"""
              $env:Path += ";C:\Program Files\Git\bin"
          }
       

      - name: Build Juicefs
        run: |
          $env:CGO_ENABLED=1
          $env:PATH+=";C:\mingw64\bin"
          go build -ldflags="-s -w" -o juicefs.exe .

      - name: Install Python3
        run: |
          choco install python3 -y

      - name: Wins_fs_test
        run: |
          ./juicefs.exe format redis://127.0.0.1:6379/1 myjfs
          $env:PATH+=";C:\Program Files (x86)\WinFsp\bin"
          ./juicefs.exe mount -d redis://127.0.0.1:6379/1 z:
          python3 .github/scripts/wins_fs_test.py

      - name: Test Gc
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/gc.sh
      
      - name: Test Debug
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/debug.sh

      - name: Test dump load
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/dump_load.sh

      - name: Test acl
        timeout-minutes: 10
        shell: bash
        if: false
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/acl.sh

      - name: Test clone
        timeout-minutes: 10
        shell: bash
#        if: false
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/clone.sh

      - name: Test fsck
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/fsck.sh

      - name: Test profile
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/profile.sh
      
      - name: Test gateway
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/gateway.sh

      - name: Test quota
        timeout-minutes: 10
        shell: bash
        run: |
          export PATH="$HOME/scoop/shims:$PATH"
          export LANG=C.UTF-8
          export LC_ALL=C.UTF-8
          META_URL=redis://127.0.0.1:6379/1 .github/scripts/command-win/quota.sh
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: mxschmitt/action-tmate@v3
     

================================================
FILE: .github/workflows/command.yml
================================================
name: "command-test"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '.github/scripts/command/*.sh'
      - '**/command.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '.github/scripts/command/*.sh'
      - '**/command.yml'
  schedule:
    - cron:  '30 20 * * *'

  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [ "${{github.event_name}}" == "schedule"  ]; then
            echo 'meta_matrix=["mysql", "redis", "tikv"]' >> $GITHUB_OUTPUT
          elif [ "${{github.event_name}}" == "pull_request"  ]; then
            echo 'meta_matrix=["mysql", "redis", "tikv"]' >> $GITHUB_OUTPUT
          elif [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            echo 'meta_matrix=["mysql", "redis", "tikv"]' >> $GITHUB_OUTPUT
          elif [[ "${{ github.event_name }}" == "issue_comment" ]] &&
               [[ "${{ github.event.comment.body }}" == *"/run-command-tests"* ]];then 
            echo 'meta_matrix=["mysql", "redis", "tikv"]' >> $GITHUB_OUTPUT
          else
            echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  command_test1:
    timeout-minutes: 60
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        # meta: [ 'sqlite3', 'redis', 'tikv']
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04
    steps:
      - name: Remove unused software
        shell: bash
        run: |
            echo "before remove unused software"
            sudo df -h
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /usr/local/lib/android
            sudo rm -rf /opt/ghc
            echo "after remove unused software"
            sudo df -h

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Build 
        timeout-minutes: 10
        uses: ./.github/actions/build
  
      - name: Download Random Test
        run: |
          wget https://juicefs-com-static.oss-cn-shanghai.aliyuncs.com/random-test/random-test
          chmod +x random-test
      
      - name: Test Mount
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/mount.sh

      - name: Test Gc
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/gc.sh      

      - name: Test Config
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/config.sh
    
      - name: Test acl
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/acl.sh     

      - name: Test Clone
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/clone.sh

      - name: Test fsck
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/fsck.sh

      - name: Test Gateway
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/gateway.sh
      
      - name: Test Debug
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/debug.sh
      
      - name: Test Info
        timeout-minutes: 10
        run: |
          free -g
          if [ "${{matrix.meta}}" == "tikv" ]; then
            ps -ef | grep tikv-server || true
          fi
          sudo META=${{matrix.meta}} .github/scripts/command/info.sh

      - name: Test Format
        timeout-minutes: 10
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/format.sh
  
      - name: Log
        if: always()
        run: |
          echo "juicefs log"
          sudo tail -n 1000 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log || true
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  command_test2:
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Remove unused software
        run: |
            echo "before remove unused software"
            sudo df -h
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /usr/local/lib/android
            sudo rm -rf /opt/ghc
            echo "after remove unused software"
            sudo df -h
            
      - name: Build 
        uses: ./.github/actions/build

      - name: Test Quota
        timeout-minutes: 30
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/quota.sh 

      - name: Log
        if: always()
        run: |
          echo "juicefs log"
          sudo tail -n 1000 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  command_test3:
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Build 
        uses: ./.github/actions/build

      - name: Test Graceful upgrade
        timeout-minutes: 30
        run: |
          if [ "${{matrix.meta}}" == "redis" ]; then
            sudo META=${{matrix.meta}} .github/scripts/command/graceful_upgrade.sh
          fi
      
      - name: Test Interface
        timeout-minutes: 20
        run: |
          sudo META=${{matrix.meta}} .github/scripts/command/interface.sh

      - name: Log
        if: always()
        run: |
          if [ "${{matrix.meta}}" == "redis" ]; then
            echo "juicefs log"
            sudo tail -n 1000 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [command_test1, command_test2, command_test3]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/command2.yml
================================================
name: "command-random-test"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '.github/scripts/command/random.sh'
      - '.github/scripts/hypo/command*.py'
      - '**/command2.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '.github/scripts/command/random.sh'
      - '.github/scripts/hypo/command*.py'
      - '**/command2.yml'
  schedule:
    - cron:  '30 20 * * *'

  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [ "${{github.event_name}}" == "schedule"  ]; then
            echo 'meta_matrix=["sqlite3", "mysql", "tikv",  "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          elif [ "${{github.event_name}}" == "pull_request"  ]; then
            echo 'meta_matrix=["sqlite3"]' >> $GITHUB_OUTPUT
          elif [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            echo 'meta_matrix=["mysql", "tikv"]' >> $GITHUB_OUTPUT
          else
            echo 'meta_matrix=["mysql", "tikv"]' >> $GITHUB_OUTPUT
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  test:
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Run Example
        timeout-minutes: 60
        run: |
          sudo META1=redis META2=${{matrix.meta}} .github/scripts/command/random.sh test_run_examples
          
      - name: Remove unused software
        run: |
          if [ "${{ github.event_name }}" == "schedule" ]; then
            echo "before remove unused software"
            sudo df -h
            sudo rm -rf /usr/share/dotnet
            sudo rm -rf /usr/local/lib/android
            sudo rm -rf /opt/ghc
            echo "after remove unused software"
            sudo df -h
          fi

      - name: Download example database
        timeout-minutes: 5
        uses: dawidd6/action-download-artifact@v9
        if: false
        with:
          name: command2-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples
          if_no_artifact_found: ignore
          workflow_conclusion: ""
          check_artifacts: true

      - name: Run All
        continue-on-error: true
        timeout-minutes: 60
        run: |
          sudo -E LOG_LEVEL=WARNING META1=redis META2=${{matrix.meta}} .github/scripts/command/random.sh test_run_all 2>&1 | tee fsrand.log
      
      - name: check fsrand.log
        if: always()
        run: | 
          [[ -f "fsrand.log" ]] && tail -n 1000 fsrand.log     
          grep -i "AssertionError" fsrand.log && exit 1 || true

      - name: chmod example directory
        if: always()
        timeout-minutes: 5
        run: |
          if [[ -e ".hypothesis/examples" ]]; then
            echo "chmod for .hypothesis/examples" && sudo chmod -R 755 .hypothesis/examples
          fi

      - name: Upload example database
        uses: actions/upload-artifact@v4
        if: false
        with:
          include-hidden-files: true
          name: command2-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples

      - name: Check client log
        if: always()
        run: |
          echo "juicefs log"
          sudo tail -n 1000 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/compile.yml
================================================
name: "compile"

on:
  push:
    branches:
    - main
    - release**
    paths:
    - '**/compile.yml'
  pull_request:
    branches:
    - main
    - release**
    paths:
    - '**/compile.yml'
  schedule:
    - cron:  '0 20 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch"  ]]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "postgres", "badger", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          elif [[ "${{github.event_name}}" == "pull_request" ||  "${{github.event_name}}" == "push" ]]; then
            echo 'meta_matrix=["redis", "mysql", "tikv"]' >> $GITHUB_OUTPUT
          else
            echo "event_name is not supported" && exit 1
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  compile:
    timeout-minutes: 120
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi
  
      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}
      
      - name: Remove unused software
        timeout-minutes: 10
        run: |
          echo "before remove unused software"
          sudo df -h
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          echo "after remove unused software"
          sudo df -h

      - name: Prepare meta db
        run: | 
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs format $meta_url --trash-days 0 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs mount -d $meta_url /jfs --no-usage-report
          stat /jfs/.accesslog
  
      - name: Build Redis
        timeout-minutes: 10
        working-directory: /jfs
        run: |
          wget -O /tmp/redis.tar.gz https://github.com/redis/redis/archive/refs/tags/6.0.16.tar.gz
          mkdir redis
          tar -xvf /tmp/redis.tar.gz -C redis --strip-components 1
          make -C redis
 
      - name: Install Depenency for Kernel
        run: |
          sudo apt-get install bison flex libelf-dev bc -y

      - name: Build Kernel
        timeout-minutes: 90
        working-directory: /jfs
        run: |
          wget -O /tmp/linux.tar.gz https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-6.5.tar.gz
          mkdir linux
          tar xzf /tmp//linux.tar.gz -C linux --strip-components 1
          make -C linux defconfig
          make -C linux -j`grep -c processor /proc/cpuinfo`
          
      - name: Build Spack
        if: false
        run: |
          git clone https://github.com/spack/spack.git
          source spack/share/spack/setup-env.sh
          spack --version
          spack bootstrap now
          spack compiler find 
          spack compilers
          spack config get config > ~/.spack/config.yaml 
          sed -i '/build_stage:/,+2d' ~/.spack/config.yaml
          echo -e "build_stage:\n  - /jfs/spack-stage" >> ~/.spack/config.yaml
          spack install spack

      - name: Log
        if: always()
        run: |
          echo "juicefs log"
          sudo tail -n 1000 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [compile]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"

================================================
FILE: .github/workflows/coverage-report.yml
================================================
name: "coverage-report"

on:
  push:
    branches:
      - main
      - release**
    paths:
      - '**/coverage-report.yml'
  pull_request:
    branches:
      - main
      - release**
    paths:
      - '**/coverage-report.yml'

  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
      last_date:
        type: string
        description: "last date of coverage data"
        required: false
        default: ""
  schedule:
    - cron:  '0 23 * * *'
    
jobs:
  coverage-report:
    strategy:
      fail-fast: false
      matrix:
        branch: ['main']
        test: ['ut', 'it', 'all']
          
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        timeout-minutes: 1
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir for cleanup
        if: ${{ matrix.test == 'all' }}
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: /ci-coverage
          subdir: juicefs/
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: clean up old coverage data
        if: ${{ matrix.test == 'all' }}
        continue-on-error: true
        timeout-minutes: 10
        run: |
          sudo find /ci-coverage -type f \( -name 'covcounters*' -o -name 'covmeta*' \) -mtime +2 -print -exec rm -f {} +
          umount /jfs-coverage

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: schedule
          subdir: juicefs/schedule
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}
      
      - name: Determine lastdate dir
        timeout-minutes: 120
        run: |
          if [[ -n "${{github.event.inputs.last_date}}" ]]; then
            last_date=${{github.event.inputs.last_date}}
          else
            last_date=$(ls -t schedule | head -n 1)
            [[ -z "$last_date" ]] && echo "no data found in schedule" && exit 1
          fi
          [[ ! -d "schedule/$last_date" ]] && echo "schedule/$last_date not found" && exit 1
          echo "last_date=$last_date" >> $GITHUB_ENV

      - name: Generate today's coverage report
        timeout-minutes: 30
        working-directory: schedule/${{env.last_date}}
        run: |
          echo "current dir is $(pwd)"
          coverdirs=""
          for dir in $(find . -mindepth 1 -maxdepth 1 -type d -exec basename {} \;); do
              if [[ ${{matrix.test}} == "ut" ]]; then
                if [[ "$dir" == "unittests" ]]; then
                  coverdirs+="$dir,"
                fi
              elif [[ ${{matrix.test}} == "it" ]]; then
                if [[ "$dir" != "unittests" ]]; then
                  coverdirs+="$dir,"
                fi
              elif [[ ${{matrix.test}} == "all" ]]; then
                coverdirs+="$dir,"
              fi
          done
          coverdirs=${coverdirs%,}
          echo coverdirs is $coverdirs
          [[ -z "$coverdirs" ]] && echo "no coverage dir found" && exit 0
          name=cover_${{matrix.test}}
          sudo go tool covdata percent -i=$coverdirs | sudo tee ${name}.percent
          echo "generated coverage percent report:" $(realpath ${name}.percent)
          sudo go tool covdata textfmt -i=$coverdirs -o ${name}.txt 
          echo "generated coverage report in text format:" $(realpath ${name}.txt)
          sudo go tool cover -html=${name}.txt -o ${name}.html
          echo "generated coverage report in html format:" $(realpath ${name}.html)
          ls -l cover_*
          
      - name: upload coverage report
        working-directory: schedule/${{env.last_date}}
        timeout-minutes: 10
        run: |
          echo "current dir is $(pwd)"
          [[ ! -f "cover_${{matrix.test}}.html" ]] && echo "no coverage report found" && exit 0
          UPLOAD_PATH=${{github.workflow}}_${{github.run_id}}_${{matrix.test}}.html
          response=$(curl -w '%{http_code}' -s -o /dev/null --form 'file=@cover_${{matrix.test}}.html' https://juicefs.com/upload-file-u80sdvuke/${UPLOAD_PATH}?token=${{secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN}})
          if [ "$response" -eq 200 ]; then
            echo Coverage Report for ${{matrix.test}}: https://i.juicefs.io/ci-coverage/${UPLOAD_PATH}
          else
            echo "Upload failed with status code $response"
            exit 1
          fi
  
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 30
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/dependency-review.yml
================================================
# Dependency Review Action
#
# This Action will scan dependency manifest files that change as part of a Pull Request, surfacing known-vulnerable versions of the packages declared or updated in the PR. Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable packages will be blocked from merging.
#
# Source repository: https://github.com/actions/dependency-review-action
# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
name: 'Dependency Review'
on: [pull_request]

permissions:
  contents: read

jobs:
  dependency-review:
    runs-on: ubuntu-22.04
    steps:
      - name: 'Checkout Repository'
        uses: actions/checkout@v3
      - name: 'Dependency Review'
        uses: actions/dependency-review-action@v2
#        with:
#          fail-on-severity: high


================================================
FILE: .github/workflows/dockerfile-sftp
================================================
FROM debian:stable-slim
RUN apt-get clean
RUN apt-get update

RUN apt-get install openssh-server -y

RUN mkdir /run/sshd
RUN sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config
RUN sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/g' /etc/ssh/sshd_config
RUN echo "root:password"|chpasswd
RUN useradd -m testUser1
RUN echo "testUser1:password"|chpasswd
EXPOSE 22
CMD    ["/usr/sbin/sshd", "-D"]


================================================
FILE: .github/workflows/dump_load.yml
================================================
name: "dump_load"
on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/start_meta_engine.sh'
      - '**/dump_load.yml'
      - '**/dump_load.sh'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/start_meta_engine.sh'
      - '**/dump_load.yml'
      - '**/dump_load.sh'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [ "${{github.event_name}}" == "schedule"  ] || [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          else
            echo 'meta_matrix=["redis", "mysql", "tikv"]' >> $GITHUB_OUTPUT
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  dump_load:
    timeout-minutes: 90
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Install nodejs
        run: |
          curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash
          export NVM_DIR="$HOME/.nvm"
          [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # This loads nvm
          [ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion" # This loads nvm bash_completion
          nvm install 22
          nvm use 22
          node -v 
          nvm current

      - name: Download Random Test
        run: |
          wget https://juicefs-com-static.oss-cn-shanghai.aliyuncs.com/random-test/random-test
          chmod +x random-test

      - name: Test Load & Dump with Binary
        timeout-minutes: 30
        continue-on-error: true
        run: |
          sudo BINARY=true GOCOVERDIR=$(pwd)/cover META=${{matrix.meta}} .github/scripts/command/dump_load.sh

      - name: Test Load & Dump with Json Fast Mode
        timeout-minutes: 30
        run: |
          sudo FAST=true GOCOVERDIR=$(pwd)/cover META=${{matrix.meta}} .github/scripts/command/dump_load.sh  

      - name: Test Load & Dump with Json
        timeout-minutes: 30
        run: |
          sudo GOCOVERDIR=$(pwd)/cover META=${{matrix.meta}} .github/scripts/command/dump_load.sh     

      - name: log
        if: always()
        run: | 
          tail -500 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [dump_load]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: upload total coverage report
        timeout-minutes: 30
        continue-on-error: true
        uses: ./.github/actions/upload-total-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch' 
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"
        

================================================
FILE: .github/workflows/dump_load_bench.yml
================================================
name: "dump_load_bench"
on:
  push:
    branches:
      - "main"
      - "release-**"
    paths:
      - "**/dump_load_bench.yml"
      - "**/dump_load_bench.sh"
  pull_request:
    branches:
      - "main"
      - "release-**"
    paths:
      - "**/dump_load_bench.yml"
      - "**/dump_load_bench.sh"
  schedule:
    - cron: "0 19 * * *"
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [ "${{github.event_name}}" == "schedule"  ] || [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          else
            echo 'meta_matrix=["redis", "sqlite3", "tikv"]' >> $GITHUB_OUTPUT
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  dump_load_bench:
    timeout-minutes: 90
    needs: [build-matrix]
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Clean up
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/local/.ghcup
          sudo docker system prune -af
          sudo df -h

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with:
          target: ${{steps.vars.outputs.target}}

      - name: Install nodejs
        run: |
          curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash
          export NVM_DIR="$HOME/.nvm"
          [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # This loads nvm
          [ -s "$NVM_DIR/bash_completion" ] && \. "$NVM_DIR/bash_completion" # This loads nvm bash_completion
          nvm install 22
          nvm use 22
          node -v 
          nvm current

      - name: Benchmark dump load in binary format
        timeout-minutes: 60
        env:
          AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
          AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
          AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
          META: ${{matrix.meta}}
          START_META: true
        run: |
          sudo -HE GOCOVERDIR=$(pwd)/cover .github/scripts/command/dump_load_bench.sh test_dump_load_in_binary

      - name: Benchmark dump load fast
        timeout-minutes: 60
        env:
          AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
          AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
          AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
          META: ${{matrix.meta}}
          START_META: false
        run: |
          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/dump_load_bench.sh test_dump_load_fast

      - name: Benchmark dump load
        timeout-minutes: 60
        env:
          AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
          AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
          AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
          META: ${{matrix.meta}}
          START_META: false
        run: |
          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/dump_load_bench.sh test_dump_load

      - name: log
        if: always()
        run: |
          tail -500 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure()
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [dump_load_bench]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: upload total coverage report
        timeout-minutes: 30
        continue-on-error: true
        uses: ./.github/actions/upload-total-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/dump_load_cross_meta.yml
================================================
name: "dump_load_cross_meta"
on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/start_meta_engine.sh'
      - '**/dump_load_cross_meta.yml'
      - '**/dump_load_cross_meta.sh'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/start_meta_engine.sh'
      - '**/dump_load_cross_meta.yml'
      - '**/dump_load_cross_meta.sh'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  dump_load_cross_meta:
    timeout-minutes: 90
    strategy:
      fail-fast: false
      matrix:
        meta1: [redis]
        meta2: [sqlite3]
        include:
          - meta1: mysql
            meta2: redis
          - meta1: mysql
            meta2: tikv
          # - meta1: tikv
          #   meta2: mysql
          # - meta1: redis
          #   meta2: tikv
          # - meta1: tikv
          #   meta2: redis

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [[ "${{matrix.meta1}}" == "fdb" || "${{matrix.meta2}}" == "fdb" ]]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Test Load & Dump with Json Fast Mode
        timeout-minutes: 30
        run: |
          sudo GOCOVERDIR=$(pwd)/cover META1=${{matrix.meta1}} META2=${{matrix.meta2}} FAST=true .github/scripts/command/dump_load_cross_meta.sh  
    
      - name: Test Load & Dump with Binary
        timeout-minutes: 30
        run: |
          sudo GOCOVERDIR=$(pwd)/cover META1=${{matrix.meta1}} META2=${{matrix.meta2}} BINARY=true .github/scripts/command/dump_load_cross_meta.sh
      
      - name: Test Load & Dump with Json
        timeout-minutes: 30
        run: |
          sudo GOCOVERDIR=$(pwd)/cover META1=${{matrix.meta1}} META2=${{matrix.meta2}} .github/scripts/command/dump_load_cross_meta.sh     

      - name: log
        if: always()
        run: | 
          tail -500 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true

      - name: Upload dumpfiies
        uses: actions/upload-artifact@v4
        timeout-minutes: 5
        if: failure()
        with:
          name: dump-files-${{ github.run_id }}-${{matrix.meta1}}-${{matrix.meta2}}
          path: |
            ${{github.workspace}}/*.json
            ${{github.workspace}}/*.db
          if-no-files-found: warn

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [dump_load_cross_meta]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: upload total coverage report
        timeout-minutes: 30
        continue-on-error: true
        uses: ./.github/actions/upload-total-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch' 
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"
              
        
================================================
FILE: .github/workflows/fsrand.yml
================================================
name: "fsrand"

on:
  push:
    branches:
    - main
    - release**
    paths:
    - '**/fsrand.yml'
    - '**/fs.py'
    - '**/fs_test.py'
    - '**/fs_acl_test.py'
  pull_request:
    branches:
    - main
    - release**
    paths:
    - '**/fsrand.yml'
    - '**/fs.py'
    - '**/fs_test.py'
    - '**/fs_acl_test.py'
  schedule:
    - cron:  '0 17 * * 0'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
jobs:
  fsrand:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        meta: ['redis', 'mysql', 'tikv']
        # meta: ['redis']
        
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi
  
      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Install tool
        run: | 
          sudo .github/scripts/apt_install.sh attr
          sudo pip install xattr
          sudo pip install minio

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --enable-acl --trash-days 0 --bucket=/mnt/jfs myjfs

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report --enable-xattr 
          if [ ! -f /tmp/jfs/.accesslog ]; then
            echo "<FATAL>: mount failed"
            exit 1
          fi
          
      - name: Run Examples
        timeout-minutes: 10
        run: |
          sudo -E python3 .github/scripts/hypo/fs_test.py
          sudo -E python3 .github/scripts/hypo/fs_acl_test.py

      - name: Download example database
        timeout-minutes: 5
        uses: dawidd6/action-download-artifact@v9
        if: false
        with:
          name: fsrand-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples
          if_no_artifact_found: ignore
          workflow_conclusion: ""
          check_artifacts: true

      - name: Test
        continue-on-error: true
        timeout-minutes: 120
        run: |
          sudo -E LOG_LEVEL=WARNING python3 .github/scripts/hypo/fs.py 2>&1 | tee fsrand.log
      
      - name: check fsrand.log
        if: always()
        run: | 
          [[ -f "fsrand.log" ]] && tail -n 1000 fsrand.log     
          grep -i "AssertionError" fsrand.log && exit 1 || true

      - name: chmod example directory
        if: always()
        timeout-minutes: 5
        run: |
          if [[ -e ".hypothesis/examples" ]]; then
            echo "chmod for .hypothesis/examples" && sudo chmod -R 755 .hypothesis/examples
          fi

      - name: Upload example database
        uses: actions/upload-artifact@v4
        if: false
        with:
          include-hidden-files: true
          name: fsrand-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples

      - name: check juicefs.log
        if: always()
        run: | 
          if [ -f ~/.juicefs/juicefs.log ]; then
            tail -300 ~/.juicefs/juicefs.log
            grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true
          fi
          if [ -f /var/log/juicefs.log ]; then
            tail -300 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi

      - name: upload coverage report
        timeout-minutes: 5
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure() 
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [fsrand]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"

================================================
FILE: .github/workflows/fsspec.yml
================================================
name: "fsspec"

on:
  push:
    branches:
      - main
      - release**
    paths:
      - '**/fsspec.yml'
  pull_request:
    branches:
      - main
      - release**
    paths:
      - '**/fsspec.yml'

  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron:  '0 16 * * *'
    
jobs:
  fsspec:
    strategy:
      fail-fast: false
      matrix:
        meta: ['redis', 'mysql', 'tikv']
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        timeout-minutes: 1
        uses: actions/checkout@v3

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi
  
      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}
      
      - name: Create venv
        run: |
          python3 -m venv venv
          source venv/bin/activate

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url
    
      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --enable-acl --trash-days 0 --bucket=/mnt/jfs myjfs

      - name: Build and install SDK
        timeout-minutes: 5
        run: |
          make -C sdk/python/ libjfs.so
          sudo python3 sdk/python/juicefs/setup.py install

      - name: Build and install juicefs spec
        timeout-minutes: 10
        working-directory: sdk/python/juicefs
        run: |
          pip install build
          python3 -m build -w
          ls dist/
          pip install dist/juicefs-*.whl

      - name: Run Test
        timeout-minutes: 10
        working-directory: sdk/python/juicefs
        run: |
          sudo pip install pytest
          sudo pip install fsspec
          source ../../../.github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo JUICEFS_META=${meta_url} python3 -m pytest tests/test.py
  
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 30
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [fsspec]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"

================================================
FILE: .github/workflows/gateway-random.yml
================================================
name: "gateway-random"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/gateway-random.yml'
      - '**/gateway-random.sh'
      - '.github/scripts/hypo/s3**.py'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/gateway-random.yml'
      - '**/gateway-random.sh'
      - '.github/scripts/hypo/s3**.py'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch" ]]; then
            echo 'meta_matrix=["mysql", "redis", "tikv"]' >> $GITHUB_OUTPUT
          else
            echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  gateway-random:
    timeout-minutes: 90
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Test with example
        run: |
          if [[ ${{matrix.meta}} == "tikv" ]]; then
            subdir=true
          else
            subdir=false
          fi
          sudo -E SUBDIR=$subdir .github/scripts/command/gateway-random.sh test_run_example
          
      - name: Download example database
        timeout-minutes: 5
        uses: dawidd6/action-download-artifact@v9
        if: false
        with:
          name: gateway-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples
          if_no_artifact_found: ignore
          workflow_conclusion: ""
          check_artifacts: true

      - name: Test randomly
        continue-on-error: true
        timeout-minutes: 60
        run: |
          if [[ ${{matrix.meta}} == "tikv" ]]; then
            subdir=true
          else
            subdir=false
          fi
          sudo -E LOG_LEVEL=WARNING SUBDIR=$subdir .github/scripts/command/gateway-random.sh test_run_all 2>&1 | tee fsrand.log
      
      - name: check fsrand.log
        if: always()
        run: | 
          [[ -f "fsrand.log" ]] && tail -n 1000 fsrand.log     
          grep -i "AssertionError" fsrand.log && exit 1 || true

      - name: chmod example directory
        if: always()
        timeout-minutes: 5
        run: |
          if [[ -e ".hypothesis/examples" ]]; then
            echo "chmod for .hypothesis/examples" && sudo chmod -R 755 .hypothesis/examples
          fi

      - name: Upload example database
        uses: actions/upload-artifact@v4
        if: false
        with:
          include-hidden-files: true
          name: gateway-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples

      - name: check log
        if: always()
        run: | 
          if [ -f /var/log/juicefs-gateway.log ]; then
            tail -300 /var/log/juicefs-gateway.log
            grep "<FATAL>:" /var/log/juicefs-gateway.log && exit 1 || true
          fi
          
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [gateway-random]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"

================================================
FILE: .github/workflows/gateway.yml
================================================
name: "gateway-test"

on:
  push:
    branches: 
      - release-**
    paths-ignore:
      - 'docs/**'
      - '**.md'
  pull_request:
    #The branches below must be a subset of the branches above
    branches: 
      - release-**
    paths-ignore:
      - 'docs/**'
      - '**.md'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron:  '0 19 * * *'

jobs:
  gateway:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        meta: [  'sqlite3', 'redis','tikv', 'badger',  'etcd', 'fdb']
        file_size: ['100M']
        isolation_level: ['']
        include:
          - meta: 'mariadb'
            file_size: '100M'
            isolation_level: "read committed"

          - meta: 'mysql'
            file_size: '100M'
            isolation_level: "read committed"
          - meta: 'mysql'
            file_size: '100M'
            isolation_level: "repeatable read"
          - meta: 'mysql'
            file_size: '100M'
            isolation_level: "serializable"

          - meta: 'postgres'
            file_size: '100M'
            isolation_level: "read committed"
          - meta: 'postgres'
            file_size: '100M'
            isolation_level: "repeatable read"
          - meta: 'postgres'
            file_size: '100M'
            isolation_level: "serializable"

          - meta: 'tidb'
            file_size: '100M'
            isolation_level: "read committed"
          - meta: 'tidb'
            file_size: '100M'
            isolation_level: "repeatable read"

    runs-on: ubuntu-22.04

    steps: 
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1
      
      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

            
      - name: Start meta
        run: | 
          sudo chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}

      - name: Install tools
        run: | 
          wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc
          chmod +x mc 
        shell: bash
        
      - name: start gateway
        shell: bash
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url ${{matrix.isolation_level}}
          mp="/tmp/myjfs"
          volume="myjfs"
          export MINIO_ROOT_USER=minioadmin
          export MINIO_ROOT_PASSWORD=minioadmin
          sudo chmod 777 /mnt
          ./juicefs format $meta_url $volume --trash-days 0 --bucket=/mnt/jfs
          ./juicefs gateway $meta_url localhost:8080 --no-usage-report --access-log /tmp/access1.log &
        
      - name: Sync with multiple process
        shell: bash
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          file_size=${{matrix.file_size}}
          if [ $file_size="100M" ]; then 
            file_count=5
          else
            file_count=2000
          fi
          threads=20
          mp=/tmp/myjfs
          volume=myjfs
          
          dd if=/dev/urandom of=file iflag=fullblock,count_bytes bs=4k count="$file_size" > /dev/null
          mkdir data
          for i in $(seq 1 $file_count); do
            cp file data/file$i
          done
          start=`date +%s`
          declare -a pids   
          ./juicefs sync --dirs data/  s3://minioadmin:minioadmin@localhost:8080/$volume/data/ --no-https -p $threads &
          pids+=($!)
          ./juicefs sync --dirs data/  s3://minioadmin:minioadmin@localhost:8080/$volume/data/ --no-https -p $threads &
          pids+=($!)
          ./juicefs sync --dirs data/  s3://minioadmin:minioadmin@localhost:8080/$volume/data/ --no-https -p $threads &
          pids+=($!)
          wait "${pids[@]}"
          rm -rf $HOME/.juicefs/cache/ || true
          # ./mc alias set minio http://localhost:9000 minioadmin minioadmin --api S3v4
          # ./mc mb minio/$volume
          # ./mc cp --recursive data/  minio/$volume/data
          end=`date +%s`
          time=$((end-start))
          echo time cost is: $time second
          killall juicefs 
          sleep 3
          ./juicefs mount -d $meta_url $mp --no-usage-report
          diff -ur data/ $mp/data/ 
          echo "diff succeed"
          ./juicefs umount  $mp --force
        
      - name: Sync with empty dir
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          threads=20
          mp=/tmp/myjfs
          volume=myjfs
          export MINIO_ROOT_USER=minioadmin
          export MINIO_ROOT_PASSWORD=minioadmin
          ./juicefs mdtest $meta_url test --dirs 10 --depth 2 --files 10 --threads 10 --no-usage-report
          ./juicefs gateway $meta_url localhost:8080 --access-log /tmp/access1.log &
          sleep 10
          mkdir emptydir
          declare -a pids  
          ./juicefs sync emptydir/ s3://minioadmin:minioadmin@localhost:8080/$volume/test/ --delete-dst --no-https -p $threads &
          pids+=($!)
          ./juicefs sync emptydir/ s3://minioadmin:minioadmin@localhost:8080/$volume/test/ --delete-dst --no-https -p $threads &
          pids+=($!)
          ./juicefs sync emptydir/ s3://minioadmin:minioadmin@localhost:8080/$volume/test/ --delete-dst --no-https -p $threads &
          pids+=($!)
          wait "${pids[@]}"
          killall juicefs
          sleep 3
          ./juicefs mount -d $meta_url $mp --no-usage-report
          [ -d "$mp/test/" ] && exit 1 
          ./juicefs umount  $mp --force
        shell: bash

      - name: log
        if: always()
        shell: bash
        run: | 
          if [ -f ~/.juicefs/juicefs.log ]; then
            tail -300 ~/.juicefs/juicefs.log
            grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true
          fi

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 1
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [gateway]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/integrationtests.yml
================================================
name: "integrationtests"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**.c'
      - '**.go'
      - 'Makefile'
      - '**/integrationtests.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**.c'
      - '**.go'
      - 'Makefile'
      - '**/integrationtests.yml'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          echo "GITHUB_REF_NAME is ${GITHUB_REF_NAME}"
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch"  ]]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "badger", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          elif [[ "${{github.event_name}}" == "pull_request" ||  "${{github.event_name}}" == "push" ]]; then
            echo 'meta_matrix=["redis", "mysql", "tikv"]' >> $GITHUB_OUTPUT
          else
            echo "event_name is not supported" && exit 1
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  integrationtests:
    timeout-minutes: 120
    runs-on: ubuntu-22.04
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1
        
      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --trash-days 0 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /jfs --no-usage-report --enable-xattr
          stat /jfs/.accesslog

      - name: Fslock Test
        timeout-minutes: 5
        run: |
          cd /jfs/
          git clone https://github.com/danjacques/gofslock.git
          cd gofslock && git checkout cc7f001fe0e7df1710adc8f0cd9e9d6d21fdb3a9
          go test -v ./fslock/...
          stat /jfs/

      - name: flock test
        timeout-minutes: 5
        run: |
          git clone https://github.com/gofrs/flock.git
          mkdir /jfs/tmp
          cd flock && go mod tidy && TMPDIR=/jfs/tmp go test .

      - name: make secfs.test
        run: |
          sudo .github/scripts/apt_install.sh  libacl1-dev
          git clone https://github.com/billziss-gh/secfs.test.git
          make -C secfs.test tools tools/bin/fsx
          make -C secfs.test tools tools/bin/fsracer
  
      - name: Fsx Test
        timeout-minutes: 16
        run: |
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch"  ]] ; then
            duration=900
          else
            duration=300
          fi
          sudo touch /jfs/fsx.out
          sudo rm -f /tmp/fsx.out
          sudo ln -s /jfs/fsx.out /tmp/fsx.out
          sudo secfs.test/tools/bin/fsx -d $duration -p 10000 -F 10000000 /tmp/fsx.out

      - name: Fsracer Test
        if: false
        timeout-minutes: 16
        shell: 'script -q -e -c "bash {0}"'
        run: |
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch"  ]] ; then
            duration=600
          else
            duration=300
          fi
          sudo secfs.test/tools/bin/fsracer $duration /jfs
  
      - name: log
        if: always()
        run: |
          tail -300 /var/log/juicefs.log
          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [integrationtests]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"

================================================
FILE: .github/workflows/ltpfs.yml
================================================
name: "ltpfs"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/ltpfs.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/ltpfs.yml'
  schedule:
    - cron:  '30 20 * * *'
  workflow_dispatch:

jobs:
  ltpfs:
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Copy
        run: |
          cp .github/workflows/bash/rm_fs /tmp/
          cp .github/workflows/bash/rm_list.sh /tmp/

      - name: Run Redis
        run: |
          sudo docker run -d --name redis -v redis-data:/data  \
          -p 6379:6379  redis redis-server --appendonly yes

      - name: Juicefs Format
        run: |
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format --trash-days 0 redis://127.0.0.1:6379/1 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d redis://127.0.0.1:6379/1 /tmp/jfs --no-usage-report

      - name: LTP FS
        timeout-minutes: 50
        run: |
          sudo .github/scripts/apt_install.sh libaio-dev libacl1-dev attr
          wget -O ltp.tar.gz https://github.com/linux-test-project/ltp/archive/refs/tags/20210927.tar.gz
          mkdir ltp
          tar -xvf ltp.tar.gz -C ltp --strip-components 1
          cd ltp
          ls -lh
          make autotools
          ./configure
          make
          sudo make install
          cd /opt/ltp
          sudo chmod +x /tmp/rm_list.sh
          sudo chmod 777 runtest/fs
          sudo /tmp/rm_list.sh /tmp/rm_fs /opt/ltp/runtest/fs
          sudo ./runltp -d /tmp/jfs -f fs,fs_perms_simple,fsx,io,fcntl-locktests -C result.log.failed -T result.log.tconf -l result.log

      - name: tconf Log
        if: always()
        run: |
          cat /opt/ltp/output/result.log.tconf

      - name: check ltpsyscall failed log
        if: always()
        run: |
          cat /opt/ltp/output/result.log.failed
          
      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"  


================================================
FILE: .github/workflows/ltpsyscalls.yml
================================================
name: "ltp-syscalls"

on:
  push:
    branches:
      - 'release-**'
    paths-ignore:
      - 'docs/**'
  pull_request:
    #The branches below must be a subset of the branches above
    branches:
      - 'release-**'
    paths-ignore:
      - 'docs/**'
  schedule:
    - cron:  '30 20 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  ltpsyscalls:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        # meta: [ 'sqlite3', 'redis', 'mysql', 'tikv', 'tidb', 'postgres', 'mariadb', 'badger', 'fdb']
        meta: ['redis']
        type: [ 'head', 'middle', 'tail']
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Copy
        run: |
          cp .github/workflows/bash/rm_syscalls /tmp/
          cp .github/workflows/bash/rm_list.sh /tmp/

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format --trash-days 0 $meta_url --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount --enable-xattr -d $meta_url /tmp/jfs --no-usage-report

      - name: Install LTP Syscalls
        run: |
          sudo .github/scripts/apt_install.sh libaio-dev libacl1-dev attr
          wget -O ltp.tar.gz https://github.com/linux-test-project/ltp/archive/refs/tags/20210927.tar.gz
          mkdir ltp
          tar -xvf ltp.tar.gz -C ltp --strip-components 1
          cd ltp
          make autotools
          ./configure
          make
          sudo make install

      - name: Run LTP Syscalls
        timeout-minutes: 60
        run: |
          cd /opt/ltp
          sudo chmod +x /tmp/rm_list.sh
          sudo chmod 777 runtest/syscalls
          sudo /tmp/rm_list.sh /tmp/rm_syscalls /opt/ltp/runtest/syscalls
          split -a 1 -d -l $(( $(wc -l < /opt/ltp/runtest/syscalls) / 3 + 1 )) /opt/ltp/runtest/syscalls /tmp/syscalls_
          sudo chmod 777 /tmp/syscalls_*
          if [ "${{matrix.type}}" == "head" ]; then
            cat /tmp/syscalls_0
            sudo ./runltp -d /tmp/jfs -C result.log.failed -T result.log.tconf -l result0.log -f /tmp/syscalls_0
          elif [ "${{matrix.type}}" == "middle" ]; then
            cat /tmp/syscalls_1
            sudo ./runltp -d /tmp/jfs -C result.log.failed -T result.log.tconf -l result1.log -f /tmp/syscalls_1
          elif [ "${{matrix.type}}" == "tail" ]; then
            cat /tmp/syscalls_2
            sudo ./runltp -d /tmp/jfs -C result.log.failed -T result.log.tconf -l result2.log -f /tmp/syscalls_2
          else
            echo "matrix.type: ${{matrix.type}} is not valid" && exit 1
          fi

      - name: tconf Log
        if: always()
        run: |
          cat /opt/ltp/output/result.log.tconf

      - name: check ltpsyscall failed log
        if: always()
        run: |
          cat /opt/ltp/output/result.log.failed

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"  

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/mutate-test-sdk.yml
================================================
name: mutate-test-sdk
on:
  pull_request:
    branches:
      - 'main'
    paths:
      - '**/JuiceFileSystemTest.java'

  workflow_dispatch:
    inputs:
      targetTests:
        type: string
        description: "Target tests, eg: io.juicefs.JuiceFileSystemTest"
        required: true
        default: ""  
      targetClasses:
        type: string
        description: "Target classes, eg: io.juicefs.JuiceFileSystemImpl*"
        required: true
        default: ""  
      timeoutConstant:
        type: int
        description: "Timeout constant"
        required: true
        default: 1000  
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false  

jobs:
  mutate-test-sdk:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Build
        uses: ./.github/actions/build

      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '8'

      - name: Run Redis
        run: |
          sudo docker run -d --name redis -v redis-data:/data  \
          -p 6379:6379  redis redis-server --appendonly yes

      - name: Juicefs Format
        run: |
          sudo ./juicefs format  localhost --bucket=/mnt/jfs dev

      - name: Juicefs Mount
        run: |
          sudo ./juicefs mount -d localhost /jfs
          touch /jfs/inner_sym_target
          echo "hello juicefs" > /jfs/inner_sym_target
          cd /jfs
          ln -s inner_sym_target inner_sym_link 
          mkdir etc
          chmod 777 etc
          echo `hostname` > etc/nodes

      - name: Make SDK
        run: |
          cd sdk/java
          make
          cd -

      - name: Change pom
        run: |
          if [ "${{github.event_name}}" == "pull_request"  ]; then
            targetTests="io.juicefs.JuiceFileSystemTest"
            targetClasses="io.juicefs.JuiceFileSystemImpl*"
            timeConstant=1000
          elif [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            targetTests="${{github.event.inputs.targetTests}}"
            echo "targetTests is $targetTests"
            targetClasses="${{github.event.inputs.targetClasses}}"
            echo "targetClasses is $targetClasses"
            timeConstant="${{github.event.inputs.timeConstant}}"
          fi
          POM_XML_PATH="sdk/java/pom.xml" TARGET_TESTS=$targetTests TARGET_CLASSES=$targetClasses TIME_CONSTANT=$timeConstant python3 .github/scripts/mutate/modify_sdk_pom.py
          cat sdk/java/pom.xml

      - name: Test SDK
        run: |
          cd sdk/java
          sudo mvn --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage
          cd -

      - name: Upload Pit Report
        uses: actions/upload-artifact@v4
        with:
          name: pit-reports
          path: sdk/java/target/pit-reports

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

================================================
FILE: .github/workflows/mutate-test.yml
================================================
name: mutate-test
on:
  pull_request:
    branches:
      - 'main'
    paths:
      - '**/*_test.go'

  workflow_dispatch:
    inputs:
      test_file:
        type: string
        description: "the go test file relative path you want to mutate, eg cmd/meta/xattr_test.go"
        required: true
        default: ""  
      job_total:
        type: string
        description: "number of job to run mutation test"
        required: true
        default: "1"
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false  

jobs:

  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: 'oldstable'

      - name: install go-mutesting
        run: |
          go install github.com/zimmski/go-mutesting/cmd/go-mutesting@latest

      - id: set-matrix
        run: |
          sudo .github/scripts/apt_install.sh jq
          if [ "${{github.event_name}}" == "pull_request"  ]; then
            echo github.event.pull_request.base.sha is ${{github.event.pull_request.base.sha}}
            echo github.event.pull_request.head.sha is ${{github.event.pull_request.head.sha}}
            echo github.sha is ${{ github.sha }}
            changed_file_str=$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | grep _test.go$ | xargs)
            echo "added or changed test files: $changed_file_str"
            changed_file_array=($changed_file_str)
            declare -a Jobs=();
            for test_file_name in "${changed_file_array[@]}"
            do
              echo "test_file_name is $test_file_name"
              if grep -q "//mutate:disable" $test_file_name; then
                echo "found //mutate:disable in $test_file_name" 
                continue
              fi
              source_file_name=${test_file_name%"_test.go"}.go
              echo "source_file_name is :" $source_file_name
              black_list_file=black.list
              TEST_FILE_NAME="$test_file_name" BLACK_LIST_FILE=$black_list_file python3 .github/scripts/mutate/parse_black_list.py 
              echo "black list checksum: "
              cat $black_list_file
              total_count=$(go-mutesting $source_file_name --debug --no-exec --blacklist $black_list_file| grep "Save mutation into" | wc -l)
              echo "total_count is $total_count"
              job_total=$(TEST_FILE_NAME=$test_file_name python3 .github/scripts/mutate/parse_job_total.py)
              echo "job_total specified: $job_total"
              if [ $job_total -eq 0 ]; then
                if [ $total_count -gt 200 ]; then
                  job_total=4
                else
                  job_total=1
                fi
              fi
              echo "job_total: $job_total"
              for i in `seq 1 $job_total` 
              do
                Jobs=("${Jobs[@]}" "$test_file_name-$i-$job_total")
              done
            done
            value=`printf '%s\n' "${Jobs[@]}" | jq -R . | jq -cs .`
            echo "value: $value"
            echo "matrix=$value" >> $GITHUB_OUTPUT
          elif [ "${{github.event_name}}" == "workflow_dispatch"  ]; then
            test_file_name=${{github.event.inputs.test_file}}
            echo "test file is $test_file_name"
            job_total=${{github.event.inputs.job_total}}
            echo "job_total is $job_total"
            declare -a Jobs=();
            for i in `seq 1 $job_total` 
              do
                Jobs=("${Jobs[@]}" "$test_file_name-$i-$job_total")
              done
            value=`printf '%s\n' "${Jobs[@]}" | jq -R . | jq -cs .`
            echo "value: $value"
            echo "matrix=$value" >> $GITHUB_OUTPUT
          fi

    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}

  mutate-test:
    timeout-minutes: 120
    if: "!github.event.pull_request.draft"
    name: ${{matrix.test_file}}
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        test_file: ${{ fromJson(needs.build-matrix.outputs.matrix) }}
    runs-on: ubuntu-22.04
    permissions:
      pull-requests: write
    steps:
      - uses: actions/checkout@v3

      - name: Get Current Job Log URL
        uses: Tiryoh/gha-jobid-action@v0
        id: jobs
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          job_name: ${{matrix.test_file}}

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Remove problem matcher for go
        run: |
          # https://github.com/actions/setup-go/blob/main/matchers.json
          echo "::remove-matcher owner=go::"

      - name: Install Packages
        run: |
          go install github.com/zimmski/go-mutesting/cmd/go-mutesting@latest
          sudo .github/scripts/apt_install.sh g++-multilib redis-server libacl1-dev attr python3-tk
          sudo pip install mysqlclient
          apt -y install glusterfs-server

      - name: Prepare Database
        timeout-minutes: 10
        run: |
          docker run -d -p 9000:9000 -p 9001:9001 -e "MINIO_ROOT_USER=testUser" -e "MINIO_ROOT_PASSWORD=testUserPassword" quay.io/minio/minio:RELEASE.2022-01-25T19-56-04Z server /data --console-address ":9001"
          go install github.com/minio/mc@RELEASE.2022-01-07T06-01-38Z && mc alias set local http://127.0.0.1:9000 testUser testUserPassword && mc mb local/testbucket
          make
          # sudo make -C fstests setup

      - name: run mutate test
        # timeout-minutes: 120
        run: |
          sudo chmod 777 /var/jfsCache
          test_file=$(echo ${{matrix.test_file}} | awk -F'-' '{print $1}')
          job_index=$(echo ${{matrix.test_file}} | awk -F'-' '{print $2}')
          job_total=$(echo ${{matrix.test_file}} | awk -F'-' '{print $3}')
          echo "test file is: $test_file, job_index is $job_index, job_total is $job_total"
          if [ -z "$test_file" ]; then 
            echo "test file is empty, will not run mutate test"
            exit 0
          fi
          source_file=${test_file%"_test.go"}.go
          echo "source file is :" $source_file
          package_path=$(dirname $test_file)
          echo "package path is :" $package_path

          test_cases=$(TEST_FILE_NAME=$test_file python3 .github/scripts/mutate/parse_test_cases.py || true)
          if [ "$?" -ne 0 ]; then
            echo "no test cases in test file, will not run mutate test"
            exit 0
          fi
          echo "test cases: $test_cases"

          if [[ "$test_file" =~ ^pkg/.* ]]; then
            go test ./$package_path/...  -v -run "$test_cases" -count=1 -cover -timeout=5m -coverpkg=./$package_path/... -coverprofile=mutest-cov.out
          elif [[ "$test_file" =~ ^cmd/.* ]]; then
            sudo JFS_GC_SKIPPEDTIME=1 MINIO_ACCESS_KEY=testUser MINIO_SECRET_KEY=testUserPassword go test ./cmd/... -v -run "$test_cases" -count=1 -cover -timeout=5m -coverpkg=./pkg/...,./cmd/... -coverprofile=mutest-cov.out 
          else
            echo "test file location error: $test_file"
            exit 0
          fi
          
          black_list_file=black.list
          TEST_FILE_NAME="$test_file" BLACK_LIST_FILE=$black_list_file python3 .github/scripts/mutate/parse_black_list.py 
          echo "black list checksum: "
          cat $black_list_file

          go-mutesting $source_file --debug --no-exec --do-not-remove-tmp-folder  --blacklist $black_list_file | tee -a mutate.log
          mutation_dir=$(cat mutate.log | grep "Save mutations into" | awk -F' ' '{print $4}' | sed -e 's:"::g')
          echo "mutation dir is $mutation_dir"
          JOB_INDEX=$job_index JOB_TOTAL=$job_total MUTATE_ORIGINAL=$source_file MUTATION_DIR=$mutation_dir COVERAGE_FILE=mutest-cov.out TEST_FILE_NAME="$test_file" PACKAGE_PATH="$package_path" STAT_RESULT_FILE=stat_result.log python3 .github/scripts/mutate/mutesting.py
          # COVERAGE_FILE=mutest-cov.out TEST_FILE_NAME="$test_file" PACKAGE_PATH="$package_path" go-mutesting $source_file --debug --exec=.github/scripts/mutate/mutest.sh  --do-not-remove-tmp-folder --blacklist $black_list_file 
          if [ $? != 0 ]; then echo "run mutesting.py failed" && exit 1; fi
          
          [[ -z "${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} " ]] && echo "<WARNING>: MYSQL_PASSWORD is empty" && exit 0
          export MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} 
          JOB_NAME=${{matrix.test_file}} JOB_URL=${{steps.jobs.outputs.html_url}}  STAT_RESULT_FILE=stat_result.log python3 .github/scripts/mutate/save_report.py

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-22.04
    needs: [build-matrix, mutate-test]
    if: always() && !github.event.pull_request.draft
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Install tools
        run: |
          sudo pip install mysqlclient

      - name: Generate mutate report
        run: |
          [[ -z "${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} " ]] && echo "<WARNING>: MYSQL_PASSWORD is empty" && exit 0
          export MYSQL_PASSWORD=${{secrets.MYSQL_PASSWORD_FOR_JUICEDATA}} 
          mutate_report=$(python3 .github/scripts/mutate/query_report.py)
          echo "mutate_report is $mutate_report"
          # echo "mutate_report=$mutate_report" >> $GITHUB_ENV
          MY_STRING=$(cat << EOF
          $mutate_report
          EOF
          )
          echo "MY_STRING<<EOF" >> $GITHUB_ENV
          echo "$MY_STRING" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV
          
      - uses: mshick/add-pr-comment@v2
        with:
          allow-repeats: true
          message: |
            *Mutate Test Report*
            ${{env.MY_STRING}} 
            
            Usage: https://github.com/juicedata/juicefs/blob/main/.github/scripts/mutate/how_to_use_mutate_test.md  

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/perf-test.yml
================================================
name: "JuiceFS mdtest Performance Comparison"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**/perf-test.yml'
      - '**/perf/**'
  pull_request:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**/perf-test.yml'
      - '**/perf/**'
  schedule:
    - cron:  '0 20 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
      current_version_commit:
        type: string
        description: "Commit hash for current version to test (default: HEAD)"
        required: false
        default: ''
      old_version_commit:
        type: string
        description: "Commit hash for old version to compare (default: latest release)"
        required: false
        default: ''

jobs:
  mdtest-perf-test:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        meta: ['redis', 'redis-nocache', 'mysql', 'tikv']
        cases: ["mdtest_fio"]
    runs-on: ubuntu-22.04

    steps:
      - name: Clean up
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/local/.ghcup
          sudo docker system prune -af
          sudo df -h

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Need full history for checking out specific commits

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential bc
          sudo pip install minio

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Prepare meta db
        run: |
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          start_meta_engine $engine_meta
          meta_url=$(get_meta_url $engine_meta)
          create_database $meta_url

      - name: Install mdtest & fio
        run: |
          sudo apt-get install mpich openmpi-bin libopenmpi-dev fio -y
          wget https://github.com/hpc/ior/releases/download/3.3.0/ior-3.3.0.tar.gz
          tar -xzvf ior-3.3.0.tar.gz
          cd ior-3.3.0
          ./configure && make && sudo make install

      # Build and test current version (either specified commit or HEAD)
      - name: Checkout and build current version
        if: ${{ inputs.current_version_commit != '' }}
        run: |
          mkdir -p ../juicefs-build
          cd ../juicefs-build
          git clone $GITHUB_SERVER_URL/$GITHUB_REPOSITORY .
          git checkout ${{ inputs.current_version_commit }}
          make
          cp juicefs ../juicefs/
          cd ../juicefs

      - name: Build current version (default)
        if: ${{ inputs.current_version_commit == '' }}
        timeout-minutes: 10
        uses: ./.github/actions/build
        with:
          target: ${{steps.vars.outputs.target}}

      - name: Run benckmark with current version
        run: |
          mkdir -p /tmp/jfs/mdtest
          sudo chmod 777 /tmp/jfs
          
          # Mount current version
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          meta_url=$(source .github/scripts/start_meta_engine.sh; get_meta_url $engine_meta)
          sudo chmod 777 /mnt
          if [[ "${{matrix.meta}}" == "redis-nocache" ]]; then
            meta_url=${meta_url%%\?*}
            echo "Removed redis query parameters for redis-nocache profile"
          fi
          ./juicefs format $meta_url current-version-test --trash-days 0 --bucket=/mnt/jfs
          ./juicefs mount -d "$meta_url" /tmp/jfs --no-usage-report
          
          # Run tests
          chmod +x .github/scripts/perf/*.sh
          .github/scripts/perf/${{matrix.cases}}.sh /tmp/jfs "./results/current_${{matrix.meta}}" "current" "$meta_url"

      - name: Cleanup current version
        run: |
          source .github/scripts/common/common.sh
          source .github/scripts/start_meta_engine.sh
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          meta_url=$(source .github/scripts/start_meta_engine.sh; get_meta_url $engine_meta)
          if [[ "${{matrix.meta}}" == "redis-nocache" ]]; then
            meta_url=${meta_url%%\?*}
            echo "Removed redis query parameters for redis-nocache profile"
          fi
          META_URL=$meta_url          
          uuid=$(./juicefs status $meta_url | grep UUID | cut -d '"' -f 4)
          ./juicefs destroy --force $meta_url $uuid
          ./juicefs umount /tmp/jfs
          rm -rf /mnt/jfs
          start_meta_engine $engine_meta
          create_database $meta_url
          prepare_test

      # Build and test old version (either specified commit or latest release)
      - name: Checkout and build old version from commit
        if: ${{ inputs.old_version_commit != '' }}
        run: |
          mkdir -p ../juicefs-build2
          cd ../juicefs-build2
          git clone $GITHUB_SERVER_URL/$GITHUB_REPOSITORY .
          git checkout ${{ inputs.old_version_commit }}
          make
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          meta_url=$(source .github/scripts/start_meta_engine.sh; get_meta_url $engine_meta)
          mkdir -p /tmp/jfs_old/mdtest
          sudo chmod 777 /tmp/jfs_old
          if [[ "$engine_meta" == "mysql" || "$engine_meta" == "redis" ]]; then
            meta_url=${meta_url%%\?*}
            echo "Removed query parameters for old version compatibility"
          fi
          ./juicefs format "$meta_url" old-version-test --trash-days 0 --bucket=/mnt/jfs
          ./juicefs mount -d "$meta_url" /tmp/jfs_old --no-usage-report
          cd ../juicefs

      - name: Install old JuiceFS version (default)
        if: ${{ inputs.old_version_commit == '' }}
        run: |
          curl -sSL https://d.juicefs.com/install | sh -
          JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          meta_url=$(source .github/scripts/start_meta_engine.sh; get_meta_url $engine_meta)
          if [[ "$engine_meta" == "mysql" || "$engine_meta" == "redis" ]]; then
            meta_url=${meta_url%%\?*}
            echo "Removed query parameters for old version compatibility"
          fi
          juicefs format "$meta_url" old-version-test --trash-days 0 --bucket=/mnt/jfs
          mkdir -p /tmp/jfs_old/mdtest
          sudo chmod 777 /tmp/jfs_old
          juicefs mount -d "$meta_url" /tmp/jfs_old --no-usage-report

      - name: Run benchmark with old version
        run: |
          engine_meta=${{matrix.meta}}
          if [[ "$engine_meta" == "redis-nocache" ]]; then
            engine_meta="redis"
          fi
          meta_url=$(source .github/scripts/start_meta_engine.sh; get_meta_url $engine_meta)
          if [[ "$engine_meta" == "mysql" || "$engine_meta" == "redis" ]]; then
            meta_url=${meta_url%%\?*}
            echo "Removed query parameters for old version compatibility"
          fi
          .github/scripts/perf/${{matrix.cases}}.sh /tmp/jfs_old "./results/old_${{matrix.meta}}" "old" "$meta_url"

      - name: Compare results
        run: |
          .github/scripts/perf/compare_${{matrix.cases}}.sh "./results/current_${{matrix.meta}}" "./results/old_${{matrix.meta}}" || true

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/permission-check.yaml
================================================
name: "permission-check"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '**.java'
      - '**/pom.xml'
  pull_request:
    #The branches below must be a subset of the branches above
    branches:
      - 'main'
      - 'release-**'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '.github/**'
      - '**.java'
      - '**/pom.xml'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  pjdfstest:
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        meta: [ 'sqlite3', 'redis', 'badger' ]

    runs-on: ubuntu-22.04
    steps:
      - uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: '5.34'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Set Variable
        id: vars
        run: echo "target=juicefs" >> $GITHUB_OUTPUT

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with:
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: |
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs format $meta_url --trash-days 0 pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          # sudo mkdir /var/jfs
          # sudo chmod 777 /var/jfs
          sudo ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report --attr-cache 0 --entry-cache 0 --dir-entry-cache 0 --non-default-permission &
          sleep 5
          if [ ! -f /tmp/jfs/.accesslog ]; then
            echo "<FATAL>: mount failed"
            exit 1
          fi

      - name: Pjdfstest
        run: |
          sudo .github/scripts/apt_install.sh libtap-harness-archive-perl
          cd /tmp/jfs/
          git clone https://github.com/hexilee/pjdfstest.git
          cd pjdfstest
          autoreconf -ifs
          ./configure
          make pjdfstest
          sudo prove -rv tests/

      - name: log
        if: always()
        shell: bash
        run: |
          if [ -f ~/.juicefs/juicefs.log ]; then
            tail -300 ~/.juicefs/juicefs.log
            grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true
          fi
          if [ -f /var/log/juicefs.log ]; then
            tail -300 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [pjdfstest]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/pjdfstest.yml
================================================
name: "pjdfstest"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '.github/**'
      - '**.java'
      - '**/pom.xml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '.github/**'
      - '**.java'
      - '**/pom.xml'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch" ]]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "badger", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          elif [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push"  ]]; then
            echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
          else
            echo 'event name is not supported' && exit 1
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  pjdfstest:
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        # [ 'sqlite3', 'redis', 'mysql', 'tikv', 'tidb', 'postgres', 'badger', 'mariadb', 'fdb']
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04

    steps:
      - uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: '5.34'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with:
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: |
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs format $meta_url --trash-days 0 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          # sudo mkdir /var/jfs
          # sudo chmod 777 /var/jfs
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report
          stat /tmp/jfs/.accesslog

      - name: Pjdfstest
        run: |
          sudo .github/scripts/apt_install.sh libtap-harness-archive-perl
          cd /tmp/jfs/
          git clone https://github.com/sanwan/pjdfstest.git
          cd pjdfstest
          autoreconf -ifs
          ./configure
          make pjdfstest
          sudo prove -rv tests/

      - name: log
        if: always()
        shell: bash
        run: |
          if [ -f ~/.juicefs/juicefs.log ]; then
            tail -300 ~/.juicefs/juicefs.log
            grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true
          fi
          if [ -f /var/log/juicefs.log ]; then
            tail -300 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [pjdfstest]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/pysdk.yml
================================================
name: "pysdk"

on:
  push:
    branches:
    - main
    - release**
    paths:
    - '**/hypo/fs_op.py'
    - '**/hypo/fs.py'
    - '**/hypo/fs_sdk_test.py'
    - '**/pysdk_test.py'
    - '**/juicefs.py'
    - '**/pysdk.yml'
  pull_request:
    branches:
    - main
    - release**
    paths:
    - '**/hypo/fs_op.py'
    - '**/hypo/fs.py'
    - '**/hypo/fs_sdk_test.py'
    - '**/pysdk_test.py'
    - '**/juicefs.py'
    - '**/pysdk.yml'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
      seed:
        type: string
        description: "Seed for random test"
        required: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          # TODO: add fdb when bugfix: https://github.com/juicedata/juicefs/issues/5910
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch" ]]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "mariadb"]' >> $GITHUB_OUTPUT
          elif [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push"  ]]; then
            echo 'meta_matrix=["redis", "tikv", "mysql"]' >> $GITHUB_OUTPUT
          else
            echo 'event name is not supported' && exit 1
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  pysdk:
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        # [ 'sqlite3', 'redis', 'mysql', 'tikv', 'tidb', 'postgres', 'badger', 'mariadb', 'fdb']
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Download example database
        timeout-minutes: 5
        uses: dawidd6/action-download-artifact@v9
        if: false
        with:
          name: pysdk-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples
          if_no_artifact_found: ignore
          workflow_conclusion: ""
          check_artifacts: true

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi
      
      - name: Build
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs format $meta_url --trash-days 0 --enable-acl --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        if: false
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report --enable-xattr
          stat /tmp/jfs/.accesslog

      - name: Build and install SDK
        timeout-minutes: 5
        run: |
          make -C sdk/python/ libjfs.so
          sudo python3 sdk/python/juicefs/setup.py install
          df -h

      - name: Run juicefs.py
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo JFS_VOLUME=test-volume JFS_META=$meta_url python3 sdk/python/juicefs/juicefs/juicefs.py
          df -h

      - name: Run pysdk_test.py
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo META_URL=$meta_url python3 .github/scripts/pysdk/pysdk_test.py
          df -h

      - name: Run file_test.py
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo USE_SDK=true META_URL=$meta_url python3 .github/scripts/hypo/file_test.py
          df -h

      - name: Run file.py without read
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo USE_SDK=true META_URL=$meta_url EXCLUDE_RULES="read,readline,readlines" python3 .github/scripts/hypo/file.py
          df -h

      - name: Run file.py without write
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo USE_SDK=true META_URL=$meta_url EXCLUDE_RULES="write,writelines,truncate" python3 .github/scripts/hypo/file.py
          df -h

      - name: Run fs_sdk_test.py
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo USE_SDK=true META_URL=$meta_url python3 .github/scripts/hypo/fs_sdk_test.py
          df -h

      - name: Run fs.py
        timeout-minutes: 60
        run: |
          if [[ -n "${{ github.event.inputs.seed }}" ]]; then 
            seed=${{ github.event.inputs.seed }}
          elif [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push" ]]; then
            seed=0
          else
            seed=$RANDOM
          fi
          echo seed is $seed
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo IGNORE_DIFF_ERRORS=true MAX_EXAMPLE=1000 STEP_COUNT=200 USE_SDK=true SEED=$seed META_URL=$meta_url EXCLUDE_RULES="readlines,readline,clone_cp_file,clone_cp_dir" python3 .github/scripts/hypo/fs.py 2>&1 | tee fsrand.log
          exit ${PIPESTATUS[0]}
          df -h

      - name: check fsrand.log
        if: always()
        run: |
          sudo tail -n 500 fsrand.log || true

      - name: chmod example directory
        if: always()
        timeout-minutes: 5
        run: |
          if [[ -e ".hypothesis/examples" ]]; then
            echo "chmod for .hypothesis/examples" && sudo chmod -R 755 .hypothesis/examples
          fi

      - name: Upload example database
        uses: actions/upload-artifact@v4
        if: false
        with:
          include-hidden-files: true
          name: pysdk-hypothesis-example-db-${{ matrix.meta }}
          path: .hypothesis/examples
          overwrite: true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"  

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [pysdk]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"

================================================
FILE: .github/workflows/random-test.yml
================================================
name: "random-test"
on:
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]
    branches:
      - main
      - release**
    paths:
      - '**/workflows/random-test.yml'

  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
      duration:
        type: string
        description: "duration in seconds"
        required: false
        default: "1800"

  schedule:
    - cron: '0 16 * * *'

jobs:
  random-test:
    if: "!github.event.pull_request.draft"
    strategy:
      fail-fast: false
      matrix:
          meta: ["redis"]
          basedir: [ "random-test" ]
          subdir: [ "true" ]
          removeOp: [ "rm"]
          zipf: [""]
          include:
            - basedir: "random-test"
              meta: "mysql"
              subdir: "false"
              removeOp: "rmr"
              zipf: "1.02"
            - basedir: ""
              meta: "tikv"
              subdir: "false"
              removeOp: "noOp"
              zipf: "1.04"
            - basedir: "random-test"
              meta: "postgres"
              subdir: "false"
              removeOp: "rm"
              zipf: "1.06"

    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with:
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: |
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo ./juicefs format $meta_url --trash-days 0 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          [[ "${{matrix.subdir}}" == "true" ]] && subdir_option="--subdir=subdir" || subdir_option=""
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report $subdir_option
          stat /tmp/jfs/.accesslog
          
      - name: Test
        timeout-minutes: 125
        run: |
          if [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push" ]]; then
            duration=600
          elif [[ -n "${{github.event.inputs.duration}}" ]]; then
            duration=${{github.event.inputs.duration}}
          else
            duration=1800
          fi
          [[ "${{matrix.zipf}}" == "" ]] && merge_option="-mergeOp 1,uniform" || merge_option=""
          [[ "${{matrix.zipf}}" != "" ]] && zipf_option="-zipf ${{matrix.zipf}}" || zipf_option=""
          wget -q s.juicefs.com/static/Linux/mount -O mount && chmod +x mount
          set -x
          timeout $((duration + 60))s sudo ./mount random-test runOp --baseDir /tmp/jfs/${{matrix.basedir}} --logDir random-test-log \
            --duration ${duration}s --files 1000000 --ops 1000000000 --threads 200 --dirSize 100 ${zipf_option} --skewInterval 30s --hotPoints $((RANDOM%5+1)) \
            --mkdirOp 10,uniform -createOp 10,uniform -readOp 1,uniform -lsOp 1,uniform -deleteOp 0.1,uniform -rmrOp 0.03,end -renameOp 1,uniform -linkOp 3,uniform --truncateOp 1,uniform --truncateSize 1G,1G -inspectOp 30,uniform --cmdCloneOp 5,1s,10s --cmdRmrOp 5,1s,10s -walkOp 1,10s -walkThreads 5
          set +x

      - name: Remove test dir
        timeout-minutes: 30
        run: |
          echo "Removing test dir /tmp/jfs/${{matrix.basedir}} with ${{matrix.removeOp}}"
          if [[ "${{matrix.removeOp}}" == "rm" ]]; then
            sudo rm -rf /tmp/jfs/${{matrix.basedir}} || (find /tmp/jfs/${{matrix.basedir}} -exec stat -c '%n %i' {} + && exit 1)
          elif [[ "${{matrix.removeOp}}" == "rmr" ]]; then
            sudo ./juicefs rmr /tmp/jfs/${{matrix.basedir}} || (find /tmp/jfs/${{matrix.basedir}} -exec stat -c '%n %i' {} + && exit 1)
          elif [[ "${{matrix.removeOp}}" == "rmr-skip-trash" ]]; then
            sudo ./juicefs rmr /tmp/jfs/${{matrix.basedir}} --skip-trash || (find /tmp/jfs/${{matrix.basedir}} -exec stat -c '%n %i' {} + && exit 1)
          else
            echo "no removeOp specified, skip removing test dir"
          fi
          if [[ "${{matrix.removeOp}}" == "rm" || "${{matrix.removeOp}}" == "rmr" || "${{matrix.removeOp}}" == "rmr-skip-trash" ]]; then
            ls -ali /tmp/jfs/${{matrix.basedir}} && find /tmp/jfs/${{matrix.basedir}} -exec stat -c '%n %i' {} + && echo "Error: /tmp/jfs/${{matrix.basedir}} still exists after remove" && exit 1 || echo "/tmp/jfs/${{matrix.basedir}} removed successfully"
          fi
          
      - name: Check file-op.log
        timeout-minutes: 5
        if: always() && !github.event.pull_request.draft
        run: |
          sudo chmod -R a+r random-test-log
          ls -l random-test-log/
          [[ -f random-test-log/file-op.log ]] && tail -n 500 random-test-log/file-op.log || true

      - name: Check log
        if: always()
        shell: bash
        run: |
          if [ -f /var/log/juicefs.log ]; then
            tail -300 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi
      
      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && github.event.inputs.debug == 'true'
        # if: failure()
        timeout-minutes: 30
        uses: owenthereal/action-upterm@v1
        with:
          wait-timeout-minutes: 10

  success-all-test:
    runs-on: ubuntu-latest
    needs: [random-test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"

================================================
FILE: .github/workflows/release.yml
================================================
name: release

on:
  push:
    tags:
      - v*

jobs:
  releaser:
    runs-on: ubuntu-22.04
    steps:
      - name: Clean up
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/local/.ghcup
          sudo docker system prune -af
          sudo df -h

      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: "oldstable"

      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          distribution: "temurin"
          java-version: "8"
          server-id: central
          server-username: MAVEN_USERNAME
          server-password: MAVEN_PASSWORD
          gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }}
          gpg-passphrase: MAVEN_GPG_PASSPHRASE

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: setup release environment
        run: |-
          echo 'GITHUB_TOKEN=${{secrets.GH_PERSONAL_ACCESS_TOKEN}}' > .release-env

      - name: goreleaser release
        run: make release

      - name: Cache local Maven repository
        id: cache-maven
        uses: actions/cache@v3
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      - name: Chown go module cache
        run: sudo chown -R $USER $HOME/go/pkg/mod

      - name: Build SDK
        run: |
          make -C sdk/java package-all && sudo chown -R $USER sdk/java/target
          echo "JUICEFS_VERSION=$(mvn -f sdk/java/pom.xml help:evaluate -Dexpression=project.version -q -DforceStdout)" >> $GITHUB_ENV

      - name: Upload SDK
        uses: softprops/action-gh-release@v1
        with:
          files: |
            sdk/java/target/juicefs-hadoop-${{ env.JUICEFS_VERSION }}.jar
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Publish package
        run: mvn -f sdk/java/pom.xml deploy -DskipTests
        env:
          MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
          MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}
          MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }}

      - name: Setup upterm session
        if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/resources/core-site.xml
================================================
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>jfs://dev/</value>
    </property>
    <property>
        <name>fs.jfs.impl</name>
        <value>io.juicefs.JuiceFileSystem</value>
    </property>
    <property>
        <name>juicefs.cache-size</name>
        <value>0</value>
    </property>
    <property>
        <name>juicefs.no-usage-report</name>
        <value>true</value>
    </property>
    <property>
        <name>juicefs.access-log</name>
        <value>/tmp/juicefs-access.log</value>
    </property>
    <property>
        <name>juicefs.dev.meta</name>
        <value>redis://127.0.0.1:6379/1</value>
    </property>
</configuration>


================================================
FILE: .github/workflows/resources/load-balancer.conf
================================================
   upstream backend {
      server 127.0.0.1:9000;
      server 127.0.0.1:9001;
   }

   # This server accepts all traffic to port 80 and passes it to the upstream.
   # Notice that the upstream name and the proxy_pass need to match.

   server {
      listen 8080;
      server_name localhost;
      location / {
          proxy_set_header Host $http_host;
          proxy_pass http://backend;
      }
   }

   client_max_body_size 100M;

================================================
FILE: .github/workflows/resources/sync-options.txt
================================================
--dirs --include .* , -r --include .* , enable                                                                                                                                              
--dirs --exclude .* , -r --exclude .*  , enable                                                         
--dirs --exclude .* --exclude docs/ --exclude *.png , -r --exclude .* --exclude docs/ --exclude *.png , enable
--dirs --include docs/ --include *.png --exclude * , -r --include docs/ --include *.png --exclude * , enable 
--dirs --exclude * --include docs/ --include *.png , -r --exclude * --include docs/ --include *.png , enable
--dirs --include .github --include *.png --exclude .* , -r --include .github --include *.png --exclude .* , enable
--dirs --include .github --include *.png --exclude .* , -r --include .github --include *.png --exclude .* , enable
--dirs --exclude .* --include .github --include *.png , -r --exclude .* --include .github --include *.png , enable
--dirs --include [a-f]*.go --exclude *.go ,-r --include [a-f]*.go --exclude *.go ,enable            
--dirs --include *_test.go --exclude *.go ,-r --include *_test.go --exclude *.go ,enable            
--dirs --include cmd/ --exclude *.go ,-r --include cmd/ --exclude *.go ,enable                       
--dirs --include pk*/chu*/ --exclude *.go ,-r --include pk*/chu*/ --exclude *.go ,enable            
--dirs --include chu*/ --exclude pk*/ --exclude *.go ,-r --include chu*/ --exclude pk*/  --exclude *.go ,enable
--dirs --include chun?/ --exclude pk*/ --exclude *.go ,-r --include chun?/ --exclude pk*/  --exclude *.go ,enable

================================================
FILE: .github/workflows/resources/tpcds_datagen.scala
================================================
// Copyright 2015 Databricks
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

val scaleFactor = "5"

// data format.
val format = "parquet"
// If false, float type will be used instead of decimal.
val useDecimal = true
// If false, string type will be used instead of date.
val useDate = true
// If true, rows with nulls in partition key will be thrown away.
val filterNull = false
// If true, partitions will be coalesced into a single file during generation.
val shuffle = true

// s3/dbfs path to generate the data to.
val rootDir = s"jfs:///tmp/performance-datasets/tpcds/sf$scaleFactor-$format/"
// name of database to be created.
val databaseName = s"tpcds_sf${scaleFactor}" +
  s"""_${if (useDecimal) "with" else "no"}decimal""" +
  s"""_${if (useDate) "with" else "no"}date""" +
  s"""_${if (filterNull) "no" else "with"}nulls"""


import com.databricks.spark.sql.perf.tpcds.TPCDSTables
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val tables = new TPCDSTables(sqlContext, dsdgenDir = "/tmp/tpcds-kit/tools", scaleFactor = scaleFactor, useDoubleForDecimal = !useDecimal, useStringForDate = !useDate)


import org.apache.spark.deploy.SparkHadoopUtil
// Limit the memory used by parquet writer
// Compress with snappy:
sqlContext.sparkContext.hadoopConfiguration.set("parquet.memory.pool.ratio", "0.1")
sqlContext.setConf("spark.sql.parquet.compression.codec", "snappy")
// TPCDS has around 2000 dates.
spark.conf.set("spark.sql.shuffle.partitions", "10")
// Don't write too huge files.
sqlContext.setConf("spark.sql.files.maxRecordsPerFile", "20000000")

val dsdgen_partitioned=10 // recommended for SF10000+.
val dsdgen_nonpartitioned=10 // small tables do not need much parallelism in generation.

// generate all the small dimension tables
val nonPartitionedTables = Array("call_center", "catalog_page", "customer", "customer_address", "customer_demographics", "date_dim", "household_demographics", "income_band", "item", "promotion", "reason", "ship_mode", "store",  "time_dim", "warehouse", "web_page", "web_site")
nonPartitionedTables.foreach { t => {
  tables.genData(
      location = rootDir,
      format = format,
      overwrite = true,
      partitionTables = true,
      clusterByPartitionColumns = shuffle,
      filterOutNullPartitionValues = filterNull,
      tableFilter = t,
      numPartitions = dsdgen_nonpartitioned)
}}
println("Done generating non partitioned tables.")

// leave the biggest/potentially hardest tables to be generated last.
val partitionedTables = Array("inventory", "web_returns", "catalog_returns", "store_returns", "web_sales", "catalog_sales", "store_sales") 
partitionedTables.foreach { t => {
  tables.genData(
      location = rootDir,
      format = format,
      overwrite = true,
      partitionTables = true,
      clusterByPartitionColumns = shuffle,
      filterOutNullPartitionValues = filterNull,
      tableFilter = t,
      numPartitions = dsdgen_partitioned)
}}
println("Done generating partitioned tables.")

// COMMAND ----------

sql(s"drop database if exists $databaseName cascade")
sql(s"create database $databaseName")

// COMMAND ----------

sql(s"use $databaseName")

// COMMAND ----------

tables.createExternalTables(rootDir, format, databaseName, overwrite = true, discoverPartitions = true)

// COMMAND ----------

// MAGIC %md
// MAGIC Analyzing tables is needed only if cbo is to be used.

// COMMAND ----------

tables.analyzeTables(databaseName, analyzeColumns = true)

System.exit(0)


================================================
FILE: .github/workflows/resources/tpcds_run.scala
================================================
// Copyright 2015 Databricks
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Databricks notebook source
// MAGIC %md 
// MAGIC This notebook runs spark-sql-perf TPCDS benchmark on and saves the result.

// COMMAND ----------

// Database to be used:
// TPCDS Scale factor
val scaleFactor = "5"
// If false, float type will be used instead of decimal.
val useDecimal = true
// If false, string type will be used instead of date.
val useDate = true
// name of database to be used.
val filterNull = false

val databaseName = s"tpcds_sf${scaleFactor}" +
  s"""_${if (useDecimal) "with" else "no"}decimal""" +
  s"""_${if (useDate) "with" else "no"}date""" +
  s"""_${if (filterNull) "no" else "with"}nulls"""

val iterations = 2 // how many times to run the whole set of queries.

val timeout = 60 // timeout in hours

val query_filter = Seq("q1-v2.4", "q2-v2.4", "q3-v2.4", "q4-v2.4", "q5-v2.4", "q6-v2.4", "q7-v2.4", "q8-v2.4", "q9-v2.4", "q10-v2.4") // Seq() == all queries
val randomizeQueries = false // run queries in a random order. Recommended for parallel runs.

// detailed results will be written as JSON to this location.
val resultLocation = "file:///tmp/performance-datasets/tpcds/results"

// COMMAND ----------

// Spark configuration
spark.conf.set("spark.sql.broadcastTimeout", "10000") // good idea for Q14, Q88.

// ... + any other configuration tuning

// COMMAND ----------

sql(s"use `$databaseName`")

// COMMAND ----------

import com.databricks.spark.sql.perf.tpcds.TPCDS
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val tpcds = new TPCDS (sqlContext = sqlContext)
def queries = {
  val filtered_queries = query_filter match {
    case Seq() => tpcds.tpcds2_4Queries
    case _ => tpcds.tpcds2_4Queries.filter(q => query_filter.contains(q.name))
  }
  if (randomizeQueries) scala.util.Random.shuffle(filtered_queries) else filtered_queries
}
val experiment = tpcds.runExperiment(
  queries,
  iterations = iterations,
  resultLocation = resultLocation,
  tags = Map("runtype" -> "benchmark", "database" -> databaseName, "scale_factor" -> scaleFactor))

experiment.waitForFinish(timeout*60*60)

experiment.getCurrentResults.createOrReplaceTempView("result")
spark.sql("select substring(name,1,100) as Name, bround((parsingTime+analysisTime+optimizationTime+planningTime+executionTime)/1000.0,1) as Runtime_sec  from result").show()

System.exit(0)
//display(summary)


================================================
FILE: .github/workflows/resources/vdbench_big_file.conf
================================================
data_errors=1
fsd=fsd1,anchor=/tmp/vdbench/vdbench-big,depth=1,width=1,files=4,size=1g,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=10,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=10,interval=1


================================================
FILE: .github/workflows/resources/vdbench_long_run.conf
================================================
data_errors=1
fsd=fsd1,anchor=/tmp/jfs,depth=1,width=2,files=2,sizes=(10m,0),shared=yes,openflags=o_direct
fwd=fwd1,fsd=fsd1,threads=4,xfersize=(512,20,4k,20,64k,20,512k,20,1024k,20),fileio=random,fileselect=random,rdpct=70
rd=rd1,fwd=fwd*,fwdrate=max,format=restart,elapsed=1500,interval=1


================================================
FILE: .github/workflows/resources/vdbench_small_file.conf
================================================
data_errors=1
fsd=fsd1,anchor=/tmp/vdbench/vdbench-small,depth=3,width=10,files=10,size=128k,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd3,fsd=fsd1,rdpct=70,xfersize=128k,fileio=random,fileselect=random,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=60,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=60,interval=1
rd=rd3,fwd=fwd3,fwdrate=max,format=restart,elapsed=60,interval=1


================================================
FILE: .github/workflows/rmfiles.yml
================================================
name: "rmr-test"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/rmfiles.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/rmfiles.yml'
  schedule:
    - cron:  '0 20 * * *'
  workflow_dispatch:

jobs:
  rmr-test:
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        meta: [ 'sqlite3', 'redis', 'mysql',  'postgres', 'tikv', 'fdb', 'badger', 'etcd']

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }} 

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          sudo chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}

      - name: Rmr
        shell: bash
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url
          mp=/tmp/jfs
          # wget -q https://s.juicefs.com/static/bench/500K_empty_files.dump.gz
          # gzip -dk  500K_empty_files.dump.gz
          # ./juicefs load $meta_url 500K_empty_files.dump
          sudo chmod 777 /mnt
          GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --bucket=/mnt/jfs jfs
          GOCOVERDIR=$(pwd)/cover ./juicefs mdtest $meta_url test --dirs 10 --depth 3 --files 10 --threads 10 --no-usage-report
          GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url $mp --no-usage-report
          sleep 3
          ls -l $mp/test
          GOCOVERDIR=$(pwd)/cover ./juicefs rmr $mp/test/
          sleep 3 
          ls -l $mp/test && exit 1 || true
        
      - name: Clear
        run: | 
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          mp=/tmp/jfs
          volume=jfs
          test -d $mp && ./juicefs umount -f $mp
          ./juicefs status $meta_url && UUID=$(./juicefs status $meta_url | grep UUID | cut -d '"' -f 4) || echo "meta not exist"
          if [ -n "$UUID" ];then
            ./juicefs destroy --yes $meta_url $UUID
          fi
          test -d /var/jfs/$volume && rm -rf /var/jfs/$volume || true
        shell: bash

      - name: Rmr Parallel
        shell: bash
        run: |
          sudo chmod 777 /var
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          mp=/tmp/jfs
          # wget -q https://s.juicefs.com/static/bench/500K_empty_files.dump.gz
          # gzip -dk  500K_empty_files.dump.gz
          # ./juicefs load $meta_url 500K_empty_files.dump
          GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --bucket=/mnt/jfs jfs
          GOCOVERDIR=$(pwd)/cover ./juicefs mdtest $meta_url test --dirs 10 --depth 3 --files 15 --threads 10 --no-usage-report
          GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url $mp --no-usage-report
          sleep 3
          declare -a pidlist
          GOCOVERDIR=$(pwd)/cover ./juicefs rmr $mp/test/ || true &
          pidlist+=($!)
          GOCOVERDIR=$(pwd)/cover ./juicefs rmr $mp/test/ || true &
          pidlist+=($!)
          GOCOVERDIR=$(pwd)/cover ./juicefs rmr $mp/test/ || true &
          pidlist+=($!)
          wait "${pidlist[@]}"
          ls -l $mp/test && exit 1 || true

      - name: log
        if: always()
        shell: bash
        run: | 
          tail -300 ~/.juicefs/juicefs.log
          grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}          
                
      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [rmr-test]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"

================================================
FILE: .github/workflows/sdktest.yml
================================================
name: "sdktest"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
  pull_request:
    #The branches below must be a subset of the branches above
    branches:
      - 'main'
      - 'release-*'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '.github/**'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron:  '0 17 * * *'

jobs:
  sdktest:
    timeout-minutes: 50
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Set up Java
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin'
          java-version: '8'

      - name: Run Redis
        run: |
          sudo docker run -d --name redis -v redis-data:/data  \
          -p 6379:6379  redis redis-server --appendonly yes

      - name: Juicefs Format
        run: |
          sudo ./juicefs format  localhost  --bucket=/mnt/jfs dev

      - name: Juicefs Mount
        run: |
          sudo ./juicefs mount -d localhost /jfs
          touch /jfs/inner_sym_target
          echo "hello juicefs" > /jfs/inner_sym_target
          cd /jfs
          ln -s inner_sym_target inner_sym_link
          mkdir etc
          chmod 777 etc
          echo `hostname` > etc/nodes
          echo "tom:3001" > users
          echo "g1:2001:tom" > groups
          mkdir /jfs/tmp
          chmod 777 /jfs/tmp

      - name: Sdk Test
        run: |
          sudo sh sdk/java/kerberos.sh
          make -C sdk/java/libjfs
          cd sdk/java
          sudo mvn test -B -Dtest=\!io.juicefs.permission.**,\!io.juicefs.kerberos.**
          sudo mvn test -B -Dflink.version=1.17.2 -Dtest=io.juicefs.JuiceFileSystemTest#testFlinkHadoopRecoverableWriter
          # ranger test
          sudo JUICEFS_RANGER_TEST=1 mvn test -B -Dtest=io.juicefs.permission.RangerPermissionCheckerTest,\!io.juicefs.permission.RangerPermissionCheckerTest#testRangerCheckerInitFailed
          sudo mvn test -B -Dtest=io.juicefs.permission.RangerPermissionCheckerTest#testRangerCheckerInitFailed
          # kerberos test
          sudo cp src/test/resources/kerberos.cfg /tmp/kerberos.cfg
          sudo sh -c "echo "dev.keytab=`base64 /tmp/server.keytab -w 0`" >> /tmp/kerberos.cfg"
          sudo ../../juicefs config localhost --kerberos-config-file /tmp/kerberos.cfg
          sudo mvn test -B -Dtest=io.juicefs.kerberos.KerberosTest
          
          sudo mvn package -B -Dmaven.test.skip=true --quiet -Dmaven.javadoc.skip=true
          expect=$(git rev-parse --short HEAD | cut -b 1-7)
          ls /tmp/libjfs* | grep $expect

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/sync.yml
================================================
name: "sync"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**.go'
      - 'Makefile'
      - '**/sync.yml'
      - '.github/scripts/sync/**'
      - '.github/scripts/hypo/sync.py'
      - '.github/scripts/hypo/sync_test.py'
      - '.github/actions/upload-coverage/action.yml'
      - '.github/actions/mount-coverage-dir/action.yml'
      - '.github/actions/upload-total-coverage/action.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**.go'
      - 'Makefile'
      - '**/sync.yml'
      - '.github/scripts/sync/**'
      - '.github/scripts/hypo/sync.py'
      - '.github/scripts/hypo/sync_test.py'
      - '.github/actions/upload-coverage/action.yml'
      - '.github/actions/mount-coverage-dir/action.yml'
      - '.github/actions/upload-total-coverage/action.yml'
      
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
      seed:
        type: string
        description: "Seed for random test"
        required: false
  schedule:
    - cron:  '0 17 * * *'

jobs:
  sync:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: false
      matrix:
        type: ['sync', 'sync_encrypt', 'sync_fsrand', 'sync_minio', 'sync_cluster', 'sync_exclude']
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Build 
        uses: ./.github/actions/build

      - name: Clean up
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/local/.ghcup
          sudo docker system prune -af
          sudo df -h

      - name: Test Sync
        timeout-minutes: 20
        run: |
          if [[ "${{matrix.type}}" == 'sync' ]]; then
            sudo GOCOVERDIR=$(pwd)/cover META=redis .github/scripts/sync/sync.sh
          elif [[ "${{matrix.type}}" == 'sync_encrypt' ]]; then
            sudo GOCOVERDIR=$(pwd)/cover ENCRYPT=true META=redis .github/scripts/sync/sync.sh
          elif [[ "${{matrix.type}}" == 'sync_fsrand' ]]; then
            if [[ -n "${{ github.event.inputs.seed }}" ]]; then 
              seed=${{ github.event.inputs.seed }}
            elif [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push" ]]; then
              seed=0
            else
              seed=$RANDOM
            fi
            echo "using seed: $seed"
            sudo GOCOVERDIR=$(pwd)/cover META=redis SEED="$seed" .github/scripts/sync/sync_fsrand.sh 
          elif [[ "${{matrix.type}}" == 'sync_minio' ]]; then
            sudo GOCOVERDIR=$(pwd)/cover META=redis .github/scripts/sync/sync_minio.sh 
          elif [[ "${{matrix.type}}" == 'sync_cluster' ]]; then
            wget https://juicefs-com-static.oss-cn-shanghai.aliyuncs.com/random-test/random-test
            chmod +x random-test
            types=("ecdsa"  "ed25519"  "rsa")
            random_type=${types[$RANDOM % ${#types[@]}]}
            sudo CI=true GOCOVERDIR=$(pwd)/cover META=redis KEY_TYPE=$random_type .github/scripts/sync/sync_cluster.sh
          elif [[ "${{matrix.type}}" == 'sync_exclude' ]]; then
            sudo GOCOVERDIR=$(pwd)/cover python3 .github/scripts/hypo/sync_test.py
            sudo GOCOVERDIR=$(pwd)/cover MAX_EXAMPLE=50 STEP_COUNT=30 PROFILE=ci python3 .github/scripts/hypo/sync.py
          else
            echo "Unknown type: ${{matrix.type}}"
            exit 1
          fi

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}          

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [sync]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1
      
      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: upload total coverage report
        timeout-minutes: 30
        continue-on-error: true
        uses: ./.github/actions/upload-total-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch' 
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/unit-random-tests.yml
================================================
name: "unit-random-tests"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - 'pkg/meta/random_test.go'
      - '**/unit-random-tests.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - 'pkg/meta/random_test.go'
      - '**/unit-random-tests.yml'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron: '0 17 * * *'

jobs:
  unit-random-tests:
    strategy:
      fail-fast: false
      matrix:
        meta: ['redis', 'sqlite3', 'tikv']
    timeout-minutes: 120
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Test
        timeout-minutes: 60
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          number=10000
          if [[ ${{matrix.meta}} == "tikv" ]]; then
            number=1500
          fi
          make unit-random-test seed=$RANDOM checks=${number} steps=200 meta="$meta_url"

      - name: print failfile content
        if: failure()
        run: |
          pwd
          cat pkg/meta/testdata/rapid/TestFSOps/TestFSOps-*.fail
        continue-on-error: true

      - name: upload coverage report
        if: always()
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }} 

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [unit-random-tests]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run:
          exit 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: upload total coverage report
        timeout-minutes: 30
        continue-on-error: true
        uses: ./.github/actions/upload-total-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch' 
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/unittests.yml
================================================
name: "unittests"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '**.java'
      - '**/pom.xml'
  pull_request:
    #The branches below must be a subset of the branches above
    branches:
      - 'main'
      - 'release-*'
    paths-ignore:
      - '.autocorrectrc'
      - '.markdownlint-cli2.jsonc'
      - 'package*.json'
      - 'docs/**'
      - '**.md'
      - '.github/**'
      - '!.github/workflows/unittests.yml'
      - '**.java'
      - '**/pom.xml'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron: '0 17 * * *'
jobs:
  unittests:
    strategy:
      fail-fast: false
      matrix:
        test: [ 'test.meta.core','test.meta.non-core','test.pkg','test.cmd', 'test.fdb' ]
    timeout-minutes: 60
    runs-on: ubuntu-22.04
    env:
      MINIO_TEST_BUCKET: 127.0.0.1:9000/testbucket
      MINIO_ACCESS_KEY: testUser
      MINIO_SECRET_KEY: testUserPassword
      GLUSTER_VOLUME: jfstest/gv0
      DISPLAY_PROGRESSBAR: false
      HDFS_ADDR: localhost:8020
      SFTP_HOST: localhost:2222:/home/testUser1/upload/
      SFTP_USER: testUser1
      SFTP_PASS: password
      CIFS_ADDR: localhost:4445/Data
      CIFS_USER: samba
      CIFS_PASSWORD: secret
      WEBDAV_TEST_BUCKET: 127.0.0.1:9007
      TIKV_ADDR: 127.0.0.1
      REDIS_ADDR: redis://127.0.0.1:6379/13
      ETCD_ADDR: 127.0.0.1:3379
      MYSQL_ADDR: (127.0.0.1:3306)/dev
      MYSQL_USER: root
      NFS_ADDR: 127.0.0.1:/srv/nfs/
    steps:

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build

      - name: Install Packages
        run: |
          sudo .github/scripts/apt_install.sh g++-multilib redis-server libacl1-dev attr glusterfs-server libglusterfs-dev nfs-kernel-server
          sudo mkdir -p /home/travis/.m2/
      - if: matrix.test == 'test.pkg'
        name: Set up nfs-server
        run: |
          sudo mkdir -p /srv/nfs
          sudo chown nobody:nogroup /srv/nfs
          sudo chmod 777 /srv/nfs
          echo "/srv/nfs 127.0.0.1(rw,sync,insecure)" | sudo tee -a /etc/exports
          sudo systemctl start nfs-kernel-server.service
          sudo exportfs -arv

      - if: matrix.test == 'test.meta.non-core'
        name: Install redis-cluster
        uses: vishnudxb/redis-cluster@1.0.5
        with:
          master1-port: 7000
          master2-port: 7001
          master3-port: 7002
          slave1-port: 7003
          slave2-port: 7004
          slave3-port: 7005

      - name: Prepare Database
        run: |
          TEST=${{matrix.test}} ./.github/scripts/prepare_db.sh

      - name: Clean up
        run: |
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/local/.ghcup
          sudo docker system prune -af
          sudo df -h

      - name: Unit Test
        timeout-minutes: 30
        run: |
          test=${{matrix.test}}
          make $test

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Code Coverage
        uses: codecov/codecov-action@v3
        with:
          files: cover/cover.txt

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 120
        uses: lhotari/action-upterm@v1


  success-all-test:
    runs-on: ubuntu-latest
    needs: [unittests]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"


================================================
FILE: .github/workflows/vdbench.yml
================================================
name: "vdbench"

on:
  push:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**/vdbench.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-*'
    paths:
      - '**/vdbench.yml'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false
  schedule:
    - cron: '0 17 * * *'

jobs:
  vdbench:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        meta: [ 'redis', 'mysql', 'fdb', 'tikv']
        #storage: [ 'local', 'minio', 'cifs' ]
        storage: [ 'cifs' ]
    runs-on: ubuntu-22.04

    steps:        
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Remove unused software
        timeout-minutes: 10
        run: |
          echo "before remove unused software"
          sudo df -h
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          echo "after remove unused software"
          sudo df -h

      - name: Build
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}    

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Install tools
        shell: bash
        run: |
          wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc
          chmod +x mc 
          wget -q https://s.juicefs.com/static/bench/vdbench50407.zip
          unzip vdbench50407.zip -d vdbench

      - name: vdbench-long-run
        shell: bash
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          storage='${{matrix.storage}}'
          format_opts='--bucket=/mnt/jfs'

          if [ "$storage" == "minio" ]; then
            start_meta_engine none minio
            ./mc alias set myminio http://127.0.0.1:9000 minioadmin minioadmin
            ./mc mb myminio/vdbench-long-run || true
            format_opts='--storage minio --bucket=http://127.0.0.1:9000/vdbench-long-run --access-key=minioadmin --secret-key=minioadmin'
          elif [ "$storage" == "cifs" ]; then
            SMB_CONTAINER_NAME="juicefs-ci-smb"
            SMB_USER="juicefs"
            SMB_PASSWORD="juicefs"
            SMB_SHARE="share" 
            SMB_DATA_DIR="/mnt/jfs/${SMB_CONTAINER_NAME}-data"
            docker rm -f "$SMB_CONTAINER_NAME" >/dev/null 2>&1 || true
            sudo mkdir -p $SMB_DATA_DIR
            sudo chmod 0777 $SMB_DATA_DIR -R
            docker run -d --name "$SMB_CONTAINER_NAME" \
              -v "$SMB_DATA_DIR":/mount \
              dperson/samba \
              -r \
              -u "$SMB_USER;$SMB_PASSWORD" \
              -s "$SMB_SHARE;/mount;yes;no;no;$SMB_USER" >/dev/null
            container_ip=$(docker container inspect "$SMB_CONTAINER_NAME" --format '{{ .NetworkSettings.IPAddress }}')
            for _ in $(seq 1 40); do
              if (echo > /dev/tcp/${container_ip}/445) >/dev/null 2>&1; then
                break
              fi
              sleep 1
            done
            SMB_ENDPOINT="${container_ip}/${SMB_SHARE}"
            format_opts="--storage cifs --bucket=${SMB_ENDPOINT} --access-key=${SMB_USER} --secret-key=${SMB_PASSWORD}"
          fi

          sudo chmod 777 /mnt
          GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url vdbench-long-run --trash-days 0 $format_opts
          GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report --cache-size 1024 --max-deletes 50
          vdbench/vdbench -f .github/workflows/resources/vdbench_long_run.conf -jn

      - uses: actions/upload-artifact@v4
        with:
          name: output-long-run-${{ matrix.meta }}-${{ matrix.storage }}
          path: output

      - name: check vdbench log
        if: always()
        run: |
          grep -i "java.lang.RuntimeException" output/errorlog.html && exit 1 || true
          if ! grep -q "Vdbench execution completed successfully" output/logfile.html; then
            echo "vdbench not completed succeed"
            exit 1
          fi

      - name: log
        if: always()
        run: | 
          tail -300 ~/.juicefs/juicefs.log
          grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }} 

      - name: Send Slack Notification
        if: failure()
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"          

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/verify.yml
================================================
name: verify

on:
  push:
    branches:
      - main
      - "release-**"
    paths-ignore:
      - ".autocorrectrc"
      - ".markdownlint-cli2.jsonc"
      - "package*.json"
      - "docs/**"
      - "**.md"
      - ".github/**"
      - "!.github/workflows/verify.yml"
  pull_request:
    branches:
      - "main"
      - "release-**"
    paths-ignore:
      - ".autocorrectrc"
      - ".markdownlint-cli2.jsonc"
      - "package*.json"
      - "docs/**"
      - "**.md"
      - ".github/**"
      - "!.github/workflows/verify.yml"
  workflow_dispatch:
  schedule:
    - cron: "0 17 * * 0"

jobs:
  lint:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/setup-go@v3
        with:
          go-version: "1.23"

      - uses: actions/checkout@v3
      - name: golangci-lint
        uses: golangci/golangci-lint-action@v6

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1
  build:
    strategy:
      fail-fast: false
      matrix:
        version: ["1.21", "1.22", "1.23"]
    runs-on: ubuntu-22.04
    steps:
      - name: Check out code
        uses: actions/checkout@v3

      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: ${{ matrix.version }}
          cache: true
        id: go

      - name: Install dependencies
        run: |
          sudo .github/scripts/apt_install.sh g++-multilib gcc-mingw-w64

      - name: Go mod tidy
        run: |
          go mod tidy

      - name: Build linux target
        timeout-minutes: 10
        run: |
          make
          ./juicefs version

      - name: build lite
        timeout-minutes: 10
        run: |
          make juicefs.lite
          ./juicefs.lite version

      - name: build windows
        timeout-minutes: 10
        run: make juicefs.exe

      - name: build libjfs.dll
        timeout-minutes: 10
        run: make -C sdk/java/libjfs libjfs.dll

      - name: build ceph
        timeout-minutes: 10
        run: |
          sudo .github/scripts/apt_install.sh librados-dev
          make juicefs.ceph
          ./juicefs.ceph version

      - name: build fdb
        timeout-minutes: 10
        run: |
          wget https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-clients_6.3.23-1_amd64.deb
          sudo dpkg -i foundationdb-clients_6.3.23-1_amd64.deb
          make juicefs.fdb
          ./juicefs.fdb version

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        # if: failure()
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


================================================
FILE: .github/workflows/version_compatible_hypo.yml
================================================
name: "version-compatible-test-hypo"

on:
  push:
    branches: 
      - main
    paths:
      - '**/testVersionCompatible.py'
      - '**/version_compatible_hypo.yml'
  pull_request:
    branches: 
      - main
    paths:
      - '**/testVersionCompatible.py'
      - '**/version_compatible_hypo.yml'
  schedule:
    - cron:  '0 19 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  vc-hypo:
    timeout-minutes: 120
    continue-on-error: false
    strategy:
      fail-fast: false
      matrix:
        old_juicefs_version: ['main', 'release-1.0']
        meta: ['redis', 'mysql', 'tikv']
        storage: ['minio']
        include:
          - old_juicefs_version: 'main'
            meta: 'fdb'
            storage: 'minio'
          - old_juicefs_version: 'release-1.0'
            meta: 'postgres'
            storage: 'minio'

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build new version
        timeout-minutes: 10
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Checkout old version
        uses: actions/checkout@v3
        with:
          ref: ${{matrix.old_juicefs_version}}
          path: ${{matrix.old_juicefs_version}}

      - name: Make old build
        timeout-minutes: 10
        run: | 
          cd ${{matrix.old_juicefs_version}}
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "build juicefs.fdb"
            wget -q https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-clients_6.3.23-1_amd64.deb
            sudo dpkg -i foundationdb-clients_6.3.23-1_amd64.deb
            make juicefs.fdb
            mv juicefs.fdb juicefs
          else
            echo "build juicefs"
            make juicefs 
          fi
          cd -

      - name: Prepare meta database
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}} ${{matrix.storage}}
          # meta_url=$(get_meta_url ${{matrix.meta}})
          # create_database $meta_url
          
      - name: Install tools
        run: | 
          wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc
          chmod +x mc
          sudo mv mc /usr/local/bin
          sudo .github/scripts/apt_install.sh redis-tools
          sudo pip install hypothesis
          sudo pip install minio
          sudo pip install xattr
          
      - name: Test
        timeout-minutes: 90
        run: |          
          export META=${{matrix.meta}}
          export STORAGE=${{matrix.storage}}
          new_version=`./juicefs --version | awk -F" " '{print $3}' | awk -F+ '{print $1}'`
          echo new_version is $new_version
          mv juicefs juicefs-$new_version
          export NEW_JFS_BIN="juicefs-$new_version"
          old_version=`${{matrix.old_juicefs_version}}/juicefs --version | awk -F" " '{print $3}' | awk -F+ '{print $1}'`
          echo old_version is $old_version
          mv ${{matrix.old_juicefs_version}}/juicefs juicefs-$old_version
          export OLD_JFS_BIN="juicefs-$old_version"
          timeout 3600 python3 .github/scripts/testVersionCompatible.py 2>&1 | tee result.log || code=$?; if [[ $code -eq 124 ]]; then echo test timeout with $code && exit 0; else echo exit with $code && exit $code; fi
      
      - name: Display result log
        if: always()
        run: | 
          if [ -f "result.log" ]; then
            tail -n 500 result.log
          fi

      - name: Upload command log
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{matrix.meta}}-${{matrix.old_juicefs_version}}.command.log
          path: ~/command.log

      - name: Display log
        if: always()
        shell: bash
        run: | 
          if [ -f "/home/runner/.juicefs/juicefs.log" ]; then
            tail -1000 /home/runner/.juicefs/juicefs.log
            grep "<FATAL>:" /home/runner/.juicefs/juicefs.log && exit 1 || true
          fi

      - name: Display command
        if: always()
        shell: bash
        run: | 
          if [ -f "$HOME/command.log" ]; then
            tail -100 ~/command.log
          fi

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1


  success-all-test:
    runs-on: ubuntu-latest
    needs: [vc-hypo]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success()
        run: echo "All Done"

================================================
FILE: .github/workflows/wintest.yml
================================================
name: "wintest"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/wintest.yml'
      - 'pkg/winfsp/*.go'
      - '**/*_windows.go'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**/wintest.yml'
      - 'pkg/winfsp/*.go'
      - '**/*_windows.go'
  workflow_dispatch:
    inputs:
      debug_enabled:
        type: boolean
        description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)'
        required: false
        default: false
  schedule:
    - cron: '0 17 * * 0'

jobs:
  wintest:
    runs-on: windows-2022
    env:
      Actions_Allow_Unsecure_Commands: true
    steps:
      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: '1.21'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: Setup MSBuild.exe
        if: false
        uses: microsoft/setup-msbuild@v1.0.3

      - name: Change Winsdk Version
        if: false
        uses: GuillaumeFalourd/setup-windows10-sdk-action@v1
        with:
          sdk-version: 18362

      - name: Download WinFsp
        run: |
          choco install wget
          mkdir "C:\wfsp\"
          wget -O winfsp.msi https://github.com/winfsp/winfsp/releases/download/v2.0/winfsp-2.0.23075.msi
          copy winfsp.msi "C:\wfsp\"
          choco install 7zip -y

      - name: Install WinFsp
        run: |
          # call start-process to install winfsp.msi
          Start-Process -Wait -FilePath "C:\wfsp\winfsp.msi" -ArgumentList "/quiet /norestart"
          ls "C:\Program Files (x86)\WinFsp"
          ls "C:\Program Files (x86)\WinFsp\bin"

      - name: Set up Include Headers
        run: |
          mkdir "C:\WinFsp\inc\fuse"
          copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\
          dir "C:\WinFsp\inc\fuse"
          set CGO_CFLAGS=-IC:/WinFsp/inc/fuse
          go env
          go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse
          go env

      - name: Install Scoop
        run: |
          dir "C:\Program Files (x86)\WinFsp"
          Set-ExecutionPolicy RemoteSigned -scope CurrentUser
          iwr -useb 'https://raw.githubusercontent.com/scoopinstaller/install/master/install.ps1' -outfile 'install.ps1'
          .\install.ps1 -RunAsAdmin
          echo $env:USERNAME
          scoop
          $redisUrl = "https://github.com/tporadowski/redis/releases/download/v5.0.14.1/Redis-x64-5.0.14.1.zip"
          $redisRoot = Join-Path $env:USERPROFILE "scoop\apps\redis\current"
          New-Item -ItemType Directory -Force -Path $redisRoot | Out-Null
          Invoke-WebRequest -Uri $redisUrl -OutFile redis.zip
          Expand-Archive -Path redis.zip -DestinationPath $redisRoot -Force
          $redisCli = Join-Path $redisRoot "redis-cli.exe"
          if (-not (Test-Path $redisCli)) {
            throw "redis-cli.exe not found after downloading from $redisUrl"
          }
          $env:Path += ";$redisRoot"
          $redisRoot | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          & $redisCli --version
          scoop install minio@2021-12-10T23-03-39Z
          scoop install runasti

      - name: Download winsw
        run: |
          wget https://github.com/winsw/winsw/releases/download/v2.12.0/WinSW-x64.exe -q --show-progress -O winsw.exe
          ls winsw.exe

      - name: Start Redis
        run: |
          copy winsw.exe redis-service.exe
          $redisExe = Join-Path $env:USERPROFILE "scoop\apps\redis\current\redis-server.exe"
          if (!(Test-Path $redisExe)) {
            throw "redis-server.exe not found: $redisExe"
          }
          @"
          <service>
            <id>redisredis</id>
            <name>redisredis</name>
            <description>redisredis</description>
            <executable>$redisExe</executable>
            <arguments>--bind 127.0.0.1 --port 6379 --save \"\" --appendonly no</arguments>
            <logmode>rotate</logmode>
          </service>
          "@ | Set-Content redis-service.xml -Encoding utf8
          .\redis-service.exe install
          net start redisredis
          Start-Sleep -Seconds 2
          redis-cli -h 127.0.0.1 -p 6379 ping

      - name: Download MinGW
        run: |
          wget https://github.com/niXman/mingw-builds-binaries/releases/download/14.2.0-rt_v12-rev1/x86_64-14.2.0-release-win32-seh-msvcrt-rt_v12-rev1.7z -q --show-progress -O mingw.7z
          7z.exe x mingw.7z -oC:\mingw64
          ls C:\mingw64\bin


      - name: Build Juicefs
        run: |
          $env:CGO_ENABLED=1
          $env:PATH+=";C:\mingw64\bin"
          go build -ldflags="-s -w" -o juicefs.exe .
      
      - name: Install Python2
        run: |
          choco install python2 -y

      - name: Juicefs Format
        run: |
          ./juicefs.exe format redis://127.0.0.1:6379/1 myjfs

      - name: Juicefs Mount
        run: |
          $env:PATH+=";C:\Program Files (x86)\WinFsp\bin"
          ./juicefs.exe mount -d redis://127.0.0.1:6379/1 z: --fuse-trace-log c:/fuse.log

      - name: Run Winfsp Tests
        run: |
          wget https://github.com/juicedata/winfsp/releases/download/testing_suit_20250324/winfsp-tests-x64.exe -q --show-progress -O "C:\Program Files (x86)\WinFsp\bin\winfsp-tests-x64.exe"
          ls "C:\Program Files (x86)\WinFsp\bin\winfsp-tests-x64.exe"
          cd Z:
          & "C:\Program Files (x86)\WinFsp\bin\winfsp-tests-x64.exe" --fuse-external --resilient --case-insensitive-cmp
      
      - name: Run winfstest
        run: |
          wget https://github.com/juicedata/winfstest/releases/download/testing_20250313/TestSuite-x64-v4.zip -q --show-progress -O Z:\TestSuite-x64.zip
          ls Z:\TestSuite-x64.zip
          cd Z:\
          Expand-Archive -Path .\TestSuite-x64.zip -DestinationPath .\TestSuite
          ls Z:\TestSuite
          cd Z:\TestSuite\TestSuite
          ./run-winfstest.ps1

      - name: Run FSX Test
        run: |
          cd Z:\
          wget https://github.com/chenjie4255/fstools/releases/download/v0.0.1/fsx-x64.exe -q --show-progress -O fsx.exe
          ls fsx.exe
          ./fsx.exe -d 180 -p 10000 -F 100000 fsxtest

      - name: Run basic subcommand tests
        run: |
          echo hi > Z:\hi.txt
          ./juicefs.exe info Z:\hi.txt
          ./juicefs.exe status redis://127.0.0.1:6379/1
          ./juicefs.exe debug Z:\
          New-Item -Path 'Z:\summary' -ItemType Directory
          echo hi > Z:\summary\1.txt
          echo hi > Z:\summary\2.txt
          ./juicefs.exe summary Z:\summary
          ./juicefs.exe info Z:\summary\1.txt
          ./juicefs.exe stats Z: -c 5
      - name: Setup tmate session
        if: ${{ failure() && github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
        uses: mxschmitt/action-tmate@v3


================================================
FILE: .github/workflows/xattr.yml
================================================
name: "xattr"

on:
  push:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**.go'
      - '**.c'
      - '**/xattr.yml'
  pull_request:
    branches:
      - 'main'
      - 'release-**'
    paths:
      - '**.go'
      - '**.c'
      - '**/xattr.yml'
  schedule:
    - cron:  '0 17 * * *'
  workflow_dispatch:
    inputs:
      debug:
        type: boolean
        description: "Run the build with tmate debugging enabled"
        required: false
        default: false

jobs:
  build-matrix:
    runs-on: ubuntu-22.04
    steps:
      - id: set-matrix
        run: |
          echo "github.event_name is ${{github.event_name}}"
          if [[ "${{github.event_name}}" == "schedule" || "${{github.event_name}}" == "workflow_dispatch" ]]; then
            echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "badger", "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
          elif [[ "${{github.event_name}}" == "pull_request" || "${{github.event_name}}" == "push"  ]]; then
            echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
          else
            echo 'event name is not supported' && exit 1
          fi
    outputs:
      meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

  xattr:
    needs: build-matrix
    strategy:
      fail-fast: false
      matrix:
        meta: ${{ fromJson(needs.build-matrix.outputs.meta_matrix) }}
    runs-on: ubuntu-22.04

    steps:
      - uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: '5.34'

      - name: Checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 1

      - name: mount coverage dir
        timeout-minutes: 5
        uses: ./.github/actions/mount-coverage-dir
        with:
          mount_point: cover
          access_key: ${{ secrets.CI_COVERAGE_AWS_AK }}
          secret_key: ${{ secrets.CI_COVERAGE_AWS_SK }}
          token: ${{ secrets.CI_COVERAGE_AWS_TOKEN }}

      - name: Set Variable
        id: vars
        run: |
          if [ "${{matrix.meta}}" == "fdb" ]; then
            echo "target=juicefs.fdb" >> $GITHUB_OUTPUT
          else
            echo "target=juicefs" >> $GITHUB_OUTPUT
          fi

      - name: Build
        uses: ./.github/actions/build
        with: 
          target: ${{steps.vars.outputs.target}}

      - name: Prepare meta db
        run: | 
          chmod +x .github/scripts/start_meta_engine.sh
          source .github/scripts/start_meta_engine.sh
          start_meta_engine ${{matrix.meta}}
          meta_url=$(get_meta_url ${{matrix.meta}})
          create_database $meta_url

      - name: Juicefs Format
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs format $meta_url --trash-days 0 --bucket=/mnt/jfs pics

      - name: Juicefs Mount
        run: |
          source .github/scripts/start_meta_engine.sh
          meta_url=$(get_meta_url ${{matrix.meta}})
          # sudo mkdir /var/jfs
          # sudo chmod 777 /var/jfs
          sudo GOCOVERDIR=$(pwd)/cover ./juicefs mount -d $meta_url /tmp/jfs --no-usage-report --enable-xattr
          stat /tmp/jfs/.accesslog

      - name: Test
        run: |
          git clone https://github.com/iustin/pyxattr.git
          cd pyxattr
          pip3 install pytest
          pip3 install pyxattr
          stat /tmp/jfs/
          if [[ "${{matrix.meta}}" == "tikv" || "${{matrix.meta}}" == "badger" ]]; then
            TEST_DIR=/tmp/jfs/ python3 -m pytest tests -k "not test_empty_value"
          else
            TEST_DIR=/tmp/jfs/ python3 -m pytest tests
          fi
          
      - name: log
        if: always()
        run: | 
          if [ -f /var/log/juicefs.log ]; then
            tail -300 /var/log/juicefs.log
            grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
          fi

      - name: upload coverage report
        timeout-minutes: 5
        continue-on-error: true
        uses: ./.github/actions/upload-coverage
        with:
          UPLOAD_TOKEN: ${{ secrets.CI_COVERAGE_FILE_UPLOAD_AUTH_TOKEN }}

      - name: Setup upterm session
        if: failure() && (github.event.inputs.debug == 'true' || github.run_attempt != 1)
        timeout-minutes: 60
        uses: lhotari/action-upterm@v1

  success-all-test:
    runs-on: ubuntu-latest
    needs: [xattr]
    if: always()
    steps:
      - uses: technote-space/workflow-conclusion-action@v3
      - uses: actions/checkout@v3

      - name: Check Failure
        if: env.WORKFLOW_CONCLUSION == 'failure'
        run: exit 1

      - name: Send Slack Notification
        if: failure() && github.event_name != 'workflow_dispatch'
        uses: juicedata/slack-notify-action@main
        with:
          channel-id: "${{ secrets.SLACK_CHANNEL_ID_FOR_PR_CHECK_NOTIFY }}"
          slack_bot_token: "${{ secrets.SLACK_BOT_TOKEN }}"

      - name: Success
        if: success() 
        run: echo "All Done"

================================================
FILE: .gitignore
================================================
*.o
*.sw[po]
ltmain.sh
*.orig
*.rej
.deps
.dirstamp
jfs
*.rdb
.release-env
*.so
libjfs.h
docs/node_modules
cmd/cmd
.hypothesis
/node_modules

# os
.DS_Store

# ide
.vscode
.idea

# lang
__pycache__

# temp
pkg/meta/badger
pkg/meta/testdata
*.dump
*.out

# gen
/juicefs
/juicefs.ceph
/juicefs.exe
/juicefsd.exe
/juicefs.exe~
/juicefsd.exe~
/juicefs.lite
dist/


================================================
FILE: .golangci.yml
================================================
run:
  timeout: 5m
  tests: false


================================================
FILE: .goreleaser.yml
================================================
project_name: juicefs
env:
  - GO111MODULE=on
  - CGO_ENABLED=1
  - REVISIONDATE={{ .Env.REVISIONDATE }}
before:
  hooks:
    - go mod download
builds:
  - id: juicefs-windows-amd64
    hooks:
      pre:
        - sh -c 'mkdir -p /usr/local/include/winfsp && cp hack/winfsp_headers/* /usr/local/include/winfsp'
    env:
      - CC=x86_64-w64-mingw32-gcc
      - CXX=x86_64-w64-mingw32-g++
    ldflags: -s -w -X github.com/juicedata/juicefs/pkg/version.version={{.Version}} -X github.com/juicedata/juicefs/pkg/version.revision={{.ShortCommit}} -X github.com/juicedata/juicefs/pkg/version.revisionDate={{.Env.REVISIONDATE}}
    flags:
      - -buildmode
      - exe
    main: .
    goos:
      - windows
    goarch:
      - amd64
  - id: juicefs-darwin-amd64
    env:
      - CC=o64-clang
      - CXX=o64-clang++
    ldflags: -s -w -X github.com/juicedata/juicefs/pkg/version.version={{.Version}} -X github.com/juicedata/juicefs/pkg/version.revision={{.ShortCommit}} -X github.com/juicedata/juicefs/pkg/version.revisionDate={{.Env.REVISIONDATE}}
    main: .
    goos:
      - darwin
    goarch:
      - amd64
  - id: juicefs-darwin-arm64
    env:
      - CC=oa64-clang
      - CXX=oa64-clang++
    ldflags: -s -w -X github.com/juicedata/juicefs/pkg/version.version={{.Version}} -X github.com/juicedata/juicefs/pkg/version.revision={{.ShortCommit}} -X github.com/juicedata/juicefs/pkg/version.revisionDate={{.Env.REVISIONDATE}}
    main: .
    goos:
      - darwin
    goarch:
      - arm64
  - id: juicefs-linux-amd64
    env:
      - CC=/usr/bin/musl-gcc
    ldflags: -s -w -X github.com/juicedata/juicefs/pkg/version.version={{.Version}} -X github.com/juicedata/juicefs/pkg/version.revision={{.ShortCommit}} -X github.com/juicedata/juicefs/pkg/version.revisionDate={{.Env.REVISIONDATE}} -linkmode external -extldflags '-static'
    main: .
    goos:
      - linux
    goarch:
      - amd64
  - id: juicefs-linux-arm64
    env:
      - CC=/usr/local/aarch64-linux-musl-cross/bin/aarch64-linux-musl-cc
    ldflags: -s -w -X github.com/juicedata/juicefs/pkg/version.version={{.Version}} -X github.com/juicedata/juicefs/pkg/version.revision={{.ShortCommit}} -X github.com/juicedata/juicefs/pkg/version.revisionDate={{.Env.REVISIONDATE}} -linkmode external -extldflags '-static'
    main: .
    goos:
      - linux
    goarch:
      - arm64
checksum:
  name_template: 'checksums.txt'
snapshot:
  name_template: "{{ .Tag }}-next"
changelog:
  sort: asc
  filters:
    exclude:
      - '^docs:'
      - '^test:'
archives:
  - name_template: "{{ .ProjectName }}-{{ .Version }}-{{ .Os }}-{{ .Arch }}"


================================================
FILE: .markdownlint-cli2.jsonc
================================================
{
  "customRules": [
    "markdownlint-rule-enhanced-proper-names/src/enhanced-proper-names.js",
    "markdownlint-rule-no-trailing-slash-in-links/src/no-trailing-slash-in-links.js"
  ],
  "config": {
    "default": true,
    "first-heading-h1": false,
    "heading-style": {
      "style": "atx"
    },
    "ul-style": false,
    "link-image-style": {
      "autolink": false
    },
    "no-hard-tabs": {
      "spaces_per_tab": 4
    },
    "line-length": false,
    "no-duplicate-heading": {
      "siblings_only": true
    },
    "no-inline-html": {
      "allowed_elements": [
        "Badge",
        "TabItem",
        "Tabs",
        "a",
        "br",
        "div",
        "img",
        "li",
        "ul",
        "kbd",
        "p",
        "span",
        "sup",
        "iframe",
        "VersionAdd"
      ]
    },
    "fenced-code-language": false,
    "first-line-heading": false,
    "no-alt-text": true,
    "code-block-style": {
      "style": "fenced"
    },
    "code-fence-style": {
      "style": "backtick"
    },
    "link-fragments": false,
    "no-trailing-slash-in-links": true,
    "enhanced-proper-names": {
      "code_blocks": false,
      "html_elements": false,
      "heading_id": false,
      "names": [
        "ACL",
        "AI",
        "API",
        "ARM",
        "ARM64",
        "AWS",
        "Amazon",
        "Ansible",
        "Apache",
        "Azure",
        "BSD",
        "BadgerDB",
        "CDH",
        "CPU",
        "CSI Driver",
        "CSI",
        "CentOS",
        "Ceph",
        "CephFS",
        "ClickHouse",
        "Cloud SQL",
        "Colab",
        "Consul",
        "Debian",
        "DevOps",
        "DigitalOcean",
        "DistCp",
        "Docker Compose",
        "Docker",
        "Dockerfile",
        "Doris",
        "ECI",
        "Elasticsearch",
        "FTP",
        "FUSE",
        "Flink",
        "Fluid",
        "FoundationDB",
        "GCC",
        "GID",
        "Git",
        "GitHub",
        "Google",
        "Grafana",
        "Graphite",
        "HBase",
        "HDFS",
        "HDP",
        "HTTP",
        "HTTPS",
        "Hadoop",
        "Hive Metastore",
        "Hive",
        "Hudi",
        "IAM",
        "ID",
        "IOPS",
        "IP",
        "Iceberg",
        "JAR",
        "JDK",
        "JSON",
        "Java",
        "JuiceFS",
        "JuiceFSRuntime",
        "Juicedata",
        "K3s",
        "K8s",
        "Kerberos",
        "KeyDB",
        "KubeSphere",
        "Kubernetes",
        "LDAP",
        "LZ4",
        "Linux",
        "M1",
        "MariaDB",
        "Maven",
        "MinIO",
        "MySQL",
        "NFS",
        "NGINX",
        "POSIX",
        "PV",
        "PVC",
        "PostgreSQL",
        "PowerShell",
        "Prometheus",
        "Pushgateway",
        "Python",
        "QPS",
        "QoS",
        "RADOS",
        "RESTful",
        "RGW",
        "RPC",
        "Raft",
        "Rancher",
        "Ranger",
        "Redis",
        "S3",
        "S3QL",
        "SDK",
        "SFTP",
        "SID",
        "SMB",
        "SQL",
        "SQLite",
        "SSH",
        "Samba",
        "Scala",
        "Spark",
        "StarRocks",
        "ThriftServer",
        "TiKV",
        "Trino",
        "UID",
        "UUID",
        "Ubuntu",
        "Unix",
        "VFS",
        "WSL",
        "WebDAV",
        "WinFsp",
        "Windows",
        "YAML",
        "YARN",
        "Zstandard",
        "etcd",
        "macFUSE",
        "macOS"
      ]
    }
  }
}


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-yaml
        args: [--allow-multiple-documents]
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/golangci/golangci-lint
    rev: v1.52.2
    hooks:
      - id: golangci-lint


================================================
FILE: ADOPTERS.md
================================================
# JuiceFS Adopters

Please visit [JuiceFS Official Documentation](https://juicefs.com/docs/community/adopters) for details.


================================================
FILE: ADOPTERS_CN.md
================================================
# JuiceFS 使用者

请访问 [JuiceFS 官方文档](https://juicefs.com/docs/zh/community/adopters)了解详情。


================================================
FILE: CODEOWNERS
================================================
/docs/ @CaitinChen


================================================
FILE: CODE_OF_CONDUCT.md
================================================

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at team@juicedata.io.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to JuiceFS

## Guidelines

- Before starting work on a feature or bug fix, please search GitHub or reach out to us via GitHub, Slack etc. The purpose of this step is make sure no one else is already working on it and we'll ask you to open a GitHub issue if necessary.
- We will use the GitHub issue to discuss the feature and come to agreement. This is to prevent your time being wasted, as well as ours.
- If it is a major feature update, we highly recommend you also write a design document to help the community understand your motivation and solution.
- A good way to find a project properly sized for a first time contributor is to search for open issues with the label ["kind/good-first-issue"](https://github.com/juicedata/juicefs/labels/kind%2Fgood-first-issue) or ["kind/help-wanted"](https://github.com/juicedata/juicefs/labels/kind%2Fhelp-wanted).

## Coding Style

- We're following ["Effective Go"](https://go.dev/doc/effective_go) and ["Go Code Review Comments"](https://github.com/golang/go/wiki/CodeReviewComments).
- Use `go fmt` to format your code before committing. You can find information in editor support for Go tools in ["IDEs and Plugins for Go"](https://github.com/golang/go/wiki/IDEsAndTextEditorPlugins).
- If you see any code which clearly violates the style guide, please fix it and send a pull request.
- Every new source file must begin with a license header.
- Install [pre-commit](https://pre-commit.com) and use it to set up a pre-commit hook for static analysis. Just run `pre-commit install` in the root of the repo.

## Sign the CLA

Before you can contribute to JuiceFS, you will need to sign the [Contributor License Agreement](https://cla-assistant.io/juicedata/juicefs). There're a CLA assistant to guide you when you first time submit a pull request.

## What is a Good PR

- Presence of unit tests
- Adherence to the coding style
- Adequate in-line comments
- Explanatory commit message

## Contribution Flow

This is a rough outline of what a contributor's workflow looks like:

1. Create a topic branch from where to base the contribution. This is usually `main`.
1. Make commits of logical units.
1. Make sure commit messages are in the proper format.
1. Push changes in a topic branch to a personal fork of the repository.
1. Submit a pull request to [juicedata/juicefs](https://github.com/juicedata/juicefs/compare). The PR should link to one issue which either created by you or others.
1. The PR must receive approval from at least one maintainer before it be merged.

Happy hacking!


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: Makefile
================================================
export GO111MODULE=on

all: juicefs

REVISION := $(shell git rev-parse --short HEAD 2>/dev/null)
REVISIONDATE := $(shell git log -1 --pretty=format:'%cd' --date short 2>/dev/null)
PKG := github.com/juicedata/juicefs/pkg/version
GCFLAGS =
BUILD ?= release
ifneq ($(strip $(REVISION)),) # Use git clone
	LDFLAGS += -X $(PKG).revision=$(REVISION) \
		   -X $(PKG).revisionDate=$(REVISIONDATE)
endif

ifeq ($(BUILD),release)
	LDFLAGS += -s -w
else ifeq ($(BUILD),debug)
	GCFLAGS := all=-N -l
endif

SHELL = /bin/sh

ifdef STATIC
	LDFLAGS += -linkmode external -extldflags '-static'
	CC = /usr/bin/musl-gcc
	export CC
endif

juicefs: Makefile cmd/*.go pkg/*/*.go go.*
	go version
	go build -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs .

juicefs.cover: Makefile cmd/*.go pkg/*/*.go go.*
	go version
	go build -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -cover -o juicefs .

juicefs.lite: Makefile cmd/*.go pkg/*/*.go
	go build -tags nogateway,nowebdav,nocos,nobos,nohdfs,noibmcos,noobs,nooss,noqingstor,nosftp,noswift,noazure,nogs,noufile,nob2,nonfs,nodragonfly,nosqlite,nomysql,nopg,notikv,nobadger,noetcd,nocifs \
		-gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs.lite .

juicefs.ceph: Makefile cmd/*.go pkg/*/*.go
	go build -tags ceph -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs.ceph .

juicefs.fdb: Makefile cmd/*.go pkg/*/*.go
	go build -tags fdb -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs.fdb .

juicefs.fdb.cover: Makefile cmd/*.go pkg/*/*.go
	go build -tags fdb -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -cover -o juicefs.fdb .

juicefs.gluster: Makefile cmd/*.go pkg/*/*.go
	go build -tags gluster -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs.gluster .

juicefs.gluster.cover: Makefile cmd/*.go pkg/*/*.go
	go build -tags gluster -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -cover -o juicefs.gluster .

juicefs.all: Makefile cmd/*.go pkg/*/*.go
	go build -tags ceph,fdb,gluster -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs.all .

# This is cross-compiling LoongArch in a Linux environment on x86_64 (amd64) or aarch64 (arm64) architecture.
# 1. Install LoongArch64 cross-compile toolchain from https://github.com/loong64/cross-tools
# 2. Set CC to your toolchain path.
# 3. Run `STATIC=1 make juicefs.loongarch` to build the LoongArch binary.
juicefs.loongarch: Makefile cmd/*.go pkg/*/*.go go.*
	CC=bin/loongarch64-unknown-linux-musl-cc CGO_ENABLED=1 GOARCH=loong64 go build -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -o juicefs .

# This is the script for compiling the Linux version on the MacOS platform.
# Please execute the `brew install FiloSottile/musl-cross/musl-cross` command before using it.
juicefs.linux:
	CGO_ENABLED=1 GOOS=linux GOARCH=amd64 CC=x86_64-linux-musl-gcc CGO_LDFLAGS="-static" go build -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)"  -o juicefs .

/usr/local/include/winfsp:
	sudo mkdir -p /usr/local/include/winfsp
	sudo cp hack/winfsp_headers/* /usr/local/include/winfsp

# This is the script for compiling the Windows version on the MacOS platform.
# Please execute the `brew install mingw-w64` command before using it.
juicefs.exe: /usr/local/include/winfsp cmd/*.go pkg/*/*.go
	GOOS=windows CGO_ENABLED=1 CC=x86_64-w64-mingw32-gcc \
	     go build -gcflags="$(GCFLAGS)" -ldflags="$(LDFLAGS)" -buildmode exe -o juicefs.exe .

# This is the script for compiling the Windows version on Windows platform.
# Please ensure mingw64 is in PATH and WinFsp SDK is installed at C:/WinFsp
_juicefs.exe:
	powershell -Command "$$env:PATH+=';C:\mingw64\bin'; $$env:CGO_ENABLED='1'; $$env:CGO_CFLAGS='-IC:/WinFsp/inc/fuse'; go build -ldflags='-s -w' -o juicefs.exe ."

.PHONY: snapshot release debug test
snapshot:
	docker run --rm --privileged \
		-e REVISIONDATE=$(REVISIONDATE) \
		-e PRIVATE_KEY=${PRIVATE_KEY} \
		-v ~/go/pkg/mod:/go/pkg/mod \
		-v `pwd`:/go/src/github.com/juicedata/juicefs \
		-v /var/run/docker.sock:/var/run/docker.sock \
		-w /go/src/github.com/juicedata/juicefs \
		juicedata/golang-cross:latest release --snapshot --rm-dist --skip-publish

release:
	docker run --rm --privileged \
		-e REVISIONDATE=$(REVISIONDATE) \
		-e PRIVATE_KEY=${PRIVATE_KEY} \
		--env-file .release-env \
		-v ~/go/pkg/mod:/go/pkg/mod \
		-v `pwd`:/go/src/github.com/juicedata/juicefs \
		-v /var/run/docker.sock:/var/run/docker.sock \
		-w /go/src/github.com/juicedata/juicefs \
		juicedata/golang-cross:latest release --rm-dist

debug:
	$(MAKE) BUILD=debug all

test.meta.core:
	SKIP_NON_CORE=true go test -v -cover -count=1  -failfast -timeout=12m ./pkg/meta/... -args -test.gocoverdir="$(shell realpath cover/)"

test.meta.non-core:
	go test -v -cover -run='TestRedisCluster|TestPostgreSQLClient|TestLoadDumpSlow|TestEtcdClient|TestKeyDB' -count=1  -failfast -timeout=12m ./pkg/meta/... -args -test.gocoverdir="$(shell realpath cover/)"

test.pkg:
	go test -tags gluster -v -cover -count=1  -failfast -timeout=12m $$(go list ./pkg/... | grep -v /meta) -args -test.gocoverdir="$(shell realpath cover/)"

test.cmd:
	sudo JFS_GC_SKIPPEDTIME=1 MINIO_ACCESS_KEY=testUser MINIO_SECRET_KEY=testUserPassword GOMAXPROCS=8 go test -v -count=1 -failfast -cover -timeout=8m ./cmd/... -coverpkg=./pkg/...,./cmd/... -args -test.gocoverdir="$(shell realpath cover/)"

test.fdb:
	go test -v -cover -count=1  -failfast -timeout=4m ./pkg/meta/ -tags fdb -run=TestFdb -args -test.gocoverdir="$(shell realpath cover/)"

unit-random-test:
	echo "Using meta:$(meta), seed: $(seed), checks:${checks}, steps: $(steps)"
	go test ./pkg/meta/... -rapid.meta="$(meta)" -rapid.seed=$(seed) -rapid.checks=$(checks) -rapid.steps=$(steps) -run "TestFSOps" -v -failfast -count=1 -timeout=60m -cover -coverpkg=./pkg/... -args -test.gocoverdir="$(shell realpath cover/)"


================================================
FILE: README.md
================================================
<p align="center"><a href="https://github.com/juicedata/juicefs"><img alt="JuiceFS Logo" src="docs/en/images/juicefs-logo-new.svg" width="50%" /></a></p>
<p align="center">
    <a href="https://github.com/juicedata/juicefs/releases/latest"><img alt="Latest Stable Release" src="https://img.shields.io/github/v/release/juicedata/juicefs" /></a>
    <a href="https://github.com/juicedata/juicefs/actions/workflows/unittests.yml"><img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/juicedata/juicefs/unittests.yml?branch=main&label=Unit%20Testing" /></a>
    <a href="https://github.com/juicedata/juicefs/actions/workflows/integrationtests.yml"><img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/juicedata/juicefs/integrationtests.yml?branch=main&label=Integration%20Testing" /></a>
    <a href="https://goreportcard.com/report/github.com/juicedata/juicefs"><img alt="Go Report" src="https://goreportcard.com/badge/github.com/juicedata/juicefs" /></a>
    <a href="https://juicefs.com/docs/community/introduction"><img alt="English doc" src="https://img.shields.io/badge/docs-Doc%20Center-brightgreen" /></a>
    <a href="https://go.juicefs.com/slack"><img alt="Join Slack" src="https://badgen.net/badge/Slack/Join%20JuiceFS/0abd59?icon=slack" /></a>
</p>

**JuiceFS** is a high-performance [POSIX](https://en.wikipedia.org/wiki/POSIX) file system released under Apache License 2.0, particularly designed for the cloud-native environment. The data, stored via JuiceFS, will be persisted in Object Storage _(e.g. Amazon S3)_, and the corresponding metadata can be persisted in various compatible database engines such as Redis, MySQL, and TiKV based on the scenarios and requirements.

With JuiceFS, massive cloud storage can be directly connected to big data, machine learning, artificial intelligence, and various application platforms in production environments. Without modifying code, the massive cloud storage can be used as efficiently as local storage.

📖 **Document**: [Quick Start Guide](https://juicefs.com/docs/community/quick_start_guide)

## Highlighted Features

1. **Fully POSIX-compatible**: Use as a local file system, seamlessly docking with existing applications without breaking business workflow.
2. **Fully Hadoop-compatible**: JuiceFS' [Hadoop Java SDK](https://juicefs.com/docs/community/hadoop_java_sdk) is compatible with Hadoop 2.x and Hadoop 3.x as well as a variety of components in the Hadoop ecosystems.
3. **S3-compatible**:  JuiceFS' [S3 Gateway](https://juicefs.com/docs/community/s3_gateway) provides an S3-compatible interface.
4. **Cloud Native**: A [Kubernetes CSI Driver](https://juicefs.com/docs/community/how_to_use_on_kubernetes) is provided for easily using JuiceFS in Kubernetes.
5. **Shareable**: JuiceFS is a shared file storage that can be read and written by thousands of clients.
6. **Strong Consistency**: The confirmed modification will be immediately visible on all the servers mounted with the same file system.
7. **Outstanding Performance**: The latency can be as low as a few milliseconds, and the throughput can be expanded nearly unlimitedly _(depending on the size of the Object Storage)_. [Test results](https://juicefs.com/docs/community/benchmark)
8. **Data Encryption**: Supports data encryption in transit and at rest (please refer to [the guide](https://juicefs.com/docs/community/security/encrypt) for more information).
9. **Global File Locks**: JuiceFS supports both BSD locks (flock) and POSIX record locks (fcntl).
10. **Data Compression**: JuiceFS supports [LZ4](https://lz4.github.io/lz4) or [Zstandard](https://facebook.github.io/zstd) to compress all your data.

---

[Architecture](#architecture) | [Getting Started](#getting-started) | [Advanced Topics](#advanced-topics) | [POSIX Compatibility](#posix-compatibility) | [Performance Benchmark](#performance-benchmark) | [Supported Object Storage](#supported-object-storage) | [Who is using](#who-is-using) | [Roadmap](#roadmap) | [Reporting Issues](#reporting-issues) | [Contributing](#contributing) | [Community](#community) | [Usage Tracking](#usage-tracking) | [License](#license) | [Credits](#credits) | [FAQ](#faq)

---

## Architecture

JuiceFS consists of three parts:

1. **JuiceFS Client**: Coordinates Object Storage and metadata storage engine as well as implementation of file system interfaces such as POSIX, Hadoop, Kubernetes, and S3 gateway.
2. **Data Storage**: Stores data, with supports of a variety of data storage media, e.g., local disk, public or private cloud Object Storage, and HDFS.
3. **Metadata Engine**: Stores the corresponding metadata that contains information of file name, file size, permission group, creation and modification time and directory structure, etc., with supports of different metadata engines, e.g., Redis, MySQL, SQLite and TiKV.

![JuiceFS Architecture](docs/en/images/juicefs-arch-new.png)

JuiceFS can store the metadata of file system on different metadata engines, like Redis, which is a fast, open-source, in-memory key-value data storage, particularly suitable for storing metadata; meanwhile, all the data will be stored in Object Storage through JuiceFS client. [Learn more](https://juicefs.com/docs/community/architecture)

![data-structure-diagram](docs/en/images/data-structure-diagram.svg)

Each file stored in JuiceFS is split into **"Chunk"** s at a fixed size with the default upper limit of 64 MiB. Each Chunk is composed of one or more **"Slice"**(s), and the length of the slice varies depending on how the file is written. Each slice is composed of size-fixed **"Block"** s, which are 4 MiB by default. These blocks will be stored in Object Storage in the end; at the same time, the metadata information of the file and its Chunks, Slices, and Blocks will be stored in metadata engines via JuiceFS. [Learn more](https://juicefs.com/docs/community/architecture/#how-juicefs-store-files)

![How JuiceFS stores your files](docs/en/images/how-juicefs-stores-files.svg)

When using JuiceFS, files will eventually be split into Chunks, Slices and Blocks and stored in Object Storage. Therefore, the source files stored in JuiceFS cannot be found in the file browser of the Object Storage platform; instead, there are only a chunks directory and a bunch of digitally numbered directories and files in the bucket. Don't panic! This is just the secret of the high-performance operation of JuiceFS!

## Getting Started

Before you begin, make sure you have:

1. One supported metadata engine, see [How to Set Up Metadata Engine](https://juicefs.com/docs/community/databases_for_metadata)
2. One supported Object Storage for storing data blocks, see [Supported Object Storage](https://juicefs.com/docs/community/how_to_setup_object_storage)
3. [JuiceFS Client](https://juicefs.com/docs/community/installation) downloaded and installed

Please refer to [Quick Start Guide](https://juicefs.com/docs/community/quick_start_guide) to start using JuiceFS right away!

### Command Reference

Check out all the command line options in [command reference](https://juicefs.com/docs/community/command_reference).

### Containers

JuiceFS can be used as a persistent volume for Docker and Podman, please check [here](https://juicefs.com/docs/community/juicefs_on_docker) for details.

### Kubernetes

It is also very easy to use JuiceFS on Kubernetes. Please find more information [here](https://juicefs.com/docs/community/how_to_use_on_kubernetes).

### Hadoop Java SDK

If you wanna use JuiceFS in Hadoop, check [Hadoop Java SDK](https://juicefs.com/docs/community/hadoop_java_sdk).

## Advanced Topics

- [Redis Best Practices](https://juicefs.com/docs/community/redis_best_practices)
- [How to Setup Object Storage](https://juicefs.com/docs/community/how_to_setup_object_storage)
- [Cache](https://juicefs.com/docs/community/cache)
- [Fault Diagnosis and Analysis](https://juicefs.com/docs/community/fault_diagnosis_and_analysis)
- [FUSE Mount Options](https://juicefs.com/docs/community/fuse_mount_options)
- [Using JuiceFS on Windows](https://juicefs.com/docs/community/installation#windows)
- [S3 Gateway](https://juicefs.com/docs/community/s3_gateway)

Please refer to [JuiceFS Document Center](https://juicefs.com/docs/community/introduction) for more information.

## POSIX Compatibility

JuiceFS has passed all of the compatibility tests (8813 in total) in the latest [pjdfstest](https://github.com/pjd/pjdfstest) .

```
All tests successful.

Test Summary Report
-------------------
/root/soft/pjdfstest/tests/chown/00.t          (Wstat: 0 Tests: 1323 Failed: 0)
  TODO passed:   693, 697, 708-709, 714-715, 729, 733
Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr  0.38 sys +  2.57 cusr  3.93 csys =  9.65 CPU)
Result: PASS
```

Aside from the POSIX features covered by pjdfstest, JuiceFS also provides:

- **Close-to-open consistency**. Once a file is written _and_ closed, it is guaranteed to view the written data in the following opens and reads from any client. Within the same mount point, all the written data can be read immediately.
- Rename and all other metadata operations are atomic, which are guaranteed by supported metadata engine transaction.
- Opened files remain accessible after unlink from same mount point.
- Mmap (tested with FSx).
- Fallocate with punch hole support.
- Extended attributes (xattr).
- BSD locks (flock).
- POSIX record locks (fcntl).

## Performance Benchmark

### Basic benchmark

JuiceFS provides a subcommand that can run a few basic benchmarks to help you understand how it works in your environment:

![JuiceFS Bench](docs/en/images/juicefs-bench.png)

### Throughput

A sequential read/write benchmark has also been performed on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [fio](https://github.com/axboe/fio).

![Sequential Read Write Benchmark](docs/en/images/sequential-read-write-benchmark.svg)

Above result figure shows that JuiceFS can provide 10X more throughput than the other two (see [more details](https://juicefs.com/docs/community/fio)).

### Metadata IOPS

A simple mdtest benchmark has been performed on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [mdtest](https://github.com/hpc/ior).

![Metadata Benchmark](docs/en/images/metadata-benchmark.svg)

The result shows that JuiceFS can provide significantly more metadata IOPS than the other two (see [more details](https://juicefs.com/docs/community/mdtest)).

### Analyze performance

See [Real-Time Performance Monitoring](https://juicefs.com/docs/community/fault_diagnosis_and_analysis#performance-monitor) if you encountered performance issues.

## Supported Object Storage

- Amazon S3 _(and other S3 compatible Object Storage services)_
- Google Cloud Storage
- Azure Blob Storage
- Alibaba Cloud Object Storage Service (OSS)
- Tencent Cloud Object Storage (COS)
- Qiniu Cloud Object Storage (Kodo)
- QingStor Object Storage
- Ceph RGW
- MinIO
- Local disk
- Redis
- ...

JuiceFS supports numerous Object Storage services. [Learn more](https://juicefs.com/docs/community/how_to_setup_object_storage#supported-object-storage).

## Who is using

JuiceFS is production ready and used by thousands of machines in production. A list of users has been assembled and documented [here](https://juicefs.com/docs/community/adopters). In addition JuiceFS has several collaborative projects that integrate with other open source projects, which we have documented [here](https://juicefs.com/docs/community/integrations). If you are also using JuiceFS, please feel free to let us know, and you are welcome to share your specific experience with everyone.

The storage format is stable, and will be supported by all future releases.

## Roadmap

- Gateway Optimization
- Resumable Sync
- Read-ahead Optimization
- Optimization for Large-scale Scenarios
- Snapshots

## Reporting Issues

We use [GitHub Issues](https://github.com/juicedata/juicefs/issues) to track community reported issues. You can also [contact](#community) the community for any questions.

## Contributing

Thank you for your contribution! Please refer to the [JuiceFS Contributing Guide](https://juicefs.com/docs/community/development/contributing_guide) for more information.

## Community

Welcome to join the [Discussions](https://github.com/juicedata/juicefs/discussions) and the [Slack channel](https://go.juicefs.com/slack) to connect with JuiceFS team members and other users.

## Usage Tracking

JuiceFS collects **anonymous** usage data by default to help us better understand how the community is using JuiceFS. Only core metrics (e.g. version number) will be reported, and user data and any other sensitive data will not be included. The related code can be viewed [here](pkg/usage/usage.go).

You could also disable reporting easily by command line option `--no-usage-report`:

```bash
juicefs mount --no-usage-report
```

## License

JuiceFS is open-sourced under Apache License 2.0, see [LICENSE](LICENSE).

## Credits

The design of JuiceFS was inspired by [Google File System](https://research.google/pubs/pub51), [HDFS](https://hadoop.apache.org) and [MooseFS](https://moosefs.com). Thanks for their great work!

## FAQ

### Why doesn't JuiceFS support XXX Object Storage?

JuiceFS supports many Object Storage services. Please check out [this list](https://juicefs.com/docs/community/how_to_setup_object_storage#supported-object-storage) first. If the Object Storage you want to use is compatible with S3, you could treat it as S3. Otherwise, try reporting any issue.

### Can I use Redis Cluster as metadata engine?

Yes. Since [v1.0.0 Beta3](https://github.com/juicedata/juicefs/releases/tag/v1.0.0-beta3) JuiceFS supports the use of [Redis Cluster](https://redis.io/docs/manual/scaling) as the metadata engine, but it should be noted that Redis Cluster requires that the keys of all operations in a transaction must be in the same hash slot, so a JuiceFS file system can only use one hash slot.

See ["Redis Best Practices"](https://juicefs.com/docs/community/redis_best_practices) for more information.

### What's the difference between JuiceFS and XXX?

See ["Comparison with Others"](https://juicefs.com/docs/community/comparison/juicefs_vs_alluxio) for more information.

For more FAQs, please see the [full list](https://juicefs.com/docs/community/faq).

## Stargazers over time

[![Star History Chart](https://api.star-history.com/svg?repos=juicedata/juicefs&type=Date)](https://star-history.com/#juicedata/juicefs&Date)


================================================
FILE: README_CN.md
================================================
<p align="center"><a href="https://github.com/juicedata/juicefs"><img alt="JuiceFS Logo" src="docs/zh_cn/images/juicefs-logo-new.svg" width="50%" /></a></p>
<p align="center">
    <a href="https://github.com/juicedata/juicefs/actions/workflows/unittests.yml"><img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/juicedata/juicefs/unittests.yml?branch=main&label=Unit%20Testing" /></a>
    <a href="https://github.com/juicedata/juicefs/actions/workflows/integrationtests.yml"><img alt="GitHub Workflow Status" src="https://img.shields.io/github/actions/workflow/status/juicedata/juicefs/integrationtests.yml?branch=main&label=Integration%20Testing" /></a>
    <a href="https://goreportcard.com/report/github.com/juicedata/juicefs"><img alt="Go Report" src="https://goreportcard.com/badge/github.com/juicedata/juicefs" /></a>
    <a href="https://juicefs.com/docs/zh/community/introduction"><img alt="English doc" src="https://img.shields.io/badge/docs-文档中心-brightgreen" /></a>
    <a href="https://go.juicefs.com/slack"><img alt="Join Slack" src="https://badgen.net/badge/Slack/加入%20JuiceFS/0abd59?icon=slack" /></a>
</p>

JuiceFS 是一款高性能 [POSIX](https://en.wikipedia.org/wiki/POSIX) 文件系统，针对云原生环境特别优化设计，在 Apache 2.0 开源协议下发布。使用 JuiceFS 存储数据，数据本身会被持久化在对象存储（例如 Amazon S3），而数据所对应的元数据可以根据场景需求被持久化在 Redis、MySQL、TiKV 等多种数据库引擎中。

JuiceFS 可以简单便捷的将海量云存储直接接入已投入生产环境的大数据、机器学习、人工智能以及各种应用平台，无需修改代码即可像使用本地存储一样高效使用海量云端存储。

📺 **视频**：[什么是 JuiceFS?](https://www.bilibili.com/video/BV1HK4y197va)

📖 **文档**：[快速上手指南](https://juicefs.com/docs/zh/community/quick_start_guide)

## 核心特性

1. **POSIX 兼容**：像本地文件系统一样使用，无缝对接已有应用，无业务侵入性；
2. **HDFS 兼容**：完整兼容 [HDFS API](https://juicefs.com/docs/zh/community/hadoop_java_sdk)，提供更强的元数据性能；
3. **S3 兼容**：提供 [S3 网关](https://juicefs.com/docs/zh/community/s3_gateway) 实现 S3 协议兼容的访问接口；
4. **云原生**：通过 [Kubernetes CSI 驱动](https://juicefs.com/docs/zh/community/how_to_use_on_kubernetes) 可以很便捷地在 Kubernetes 中使用 JuiceFS；
5. **多端共享**：同一文件系统可在上千台服务器同时挂载，高性能并发读写，共享数据；
6. **强一致性**：确认的修改会在所有挂载了同一文件系统的服务器上立即可见，保证强一致性；
7. **强悍性能**：毫秒级的延迟，近乎无限的吞吐量（取决于对象存储规模），查看[性能测试结果](https://juicefs.com/docs/zh/community/benchmark)；
8. **数据安全**：支持传输中加密（encryption in transit）以及静态加密（encryption at rest），[查看详情](https://juicefs.com/docs/zh/community/security/encrypt)；
9. **文件锁**：支持 BSD 锁（flock）及 POSIX 锁（fcntl）；
10. **数据压缩**：支持使用 [LZ4](https://lz4.github.io/lz4) 或 [Zstandard](https://facebook.github.io/zstd) 压缩数据，节省存储空间。

---

[架构](#架构) | [开始使用](#开始使用) | [进阶主题](#进阶主题) | [POSIX 兼容性](#posix-兼容性测试) | [性能测试](#性能测试) | [支持的对象存储](#支持的对象存储) | [谁在使用](#谁在使用) | [产品路线图](#产品路线图) | [反馈问题](#反馈问题) | [贡献](#贡献) | [社区](#社区) | [使用量收集](#使用量收集) | [开源协议](#开源协议) | [致谢](#致谢) | [FAQ](#faq)

---

## 架构

JuiceFS 由三个部分组成：

1. **JuiceFS 客户端**：协调对象存储和元数据存储引擎，以及 POSIX、Hadoop、Kubernetes、S3 Gateway 等文件系统接口的实现；
2. **数据存储**：存储数据本身，支持本地磁盘、对象存储；
3. **元数据引擎**：存储数据对应的元数据，支持 Redis、MySQL、SQLite 等多种引擎；

![JuiceFS Architecture](docs/zh_cn/images/juicefs-arch-new.png)

JuiceFS 依靠 Redis 来存储文件的元数据。Redis 是基于内存的高性能的键值数据存储，非常适合存储元数据。与此同时，所有数据将通过 JuiceFS 客户端存储到对象存储中。[了解详情](https://juicefs.com/docs/zh/community/architecture)

![Data structure diagram](docs/en/images/data-structure-diagram.svg)

任何存入 JuiceFS 的文件都会被拆分成固定大小的 **"Chunk"**，默认的容量上限是 64 MiB。每个 Chunk 由一个或多个 **"Slice"** 组成，Slice 的长度不固定，取决于文件写入的方式。每个 Slice 又会被进一步拆分成固定大小的 **"Block"**，默认为 4 MiB。最后，这些 Block 会被存储到对象存储。与此同时，JuiceFS 会将每个文件以及它的 Chunks、Slices、Blocks 等元数据信息存储在元数据引擎中。[了解详情](https://juicefs.com/docs/zh/community/architecture#%E5%A6%82%E4%BD%95%E5%AD%98%E5%82%A8%E6%96%87%E4%BB%B6)

![How JuiceFS stores your files](docs/zh_cn/images/how-juicefs-stores-files.svg)

使用 JuiceFS，文件最终会被拆分成 Chunks、Slices 和 Blocks 存储在对象存储。因此，你会发现在对象存储平台的文件浏览器中找不到存入 JuiceFS 的源文件，存储桶中只有一个 chunks 目录和一堆数字编号的目录和文件。不要惊慌，这正是 JuiceFS 高性能运作的秘诀！

## 开始使用

创建 JuiceFS，需要以下 3 个方面的准备：

1. 准备 Redis 数据库
2. 准备对象存储
3. 下载安装 [JuiceFS 客户端](https://juicefs.com/docs/zh/community/installation)

请参照 [快速上手指南](https://juicefs.com/docs/zh/community/quick_start_guide) 立即开始使用 JuiceFS！

### 命令索引

请点击 [这里](https://juicefs.com/docs/zh/community/command_reference) 查看所有子命令以及命令行参数。

### 容器

JuiceFS 可以为 Docker、Podman 等容器化技术提供持久化存储，请查阅 [文档](https://juicefs.com/docs/community/juicefs_on_docker) 了解详情。

### Kubernetes

在 Kubernetes 中使用 JuiceFS 非常便捷，请查看 [这个文档](https://juicefs.com/docs/zh/community/how_to_use_on_kubernetes) 了解更多信息。

### Hadoop Java SDK

JuiceFS 使用 [Hadoop Java SDK](https://juicefs.com/docs/zh/community/hadoop_java_sdk) 与 Hadoop 生态结合。

## 进阶主题

- [Redis 最佳实践](https://juicefs.com/docs/zh/community/redis_best_practices)
- [如何设置对象存储](https://juicefs.com/docs/zh/community/how_to_setup_object_storage)
- [缓存](https://juicefs.com/docs/zh/community/cache)
- [故障诊断和分析](https://juicefs.com/docs/zh/community/fault_diagnosis_and_analysis)
- [FUSE 挂载选项](https://juicefs.com/docs/zh/community/fuse_mount_options)
- [在 Windows 中使用 JuiceFS](https://juicefs.com/docs/zh/community/installation#windows-系统)
- [S3 网关](https://juicefs.com/docs/zh/community/s3_gateway)

请查阅 [JuiceFS 文档中心](https://juicefs.com/docs/zh/community/introduction) 了解更多信息。

## POSIX 兼容性测试

JuiceFS 通过了 [pjdfstest](https://github.com/pjd/pjdfstest) 最新版所有 8813 项兼容性测试。

```
All tests successful.

Test Summary Report
-------------------
/root/soft/pjdfstest/tests/chown/00.t          (Wstat: 0 Tests: 1323 Failed: 0)
  TODO passed:   693, 697, 708-709, 714-715, 729, 733
Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr  0.38 sys +  2.57 cusr  3.93 csys =  9.65 CPU)
Result: PASS
```

除了 pjdfstest 覆盖的那些 POSIX 特性外，JuiceFS 还支持：

- 关闭再打开（close-to-open）一致性。一旦一个文件写入完成并关闭，之后的打开和读操作保证可以访问之前写入的数据。如果是在同一个挂载点，所有写入的数据都可以立即读。
- 重命名以及所有其他元数据操作都是原子的，由 Redis 的事务机制保证。
- 当文件被删除后，同一个挂载点上如果已经打开了，文件还可以继续访问。
- 支持 mmap
- 支持 fallocate 以及空洞
- 支持扩展属性
- 支持 BSD 锁（flock）
- 支持 POSIX 记录锁（fcntl）

## 性能测试

### 基础性能测试

JuiceFS 提供一个性能测试的子命令来帮助你了解它在你的环境中的性能表现：

![JuiceFS Bench](docs/zh_cn/images/juicefs-bench.png)

### 顺序读写性能

使用 [fio](https://github.com/axboe/fio) 测试了 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的顺序读写性能，结果如下：

![Sequential Read Write Benchmark](docs/zh_cn/images/sequential-read-write-benchmark.svg)

上图显示 JuiceFS 可以比其他两者提供 10 倍以上的吞吐，详细结果请看[这里](https://juicefs.com/docs/zh/community/fio)。

### 元数据性能

使用 [mdtest](https://github.com/hpc/ior) 测试了 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的元数据性能，结果如下：

![Metadata Benchmark](docs/zh_cn/images/metadata-benchmark.svg)

上图显示 JuiceFS 的元数据性能显著优于其他两个，详细的测试报告请看[这里](https://juicefs.com/docs/zh/community/mdtest)。

### 性能分析

如遇性能问题，查看[「实时性能监控」](https://juicefs.com/docs/zh/community/fault_diagnosis_and_analysis#performance-monitor)。

## 支持的对象存储

- 亚马逊 S3
- 谷歌云存储
- 微软云存储
- 阿里云 OSS
- 腾讯云 COS
- 青云 QingStor 对象存储
- Ceph RGW
- MinIO
- 本地目录
- Redis
- ……

JuiceFS 支持几乎所有主流的对象存储服务，[查看详情](https://juicefs.com/docs/zh/community/how_to_setup_object_storage/#%E6%94%AF%E6%8C%81%E7%9A%84%E5%AD%98%E5%82%A8%E6%9C%8D%E5%8A%A1)。

## 谁在使用

JuiceFS 已经可以用于生产环境，目前有几千个节点在生产环境中使用它。我们收集汇总了一份使用者名单，记录在[这里](https://juicefs.com/docs/zh/community/adopters)。另外 JuiceFS 还有不少与其他开源项目进行集成的合作项目，我们将其记录在[这里](https://juicefs.com/docs/zh/community/integrations)。如果你也在使用 JuiceFS，请随时告知我们，也欢迎你向大家分享具体的使用经验。

JuiceFS 的存储格式已经稳定，会被后续发布的所有版本支持。

## 产品路线图

- 基于用户和组的配额
- 快照
- 一次写入多次读取（WORM）

## 反馈问题

我们使用 [GitHub Issues](https://github.com/juicedata/juicefs/issues) 来管理社区反馈的问题，你也可以通过其他[渠道](#社区)跟社区联系。

## 贡献

感谢你对 JuiceFS 社区的贡献！请参考 [JuiceFS 贡献指南](https://juicefs.com/docs/zh/community/development/contributing_guide) 了解更多信息。

## 社区

欢迎加入 [Discussions](https://github.com/juicedata/juicefs/discussions) 和 [Slack 频道](https://go.juicefs.com/slack) 跟我们的团队和其他社区成员交流。

## 使用量收集

JuiceFS 的客户端会收集 **匿名** 使用数据来帮助我们更好地了解大家如何使用它，它只上报诸如版本号等使用量数据，不包含任何用户信息，完整的代码在 [这里](pkg/usage/usage.go)。

你也可以通过下面的方式禁用它：

```bash
juicefs mount --no-usage-report
```

## 开源协议

使用 Apache License 2.0 开源，详见 [LICENSE](LICENSE)。

## 致谢

JuiceFS 的设计参考了 [Google File System](https://research.google/pubs/pub51)、[HDFS](https://hadoop.apache.org) 以及 [MooseFS](https://moosefs.com)，感谢他们的杰出工作。

## FAQ

### 为什么不支持某个对象存储？

已经支持了绝大部分对象存储，参考这个[列表](https://juicefs.com/docs/zh/community/how_to_setup_object_storage#支持的存储服务)。如果它跟 S3 兼容的话，也可以当成 S3 来使用。否则，请创建一个 issue 来增加支持。

### 是否可以使用 Redis 集群版作为元数据引擎？

可以。自 [v1.0.0 Beta3](https://github.com/juicedata/juicefs/releases/tag/v1.0.0-beta3) 版本开始 JuiceFS 支持使用 [Redis 集群版](https://redis.io/docs/manual/scaling)作为元数据引擎，不过需要注意的是 Redis 集群版要求一个事务中所有操作的 key 必须在同一个 hash slot 中，因此一个 JuiceFS 文件系统只能使用一个 hash slot。

请查看[「Redis 最佳实践」](https://juicefs.com/docs/zh/community/redis_best_practices)了解更多信息。

### JuiceFS 与 XXX 的区别是什么？

请查看[「同类技术对比」](https://juicefs.com/docs/zh/community/comparison/juicefs_vs_alluxio)文档了解更多信息。

更多 FAQ 请查看[完整列表](https://juicefs.com/docs/zh/community/faq)。

## 历史加星

[![Stargazers over time](https://starchart.cc/juicedata/juicefs.svg)](https://starchart.cc/juicedata/juicefs)


================================================
FILE: check-changed.sh
================================================
#!/bin/bash

set -e

if [ x"${TRAVIS_COMMIT_RANGE}" == x ] ; then
  CHANGED_FILES=`git diff --name-only HEAD~1`
else
  CHANGED_FILES=`git diff --name-only $TRAVIS_COMMIT_RANGE`
fi
echo $CHANGED_FILES
DOCS_DIR="docs/"
GITHUB_DIR=".github/"
SKIP_TEST=true

for CHANGED_FILE in $CHANGED_FILES; do
  if ! [[ $CHANGED_FILE =~ $DOCS_DIR ]] && ! [[ $CHANGED_FILE =~ $GITHUB_DIR ]] ; then
    SKIP_TEST=false
    break
  fi
done

================================================
FILE: cmd/bench.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdBench() *cli.Command {
	return &cli.Command{
		Name:      "bench",
		Action:    bench,
		Category:  "TOOL",
		Usage:     "Run benchmarks on a path",
		ArgsUsage: "PATH",
		Description: `
Run basic benchmarks on the target PATH to test if it works as expected. Results are colored with
green/yellow/red to indicate whether they are in a normal range. If you see any red value, please
double check relevant configuration before further test.

Examples:
# Run benchmarks with 4 threads
$ juicefs bench /mnt/jfs -p 4

# Run benchmarks of only small files
$ juicefs bench /mnt/jfs --big-file-size 0

Details: https://juicefs.com/docs/community/performance_evaluation_guide#juicefs-bench`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "block-size",
				Value: "1M",
				Usage: "size of each IO block in MiB",
			},
			&cli.StringFlag{
				Name:  "big-file-size",
				Value: "1G",
				Usage: "size of each big file in MiB",
			},
			&cli.StringFlag{
				Name:  "small-file-size",
				Value: "128K",
				Usage: "size of each small file in KiB",
			},
			&cli.UintFlag{
				Name:  "small-file-count",
				Value: 100,
				Usage: "number of small files per thread",
			},
			&cli.UintFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   1,
				Usage:   "number of concurrent threads",
			},
		},
	}
}

var resultRange = map[string][4]float64{
	"bigwr":   {100, 200, 10, 50},
	"bigrd":   {100, 200, 10, 50},
	"smallwr": {12.5, 20, 50, 80},
	"smallrd": {50, 100, 10, 20},
	"stat":    {20, 1000, 1, 5},
	"fuse":    {0, 0, 0.5, 2},
	"meta":    {0, 0, 2, 5},
	"put":     {0, 0, 100, 200},
	"get":     {0, 0, 100, 200},
	"delete":  {0, 0, 30, 100},
	"cachewr": {0, 0, 10, 20},
	"cacherd": {0, 0, 1, 5},
}

type benchCase struct {
	bm               *benchmark
	name             string
	fsize, bsize     int        // file/block size in Bytes
	fcount, bcount   int        // file/block count
	wbar, rbar, sbar *utils.Bar // progress bar for write/read/stat
}

type benchmark struct {
	colorful   bool
	big, small *benchCase
	threads    int
	tmpdir     string
}

func (bc *benchCase) writeFiles(index int) {
	for i := 0; i < bc.fcount; i++ {
		fname := filepath.Join(bc.bm.tmpdir, fmt.Sprintf("%s.%d.%d", bc.name, index, i))
		fp, err := os.OpenFile(fname, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
		if err != nil {
			logger.Fatalf("Failed to open file %s: %s", fname, err)
		}
		buf := make([]byte, bc.bsize)
		utils.RandRead(buf)
		for j := 0; j < bc.bcount; j++ {
			if _, err = fp.Write(buf); err != nil {
				logger.Fatalf("Failed to write file %s: %s", fname, err)
			}
			bc.wbar.Increment()
		}
		_ = fp.Close()
	}
}

func (bc *benchCase) readFiles(index int) {
	for i := 0; i < bc.fcount; i++ {
		fname := filepath.Join(bc.bm.tmpdir, fmt.Sprintf("%s.%d.%d", bc.name, index, i))
		fp, err := os.Open(fname)
		if err != nil {
			logger.Fatalf("Failed to open file %s: %s", fname, err)
		}
		buf := make([]byte, bc.bsize)
		for j := 0; j < bc.bcount; j++ {
			if n, err := fp.Read(buf); err != nil || n != bc.bsize {
				logger.Fatalf("Failed to read file %s: %d %s", fname, n, err)
			}
			bc.rbar.Increment()
		}
		_ = fp.Close()
	}
}

func (bc *benchCase) statFiles(index int) {
	for i := 0; i < bc.fcount; i++ {
		fname := filepath.Join(bc.bm.tmpdir, fmt.Sprintf("%s.%d.%d", bc.name, index, i))
		if _, err := os.Stat(fname); err != nil {
			logger.Fatalf("Failed to stat file %s: %s", fname, err)
		}
		bc.sbar.Increment()
	}
}

func (bc *benchCase) run(test string) float64 {
	var fn func(int)
	switch test {
	case "write":
		fn = bc.writeFiles
	case "read":
		fn = bc.readFiles
	case "stat":
		fn = bc.statFiles
	} // default: fatal
	var wg sync.WaitGroup
	start := time.Now()
	for i := 0; i < bc.bm.threads; i++ {
		index := i
		wg.Add(1)
		go func() {
			fn(index)
			wg.Done()
		}()
	}
	wg.Wait()
	return time.Since(start).Seconds()
}

func newBenchmark(tmpdir string, blockSize, bigSize, smallSize, smallCount, threads int) *benchmark {
	bm := &benchmark{threads: threads, tmpdir: tmpdir}
	if bigSize > 0 {
		bm.big = bm.newCase("bigfile", bigSize, 1, blockSize)
	}
	if smallSize > 0 && smallCount > 0 {
		bm.small = bm.newCase("smallfile", smallSize, smallCount, blockSize)
	}
	return bm
}

func (bm *benchmark) newCase(name string, fsize, fcount, bsize int) *benchCase {
	bc := &benchCase{
		bm:     bm,
		name:   name,
		fsize:  fsize,
		fcount: fcount,
		bsize:  bsize,
	}
	if fsize <= bsize {
		bc.bcount = 1
		bc.bsize = fsize
	} else {
		bc.bcount = (fsize-1)/bsize + 1
		bc.fsize = bc.bcount * bsize
	}
	return bc
}

func (bm *benchmark) colorize(item string, value, cost float64, prec int) (string, string) {
	svalue := strconv.FormatFloat(value, 'f', prec, 64)
	scost := strconv.FormatFloat(cost, 'f', 2, 64)
	if bm.colorful {
		r, ok := resultRange[item]
		if !ok {
			logger.Fatalf("Invalid item: %s", item)
		}
		if item == "smallwr" || item == "smallrd" || item == "stat" {
			r[0] *= float64(bm.threads)
			r[1] *= float64(bm.threads)
		}
		var color int
		if value > r[1] { // max
			color = GREEN
		} else if value > r[0] { // min
			color = YELLOW
		} else {
			color = RED
		}
		svalue = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, svalue, RESET_SEQ)
		if cost < r[2] { // min
			color = GREEN
		} else if cost < r[3] { // max
			color = YELLOW
		} else {
			color = RED
		}
		scost = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, scost, RESET_SEQ)
	}
	return svalue, scost
}

func printResult(result [][]string, leftAlign int, colorful bool) {
	if len(result) < 2 {
		logger.Fatalf("result must not be empty")
	}
	colNum := len(result[0])
	rawmax, max := make([]int, colNum), make([]int, colNum)
	for _, l := range result {
		for i := 0; i < colNum; i++ {
			if len(l[i]) > rawmax[i] {
				rawmax[i] = len(l[i])
			}
		}
	}
	copy(max, rawmax)
	if colorful {
		for i := 1; i < colNum; i++ {
			max[i] -= 11
		}
	}

	var b strings.Builder
	for i := 0; i < colNum; i++ {
		b.WriteByte('+')
		b.WriteString(strings.Repeat("-", max[i]+2))
	}
	b.WriteByte('+')
	divider := b.String()
	fmt.Println(divider)

	b.Reset()
	header := result[0]
	for i := 0; i < colNum; i++ {
		b.WriteString(" | ")
		b.WriteString(padding(header[i], max[i], ' '))
	}
	b.WriteString(" |")
	fmt.Println(b.String()[1:])
	fmt.Println(divider)

	for _, l := range result[1:] {
		b.Reset()
		for i := 0; i < colNum; i++ {
			b.WriteString(" | ")
			if i == leftAlign {
				b.WriteString(l[i])
			}
			if spaces := rawmax[i] - len(l[i]); spaces > 0 {
				b.WriteString(strings.Repeat(" ", spaces))
			}
			if i != leftAlign {
				b.WriteString(l[i])
			}
		}
		b.WriteString(" |")
		fmt.Println(b.String()[1:])
	}
	fmt.Println(divider)
}

func bench(ctx *cli.Context) error {
	setup(ctx, 1)
	/* --- Pre-check --- */
	blockSize := utils.ParseBytes(ctx, "block-size", 'M')
	if blockSize == 0 || ctx.Uint("threads") == 0 {
		return os.ErrInvalid
	}
	tmpdir, err := filepath.Abs(ctx.Args().First())
	if err != nil {
		logger.Fatalf("Failed to get absolute path of %s: %s", ctx.Args().First(), err)
	}
	bigSize := utils.ParseBytes(ctx, "big-file-size", 'M')
	smallSize := utils.ParseBytes(ctx, "small-file-size", 'K')
	tmpdir = filepath.Join(tmpdir, fmt.Sprintf("__juicefs_benchmark_%d__", time.Now().UnixNano()))
	bm := newBenchmark(tmpdir, int(blockSize), int(bigSize), int(smallSize),
		int(ctx.Uint("small-file-count")), int(ctx.Uint("threads")))
	if bm.big == nil && bm.small == nil {
		return os.ErrInvalid
	}
	var purgeArgs []string
	if os.Getuid() != 0 {
		purgeArgs = append(purgeArgs, "sudo")
	}
	switch runtime.GOOS {
	case "darwin":
		purgeArgs = append(purgeArgs, "purge")
	case "linux":
		purgeArgs = append(purgeArgs, "/bin/sh", "-c", "echo 3 > /proc/sys/vm/drop_caches")
	case "windows":
		break
	default:
		logger.Fatal("Currently only support Linux/MacOS/Windows")
	}

	/* --- Prepare --- */
	if _, err := os.Stat(bm.tmpdir); os.IsNotExist(err) {
		if err = os.MkdirAll(bm.tmpdir, 0777); err != nil {
			logger.Fatalf("Failed to create %s: %s", bm.tmpdir, err)
		}
	}
	mp, _ := findMountpoint(bm.tmpdir)
	dropCaches := func() {
		if os.Getenv("SKIP_DROP_CACHES") != "true" && runtime.GOOS != "windows" {
			if err := exec.Command(purgeArgs[0], purgeArgs[1:]...).Run(); err != nil {
				logger.Warnf("Failed to clean kernel caches: %s", err)
			}
		} else {
			logger.Warnf("Clear cache operation has been skipped")
		}
	}
	if os.Getuid() != 0 {
		fmt.Println("Cleaning kernel cache, may ask for root privilege...")
	}
	dropCaches()
	bm.colorful = utils.SupportANSIColor(os.Stdout.Fd())
	progress := utils.NewProgress(false)
	/* --- Run Benchmark --- */
	var stats map[string]float64
	if mp != "" {
		stats = readStats(mp)
	}
	var result [][]string
	result = append(result, []string{"ITEM", "VALUE", "COST"})
	if b := bm.big; b != nil {
		total := int64(bm.threads * b.fcount * b.bcount)
		b.wbar = progress.AddCountBar("Write big blocks", total)
		cost := b.run("write")
		b.wbar.Done()
		line := make([]string, 3)
		line[0] = "Write big file"
		line[1], line[2] = bm.colorize("bigwr", float64(b.fsize)/1024/1024*float64(b.fcount*bm.threads)/cost, cost/float64(b.fcount), 2)
		line[1] += " MiB/s"
		line[2] += " s/file"
		result = append(result, line)
		dropCaches()

		b.rbar = progress.AddCountBar("Read big blocks", total)
		cost = b.run("read")
		b.rbar.Done()
		line = make([]string, 3)
		line[0] = "Read big file"
		line[1], line[2] = bm.colorize("bigrd", float64(b.fsize)/1024/1024*float64(b.fcount*bm.threads)/cost, cost/float64(b.fcount), 2)
		line[1] += " MiB/s"
		line[2] += " s/file"
		result = append(result, line)
	}
	if s := bm.small; s != nil {
		total := int64(bm.threads * s.fcount * s.bcount)
		s.wbar = progress.AddCountBar("Write small blocks", total)
		cost := s.run("write")
		s.wbar.Done()
		line := make([]string, 3)
		line[0] = "Write small file"
		line[1], line[2] = bm.colorize("smallwr", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1)
		line[1] += " files/s"
		line[2] += " ms/file"
		result = append(result, line)
		dropCaches()

		s.rbar = progress.AddCountBar("Read small blocks", total)
		cost = s.run("read")
		s.rbar.Done()
		line = make([]string, 3)
		line[0] = "Read small file"
		line[1], line[2] = bm.colorize("smallrd", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1)
		line[1] += " files/s"
		line[2] += " ms/file"
		result = append(result, line)
		dropCaches()

		s.sbar = progress.AddCountBar("Stat small files", int64(bm.threads*s.fcount))
		cost = s.run("stat")
		s.sbar.Done()
		line = make([]string, 3)
		line[0] = "Stat file"
		line[1], line[2] = bm.colorize("stat", float64(s.fcount*bm.threads)/cost, cost*1000/float64(s.fcount), 1)
		line[1] += " files/s"
		line[2] += " ms/file"
		result = append(result, line)
	}
	progress.Done()

	/* --- Clean-up --- */
	if runtime.GOOS == "windows" {
		if err := exec.Command("cmd", "/C", "rd", "/s", "/q", bm.tmpdir).Run(); err != nil {
			logger.Warnf("Failed to cleanup %s: %s", bm.tmpdir, err)
		}
	} else {
		if err := exec.Command("rm", "-rf", bm.tmpdir).Run(); err != nil {
			logger.Warnf("Failed to cleanup %s: %s", bm.tmpdir, err)
		}
	}

	/* --- Report --- */
	fmt.Println("Benchmark finished!")
	fmt.Printf("BlockSize: %s, BigFileSize: %s, SmallFileSize: %s, SmallFileCount: %d, NumThreads: %d\n",
		humanize.IBytes(blockSize), humanize.IBytes(bigSize), humanize.IBytes(smallSize),
		ctx.Uint("small-file-count"), ctx.Uint("threads"))
	if stats != nil {
		stats2 := readStats(mp)
		diff := func(item string) float64 {
			return stats2["juicefs_"+item] - stats["juicefs_"+item]
		}
		show := func(title, nick, item string) {
			count := diff(item + "_total")
			var cost float64
			if count > 0 {
				cost = diff(item+"_sum") * 1000 / count
			}
			line := make([]string, 3)
			line[0] = title
			line[1], line[2] = bm.colorize(nick, count, cost, 0)
			line[1] += " operations"
			line[2] += " ms/op"
			result = append(result, line)
		}
		show("FUSE operation", "fuse", "fuse_ops_durations_histogram_seconds")
		show("Update meta", "meta", "transaction_durations_histogram_seconds")
		show("Put object", "put", "object_request_durations_histogram_seconds_PUT")
		show("Get object", "get", "object_request_durations_histogram_seconds_GET")
		show("Delete object", "delete", "object_request_durations_histogram_seconds_DELETE")
		show("Write into cache", "cachewr", "blockcache_write_hist_seconds")
		show("Read from cache", "cacherd", "blockcache_read_hist_seconds")
		var fmtString string
		if bm.colorful {
			greenSeq := fmt.Sprintf("%s%dm", COLOR_SEQ, GREEN)
			fmtString = fmt.Sprintf("Time used: %s%%.1f%s s, CPU: %s%%.1f%s%%%%, Memory: %s%%.1f%s MiB\n",
				greenSeq, RESET_SEQ, greenSeq, RESET_SEQ, greenSeq, RESET_SEQ)
		} else {
			fmtString = "Time used: %.1f s, CPU: %.1f%%, Memory: %.1f MiB\n"
		}
		fmt.Printf(fmtString, diff("uptime"), diff("cpu_usage")*100/diff("uptime"), stats2["juicefs_memory"]/1024/1024)
	}
	printResult(result, -1, bm.colorful)
	return nil
}


================================================
FILE: cmd/bench_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"os"
	"testing"
)

func TestBench(t *testing.T) {
	mountTemp(t, nil, []string{"--trash-days=0"}, nil)
	defer umountTemp(t)

	os.Setenv("SKIP_DROP_CACHES", "true")
	defer os.Unsetenv("SKIP_DROP_CACHES")
	if err := Main([]string{"", "bench", testMountPoint}); err != nil {
		t.Fatalf("test bench failed: %s", err)
	}
}

func TestBenchForObject(t *testing.T) {
	if err := Main([]string{"", "objbench", testMountPoint + "/", "-p", "4"}); err != nil {
		t.Fatalf("test bench failed: %s", err)
	}
}


================================================
FILE: cmd/clone.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"strings"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdClone() *cli.Command {
	return &cli.Command{
		Name:      "clone",
		Action:    clone,
		Usage:     "clone a file or directory without copying the underlying data",
		ArgsUsage: "SRC DST",
		Category:  "TOOL",
		Description: `
This command can clone a file or directory without copying the underlying data,similar to the cp command but very fast.
Examples:
# Clone a file
$ juicefs clone /mnt/jfs/file1 /mnt/jfs/file2

# Clone a directory
$ juicefs clone /mnt/jfs/dir1 /mnt/jfs/dir2

# Clone with preserving the uid, gid, and mode of the file
$ juicefs clone -p /mnt/jfs/file1 /mnt/jfs/file2`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "preserve",
				Aliases: []string{"p"},
				Usage:   "preserve the uid, gid, and mode of the file. (This is forced on Windows)",
			},
			&cli.IntFlag{
				Name:  "threads",
				Value: meta.CLONE_DEFAULT_CONCURRENCY,
				Usage: "number of concurrent workers for cloning directories",
			},
		},
	}
}

func clone(ctx *cli.Context) error {
	setup(ctx, 2)
	srcPath := ctx.Args().Get(0)
	srcAbsPath, err := filepath.Abs(srcPath)
	if err != nil {
		return fmt.Errorf("abs of %s: %s", srcPath, err)
	}
	srcIno, err := utils.GetFileInode(srcPath)
	if err != nil {
		return fmt.Errorf("lookup inode for %s: %s", srcPath, err)
	}
	srcParentIno, err := utils.GetFileInode(filepath.Dir(srcAbsPath))
	if err != nil {
		return fmt.Errorf("lookup inode for %s: %s", filepath.Dir(srcAbsPath), err)
	}
	dst := ctx.Args().Get(1)
	if strings.HasSuffix(dst, string(filepath.Separator)) {
		dst = filepath.Join(dst, filepath.Base(srcPath))
	}
	if _, err := os.Stat(dst); err == nil {
		return fmt.Errorf("%s already exists", dst)
	} else if !os.IsNotExist(err) {
		return fmt.Errorf("stat %s: %s", dst, err)
	}
	dstAbsPath, err := filepath.Abs(dst)
	if err != nil {
		return fmt.Errorf("abs of %s: %s", dst, err)
	}

	srcMp, err := findMountpoint(srcAbsPath)
	if err != nil {
		return err
	}
	dstMp, err := findMountpoint(filepath.Dir(dstAbsPath))
	if err != nil {
		return err
	}
	if srcMp != dstMp {
		return fmt.Errorf("the clone DST path should be at the same mount point as the SRC path")
	}
	if strings.HasPrefix(dstAbsPath, path.Clean(srcAbsPath)+"/") {
		return fmt.Errorf("the clone DST path should not be under the SRC path")
	}

	dstParent := filepath.Dir(dstAbsPath)
	dstName := filepath.Base(dstAbsPath)
	dstParentIno, err := utils.GetFileInode(dstParent)
	if err != nil {
		return fmt.Errorf("lookup inode for %s: %s", dstParent, err)
	}
	var cmode uint8
	umask := utils.GetUmask()
	if ctx.Bool("preserve") || runtime.GOOS == "windows" {
		cmode |= meta.CLONE_MODE_PRESERVE_ATTR
	}
	threads := ctx.Int("threads")
	if threads < 1 {
		threads = 1
	} else if threads > 255 {
		threads = 255
	}
	headerSize := 4 + 4
	contentSize := 8 + 8 + 8 + 1 + uint32(len(dstName)) + 2 + 1 + 1 // +1 for threads
	wb := utils.NewBuffer(uint32(headerSize) + contentSize)
	wb.Put32(meta.Clone)
	wb.Put32(contentSize)
	wb.Put64(srcIno)
	wb.Put64(srcParentIno)
	wb.Put64(dstParentIno)
	wb.Put8(uint8(len(dstName)))
	wb.Put([]byte(dstName))
	wb.Put16(uint16(umask))
	wb.Put8(cmode)
	wb.Put8(uint8(threads))
	f, err := openController(srcMp)
	if err != nil {
		return err
	}
	defer f.Close()
	if _, err = f.Write(wb.Bytes()); err != nil {
		return fmt.Errorf("write message: %s", err)
	}

	progress := utils.NewProgress(false)
	defer progress.Done()
	bar := progress.AddCountBar("Cloning entries", 0)
	if _, errno := readProgress(f, func(count uint64, total uint64) {
		bar.SetTotal(int64(total))
		bar.SetCurrent(int64(count))
	}); errno != 0 {
		return fmt.Errorf("clone failed: %v", errno)
	}
	return nil
}

func findMountpoint(fpath string) (string, error) {
	for p := fpath; p != "/"; p = filepath.Dir(p) {
		inode, err := utils.GetFileInode(p)
		if err != nil {
			return "", fmt.Errorf("get inode of %s: %s", p, err)
		}
		if inode == uint64(meta.RootInode) {
			return p, nil
		}
	}
	return "", fmt.Errorf("%s is not inside JuiceFS", fpath)
}


================================================
FILE: cmd/compact.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"math"
	"path/filepath"
	"syscall"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdCompact() *cli.Command {
	return &cli.Command{
		Name:      "compact",
		Action:    compact,
		Category:  "TOOL",
		Usage:     "Trigger compaction of chunks",
		ArgsUsage: "PATH...",
		Description: `
 Examples:
 # compact with path
 $ juicefs compact /mnt/jfs/foo
 `,
		Flags: []cli.Flag{
			&cli.UintFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   10,
				Usage:   "compact concurrency",
			},
		},
	}
}

func compact(ctx *cli.Context) error {
	setup0(ctx, 1, 0)

	coCnt := ctx.Int("threads")
	if coCnt <= 0 {
		logger.Warn("threads should be > 0")
		coCnt = 1
	} else if coCnt >= math.MaxUint16 {
		logger.Warn("threads should be < MaxUint16")
		coCnt = math.MaxUint16
	}

	paths := ctx.Args().Slice()
	for i := 0; i < len(paths); i++ {
		path, err := filepath.Abs(paths[i])
		if err != nil {
			logger.Fatalf("get absolute path of %s error: %v", paths[i], err)
		}

		inodeNo, err := utils.GetFileInode(path)
		if err != nil {
			logger.Errorf("lookup inode for %s error: %v", path, err)
			continue
		}
		inode := meta.Ino(inodeNo)

		if !inode.IsValid() {
			logger.Fatalf("inode numbe %d not valid", inode)
		}

		if err = doCompact(inode, path, uint16(coCnt)); err != nil {
			logger.Error(err)
		}
	}
	return nil
}

func doCompact(inode meta.Ino, path string, coCnt uint16) error {
	f, err := openController(path)
	if err != nil {
		return fmt.Errorf("open control file for [%d:%s]: %w", inode, path, err)
	}
	defer f.Close()

	headerLen, bodyLen := uint32(8), uint32(8+2)
	wb := utils.NewBuffer(headerLen + bodyLen)
	wb.Put32(meta.CompactPath)
	wb.Put32(bodyLen)
	wb.Put64(uint64(inode))
	wb.Put16(coCnt)

	_, err = f.Write(wb.Bytes())
	if err != nil {
		logger.Fatalf("write message: %s", err)
	}

	progress := utils.NewProgress(false)
	bar := progress.AddCountBar("Compacted chunks", 0)
	_, errno := readProgress(f, func(totalChunks, currChunks uint64) {
		bar.SetTotal(int64(totalChunks))
		bar.SetCurrent(int64(currChunks))
	})

	bar.Done()
	progress.Done()

	if errno == syscall.EINVAL {
		logger.Fatalf("compact is not supported, please upgrade and mount again")
	}
	if errno != 0 {
		return fmt.Errorf("compact [%d:%s] error: %s", inode, path, errno)
	}

	logger.Infof("compact [%d:%s] success.", inode, path)
	return nil
}


================================================
FILE: cmd/compact_test.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
)

func createTestFile(path string, size int, partCnt int) error {
	file, err := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	defer file.Close()

	content := []byte(strings.Repeat("a", size/partCnt))
	for i := 0; i < partCnt; i++ {
		if _, err = file.Write(content); err != nil {
			return err
		}
		if err = file.Sync(); err != nil {
			return err
		}
	}
	return nil
}

type testDir struct {
	path     string
	fileCnt  int
	fileSize int
	filePart int
}

func initForCompactTest(mountDir string, dirs map[string]testDir) {
	for _, d := range dirs {
		dirPath := filepath.Join(mountDir, d.path)

		err := os.MkdirAll(dirPath, 0755)
		if err != nil {
			panic(err)
		}

		for i := 0; i < d.fileCnt; i++ {
			if err := createTestFile(filepath.Join(dirPath, fmt.Sprintf("%d", i)), d.fileSize, d.filePart); err != nil {
				panic(err)
			}
		}
	}
}

func TestCompact(t *testing.T) {
	var bucket string
	mountTemp(t, &bucket, []string{"--trash-days=0"}, nil)
	defer umountTemp(t)

	dirs := map[string]testDir{
		"d1/d11": {
			path:     "d1/d11",
			fileCnt:  10,
			fileSize: 10,
			filePart: 2,
		},
		"d1": {
			path:     "d1",
			fileCnt:  20,
			fileSize: 10,
			filePart: 5,
		},
		"d2": {
			path:     "d2",
			fileCnt:  5,
			fileSize: 20,
			filePart: 4,
		},
	}
	initForCompactTest(testMountPoint, dirs)
	dataDir := filepath.Join(bucket, testVolume, "chunks")

	sumChunks := 0
	for _, d := range dirs {
		sumChunks += d.fileCnt * d.filePart
	}

	chunkCnt := getFileCount(dataDir)
	assert.Equal(t, sumChunks, chunkCnt)

	orderedDirs := []string{"d1/d11", "d1", "d2"}
	for _, path := range orderedDirs {
		d := dirs[path]

		err := Main([]string{"", "compact", filepath.Join(testMountPoint, d.path)})
		assert.Nil(t, err)

		chunkCnt = getFileCount(dataDir)
		sumChunks -= d.fileCnt * (d.filePart - 1)
		assert.Equal(t, sumChunks, chunkCnt)
	}
}


================================================
FILE: cmd/config.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/pkg/errors"
	"github.com/urfave/cli/v2"
)

func cmdConfig() *cli.Command {
	return &cli.Command{
		Name:      "config",
		Action:    config,
		Category:  "ADMIN",
		Usage:     "Change configuration of a volume",
		ArgsUsage: "META-URL",
		Description: `
Only flags explicitly specified are changed.

Examples:
# Show the current configurations
$ juicefs config redis://localhost

# Change volume "quota"
$ juicefs config redis://localhost --inodes 10000000 --capacity 1048576

# Change maximum days before files in trash are deleted
$ juicefs config redis://localhost --trash-days 7

# Limit client version that is allowed to connect
$ juicefs config redis://localhost --min-client-version 1.0.0 --max-client-version 1.1.0`,
		Flags: expandFlags(
			formatStorageFlags(),
			addCategories("DATA STORAGE", []cli.Flag{
				&cli.StringFlag{
					Name:  "upload-limit",
					Usage: "default bandwidth limit of a client for upload in Mbps",
				},
				&cli.StringFlag{
					Name:  "download-limit",
					Usage: "default bandwidth limit of a client for download in Mbps",
				},
			}),
			formatManagementFlags(),
			configManagementFlags(),
			configFlags()),
	}
}

func configManagementFlags() []cli.Flag {
	return addCategories("MANAGEMENT", []cli.Flag{
		&cli.BoolFlag{
			Name:  "encrypt-secret",
			Usage: "encrypt the secret key if it was previously stored in plain format",
		},
		&cli.StringFlag{
			Name:  "min-client-version",
			Usage: "minimum client version allowed to connect",
		},
		&cli.StringFlag{
			Name:  "max-client-version",
			Usage: "maximum client version allowed to connect",
		},
		&cli.BoolFlag{
			Name:  "dir-stats",
			Usage: "enable dir stats, which is necessary for fast summary and dir quota",
		},
		&cli.BoolFlag{
			Name:  "user-group-quota",
			Usage: "enable user and group quota management",
		},
	})
}

func configFlags() []cli.Flag {
	return []cli.Flag{
		&cli.BoolFlag{
			Name:    "yes",
			Aliases: []string{"y"},
			Usage:   "automatically answer 'yes' to all prompts and run non-interactively",
		},
		&cli.BoolFlag{
			Name:  "force",
			Usage: "skip sanity check and force update the configurations",
		},
	}
}

func warn(format string, a ...interface{}) {
	fmt.Printf("\033[1;33mWARNING\033[0m: "+format+"\n", a...)
}

func userConfirmed() bool {
	fmt.Print("Proceed anyway? [y/N]: ")
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		if text := strings.ToLower(scanner.Text()); text == "y" || text == "yes" {
			return true
		} else if text == "" || text == "n" || text == "no" {
			return false
		} else {
			fmt.Print("Please input y(yes) or n(no): ")
		}
	}
	return false
}

func config(ctx *cli.Context) error {
	setup(ctx, 1)
	removePassword(ctx.Args().Get(0))
	m := meta.NewClient(ctx.Args().Get(0), nil)

	format, err := m.Load(false)
	if err != nil {
		return err
	}
	if len(ctx.LocalFlagNames()) == 0 {
		fmt.Println(format)
		return nil
	}

	originDirStats := format.DirStats
	originUGQuota := format.UserGroupQuota
	var quota, storage, trash, clientVer bool
	var msg strings.Builder
	encrypted := format.KeyEncrypted
	for _, flag := range ctx.LocalFlagNames() {
		switch flag {
		case "capacity":
			if new := utils.ParseBytes(ctx, flag, 'G'); new != format.Capacity {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag,
					humanize.IBytes(format.Capacity), humanize.IBytes(new)))
				format.Capacity = new
				quota = true
			}
		case "inodes":
			if new := ctx.Uint64(flag); new != format.Inodes {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag,
					humanize.Comma(int64(format.Inodes)), humanize.Comma(int64(new))))
				format.Inodes = new
				quota = true
			}
		case "storage":
			if new := ctx.String(flag); new != format.Storage {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.Storage, new))
				format.Storage = new
				storage = true
			}
		case "bucket":
			// bucket will be accessed before storage, so it is necessary to determine if storage is a file
			if new := ctx.String(flag); new != format.Bucket {
				effectiveStorage := format.Storage
				if ctx.IsSet("storage") {
					effectiveStorage = ctx.String("storage")
				}
				if effectiveStorage == "file" {
					if p, err := filepath.Abs(new); err == nil {
						new = p + "/"
					} else {
						logger.Fatalf("Failed to get absolute path of %s: %s", new, err)
					}
				}
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.Bucket, new))
				format.Bucket = new
				storage = true
			}
		case "access-key":
			if new := ctx.String(flag); new != format.AccessKey {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.AccessKey, new))
				format.AccessKey = new
				storage = true
			}
		case "secret-key": // always update
			msg.WriteString(fmt.Sprintf("%10s: updated\n", flag))
			if err := format.Decrypt(); err != nil && strings.Contains(err.Error(), "secret was removed") {
				logger.Warnf("decrypt secrets: %s", err)
			}
			format.SecretKey = ctx.String(flag)
			storage = true
		case "session-token": // always update
			msg.WriteString(fmt.Sprintf("%10s: updated\n", flag))
			if err := format.Decrypt(); err != nil && strings.Contains(err.Error(), "secret was removed") {
				logger.Warnf("decrypt secrets: %s", err)
			}
			format.SessionToken = ctx.String(flag)
			storage = true
		case "storage-class": // always update
			if new := ctx.String(flag); new != format.StorageClass {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, format.StorageClass, new))
				format.StorageClass = new
				storage = true
			}
		case "upload-limit":
			if new := utils.ParseMbps(ctx, flag); new != format.UploadLimit {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, utils.Mbps(format.UploadLimit), utils.Mbps(new)))
				format.UploadLimit = new
			}
		case "download-limit":
			if new := utils.ParseMbps(ctx, flag); new != format.DownloadLimit {
				msg.WriteString(fmt.Sprintf("%10s: %s -> %s\n", flag, utils.Mbps(format.DownloadLimit), utils.Mbps(new)))
				format.DownloadLimit = new
			}
		case "trash-days":
			if new := ctx.Int(flag); new != format.TrashDays {
				if new < 0 {
					return fmt.Errorf("Invalid trash days: %d", new)
				}
				msg.WriteString(fmt.Sprintf("%10s: %d -> %d\n", flag, format.TrashDays, new))
				format.TrashDays = new
				trash = true
			}
		case "dir-stats":
			if new := ctx.Bool(flag); new != format.DirStats {
				msg.WriteString(fmt.Sprintf("%10s: %t -> %t\n", flag, format.DirStats, new))
				format.DirStats = new
			}
		case "user-group-quota":
			if new := ctx.Bool(flag); new != format.UserGroupQuota {
				msg.WriteString(fmt.Sprintf("%10s: %t -> %t\n", flag, format.UserGroupQuota, new))
				format.UserGroupQuota = new
			}
		case "min-client-version":
			if new := ctx.String(flag); new != format.MinClientVersion {
				if version.Parse(new) == nil {
					return fmt.Errorf("Invalid version string: %s", new)
				}
				msg.WriteString(fmt.Sprintf("%s: %s -> %s\n", flag, format.MinClientVersion, new))
				format.MinClientVersion = new
				clientVer = true
			}
		case "max-client-version":
			if new := ctx.String(flag); new != format.MaxClientVersion {
				if version.Parse(new) == nil {
					return fmt.Errorf("Invalid version string: %s", new)
				}
				msg.WriteString(fmt.Sprintf("%s: %s -> %s\n", flag, format.MaxClientVersion, new))
				format.MaxClientVersion = new
				clientVer = true
			}
		case "enable-acl":
			if enableACL := ctx.Bool(flag); enableACL != format.EnableACL {
				if enableACL {
					msg.WriteString(fmt.Sprintf("%s: %v -> %v\n", flag, format.EnableACL, true))
					msg.WriteString(fmt.Sprintf("%s: %s -> %s\n", "min-client-version", format.MinClientVersion, "1.2.0-A"))
					format.EnableACL = true
					format.MinClientVersion = "1.2.0-A"
					clientVer = true
				} else {
					return errors.New("cannot disable acl")
				}
			}
		case "ranger-rest-url":
			if newUrl := ctx.String(flag); newUrl != format.RangerRestUrl {
				msg.WriteString(fmt.Sprintf("%s: %s -> %s\n", flag, format.RangerRestUrl, newUrl))
				format.RangerRestUrl = newUrl
				format.MinClientVersion = "1.3.0-A"
				clientVer = true
			}
		case "ranger-service":
			if newService := ctx.String(flag); newService != format.RangerService {
				msg.WriteString(fmt.Sprintf("%s: %s -> %s\n", flag, format.RangerService, newService))
				format.RangerService = newService
				format.MinClientVersion = "1.3.0-A"
				clientVer = true
			}
		case "kerberos-config-file":
			msg.WriteString(fmt.Sprintf("%s: updated\n", flag))
			format.KerbConf = readKerbConf(ctx.String(flag))
			format.MinClientVersion = "1.4.0-A"
			clientVer = true
		}
	}
	if msg.Len() == 0 {
		fmt.Println("Nothing changed.")
		return nil
	}

	if !ctx.Bool("force") {
		yes := ctx.Bool("yes")
		if storage {
			blob, err := createStorage(*format)
			if err != nil {
				return err
			}
			if err = test(blob); err != nil {
				return err
			}
		}
		if quota {
			var totalSpace, availSpace, iused, iavail uint64
			_ = m.StatFS(meta.Background(), meta.RootInode, &totalSpace, &availSpace, &iused, &iavail)
			usedSpace := totalSpace - availSpace
			if format.Capacity > 0 && usedSpace >= format.Capacity ||
				format.Inodes > 0 && iused >= format.Inodes {
				warn("New quota is too small (used / quota): %d / %d bytes, %d / %d inodes.",
					usedSpace, format.Capacity, iused, format.Inodes)
				if !yes && !userConfirmed() {
					return fmt.Errorf("Aborted.")
				}
			}
		}
		if trash && format.TrashDays == 0 {
			warn("The current trash will be emptied and future removed files will purged immediately.")
			if !yes && !userConfirmed() {
				return fmt.Errorf("Aborted.")
			}
		}
		if originDirStats && !format.DirStats {
			qs := make(map[string]*meta.Quota)
			err := m.HandleQuota(meta.Background(), meta.QuotaList, "", 0, 0, qs, false, false, false)
			if err != nil {
				return errors.Wrap(err, "list quotas")
			}
			if len(qs) != 0 {
				paths := make([]string, 0, len(qs))
				for path := range qs {
					paths = append(paths, path)
				}
				return fmt.Errorf("cannot disable dir stats when there are still %d dir quotas: %v", len(qs), paths)
			}
		}
		if clientVer {
			if format.CheckVersion() != nil {
				warn("Clients with the same version of this will be rejected after modification.")
				if !yes && !userConfirmed() {
					return fmt.Errorf("Aborted.")
				}
			}

			// check all clients
			if sessions, err := m.ListSessions(); err == nil {
				warnMsg := ""
				for _, session := range sessions {
					if err := format.CheckCliVersion(version.Parse(session.Version)); err != nil {
						warnMsg += fmt.Sprintf("host %s pid %d client version error: %s\n", session.HostName, session.ProcessID, err)
					}
				}
				if warnMsg != "" {
					fmt.Println(warnMsg)
				}
			}
		}
	}

	if encrypted || ctx.Bool("encrypt-secret") {
		if err = format.Encrypt(); err != nil {
			logger.Fatalf("Format encrypt: %s", err)
		}
	}
	if err = m.Init(format, false); err == nil {
		fmt.Println(msg.String()[:msg.Len()-1])
	}

	if !originUGQuota && format.UserGroupQuota {
		if err = m.ScanUserGroupUsage(meta.Background()); err != nil {
			logger.Warnf("Scan user group usage: %s", err)
		}
	}

	return err
}


================================================
FILE: cmd/config_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/json"
	"os"
	"testing"

	"github.com/agiledragon/gomonkey/v2"
	"github.com/juicedata/juicefs/pkg/meta"
)

// mutate_test_job_number: 3
func getStdout(args []string) ([]byte, error) {
	tmp, err := os.CreateTemp("/tmp", "jfstest-*")
	if err != nil {
		return nil, err
	}
	defer tmp.Close()
	defer os.Remove(tmp.Name())
	patch := gomonkey.ApplyGlobalVar(os.Stdout, *tmp)
	defer patch.Reset()

	if err = Main(args); err != nil {
		return nil, err
	}
	return os.ReadFile(tmp.Name())
}

func TestConfig(t *testing.T) {
	_ = resetTestMeta()
	bucketPath := "/tmp/testBucket"
	_ = os.RemoveAll(bucketPath)
	if err := Main([]string{"", "format", testMeta, "--bucket", bucketPath, testVolume}); err != nil {
		t.Fatalf("format: %s", err)
	}

	if err := Main([]string{"", "config", testMeta, "--trash-days", "2"}); err != nil {
		t.Fatalf("config: %s", err)
	}
	data, err := getStdout([]string{"", "config", testMeta})
	if err != nil {
		t.Fatalf("getStdout: %s", err)
	}
	var format meta.Format
	if err = json.Unmarshal(data, &format); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if format.TrashDays != 2 {
		t.Fatalf("trash-days %d != expect 2", format.TrashDays)
	}

	if err = Main([]string{"", "config", testMeta, "--capacity", "10", "--inodes", "1000000"}); err != nil {
		t.Fatalf("config: %s", err)
	}
	if err = Main([]string{"", "config", testMeta, "--bucket", "/tmp/newBucket", "--access-key", "testAK", "--secret-key", "testSK", "--session-token", "token"}); err != nil {
		t.Fatalf("config: %s", err)
	}
	if data, err = getStdout([]string{"", "config", testMeta}); err != nil {
		t.Fatalf("getStdout: %s", err)
	}
	if err = json.Unmarshal(data, &format); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if format.Capacity != 10<<30 || format.Inodes != 1000000 ||
		format.Bucket != "/tmp/newBucket/" || format.AccessKey != "testAK" || format.SecretKey != "removed" || format.SessionToken != "removed" {
		t.Fatalf("unexpect format: %+v", format)
	}

	if err = Main([]string{"", "config", testMeta, "--bucket", "http://localhost:9000/miniofs", "--storage", "minio", "--force"}); err != nil {
		t.Fatalf("config: %s", err)
	}
	if data, err = getStdout([]string{"", "config", testMeta}); err != nil {
		t.Fatalf("getStdout: %s", err)
	}
	if err = json.Unmarshal(data, &format); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if format.Bucket != "http://localhost:9000/miniofs" || format.Storage != "minio" {
		t.Fatalf("unexpect format: %+v", format)
	}

	if err = Main([]string{"", "config", testMeta, "--bucket", "http://localhost:9000/miniofs2", "--force"}); err != nil {
		t.Fatalf("config: %s", err)
	}
	if data, err = getStdout([]string{"", "config", testMeta}); err != nil {
		t.Fatalf("getStdout: %s", err)
	}
	if err = json.Unmarshal(data, &format); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if format.Bucket != "http://localhost:9000/miniofs2" || format.Storage != "minio" {
		t.Fatalf("unexpect format: %+v", format)
	}
}


================================================
FILE: cmd/debug.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"archive/zip"
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/urfave/cli/v2"
)

var defaultOutDir = filepath.Join(".", "debug")

func cmdDebug() *cli.Command {
	return &cli.Command{
		Name:      "debug",
		Action:    debug,
		Category:  "INSPECTOR",
		ArgsUsage: "MOUNTPOINT",
		Usage:     "Collect and display system static and runtime information",
		Description: `
It collects and displays information from multiple dimensions such as the running environment and system logs, etc.

Examples:
$ juicefs debug /mnt/jfs

# Result will be output to /var/log/
$ juicefs debug --out-dir=/var/log /mnt/jfs

# Get the last up to 1000 log entries
$ juicefs debug --out-dir=/var/log --limit=1000 /mnt/jfs
`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "out-dir",
				Value: defaultOutDir,
				Usage: "the output directory of the result file",
			},
			&cli.Uint64Flag{
				Name:  "limit",
				Usage: "the number of last entries to be collected",
				Value: 5000,
			},
			&cli.Uint64Flag{
				Name:  "stats-sec",
				Value: 5,
				Usage: "stats sampling duration",
			},
			&cli.Uint64Flag{
				Name:  "trace-sec",
				Value: 5,
				Usage: "trace sampling duration",
			},
			&cli.Uint64Flag{
				Name:  "profile-sec",
				Value: 30,
				Usage: "profile sampling duration",
			},
		},
	}
}

func copyFileOnWindows(srcPath, destPath string) error {
	srcFile, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	defer closeFile(srcFile)
	destFile, err := os.Create(destPath)
	if err != nil {
		return err
	}
	defer closeFile(destFile)
	if _, err := io.Copy(destFile, srcFile); err != nil {
		return err
	}
	return nil
}

func copyFile(srcPath, destPath string, requireRootPrivileges bool) error {
	if runtime.GOOS == "windows" {
		return utils.WithTimeout(context.TODO(), func(context.Context) error {
			return copyFileOnWindows(srcPath, destPath)
		}, 3*time.Second)
	}

	var copyArgs []string
	if requireRootPrivileges {
		copyArgs = append(copyArgs, "sudo")
	}
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	copyArgs = append(copyArgs, "/bin/sh", "-c", fmt.Sprintf("cat %s > %s", srcPath, destPath))
	return exec.CommandContext(ctx, copyArgs[0], copyArgs[1:]...).Run()
}

var logArg = regexp.MustCompile(`--log(\s*=?\s*)(\S+)`)

func getLogPath(cmd string) (string, error) {
	var logPath string
	tmp := logArg.FindStringSubmatch(cmd)
	if len(tmp) == 3 {
		logPath = tmp[2]
	} else {
		logPath = filepath.Join(getDefaultLogDir(), "juicefs.log")
	}

	return logPath, nil
}

func closeFile(file *os.File) {
	if err := file.Close(); err != nil {
		logger.Fatalf("failed to close file %s: %v", file.Name(), err)
	}
}

func getPprofPort(pid, amp string, requireRootPrivileges bool) (int, error) {
	cfg := vfs.Config{}
	_ = utils.WithTimeout(context.TODO(), func(context.Context) error {
		content, err := readConfig(amp)
		if err != nil {
			logger.Warnf("failed to read config file: %v", err)
		}
		if err := json.Unmarshal(content, &cfg); err != nil {
			logger.Warnf("failed to unmarshal config file: %v", err)
		}
		return nil
	}, 3*time.Second)

	if cfg.Port != nil {
		if len(strings.Split(cfg.Port.DebugAgent, ":")) >= 2 {
			if port, err := strconv.Atoi(strings.Split(cfg.Port.DebugAgent, ":")[1]); err != nil {
				logger.Warnf("failed to parse debug agent port: %v", err)
			} else {
				return port, nil
			}
		}
	}

	var lsofArgs []string
	if requireRootPrivileges {
		lsofArgs = append(lsofArgs, "sudo")
	}
	lsofArgs = append(lsofArgs, "/bin/sh", "-c", "lsof -i -nP | grep -v grep | grep LISTEN | grep "+pid)
	ret, err := exec.Command(lsofArgs[0], lsofArgs[1:]...).CombinedOutput()
	if err != nil {
		return 0, fmt.Errorf("failed to execute command `%s`: %v", strings.Join(lsofArgs, " "), err)
	}
	logger.Debugf("lsof output: \n%s", string(ret))
	lines := strings.Split(string(ret), "\n")
	if len(lines) == 0 {
		return 0, fmt.Errorf("pprof will be collected, but no listen port")
	}

	var listenPort = -1
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) != 0 {
			port, err := func() (port int, err error) {
				defer func() {
					e := recover()
					if e != nil {
						err = fmt.Errorf("failed to parse listen port: %v", e)
					}
				}()
				port, err = strconv.Atoi(strings.Split(fields[len(fields)-2], ":")[1])
				if err != nil {
					logger.Errorf("failed to parse port %v: %v", port, err)
				}
				return
			}()
			if err != nil {
				continue
			}
			if port >= 6060 && port <= 6099 && port > listenPort {
				if err := checkPort(port, amp); err == nil {
					listenPort = port
				}
				continue
			}
		}
	}

	if listenPort == -1 {
		return 0, fmt.Errorf("no valid pprof port found")
	}
	return listenPort, nil
}

func getRequest(url string, timeout time.Duration) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("error creating GET request: %v", err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error GET request: %v", err)
	}
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("error GET request, status code %d", resp.StatusCode)
	}

	defer func(body io.ReadCloser) {
		if err := body.Close(); err != nil {
			logger.Errorf("error closing body: %v", err)
		}
	}(resp.Body)
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error reading response: %v", err)
	}

	return body, nil
}

// check pprof service status
func checkPort(port int, amp string) error {
	url := fmt.Sprintf("http://localhost:%d/debug/pprof/cmdline?debug=1", port)
	resp, err := getRequest(url, 3*time.Second)
	if err != nil {
		return fmt.Errorf("error checking pprof alive: %v", err)
	}
	resp = bytes.ReplaceAll(resp, []byte{0}, []byte{' '})
	fields := strings.Fields(string(resp))
	flag := false
	for _, field := range fields {
		if amp == field {
			flag = true
		}
	}
	if !flag {
		return fmt.Errorf("mount point mismatch: \n%s\n%s", resp, amp)
	}
	return nil
}

type metricItem struct {
	name, url string
}

func reqAndSaveMetric(name string, metric metricItem, outDir string, timeout time.Duration) error {
	resp, err := getRequest(metric.url, timeout)
	if err != nil {
		return fmt.Errorf("error getting metric: %v", err)
	}
	retPath := filepath.Join(outDir, fmt.Sprintf("juicefs.%s", metric.name))
	retFile, err := os.Create(retPath)
	if err != nil {
		logger.Fatalf("error creating metric file %s: %v", retPath, err)
	}
	defer closeFile(retFile)

	if name == "cmdline" {
		resp = bytes.ReplaceAll(resp, []byte{0}, []byte{' '})
	}

	writer := bufio.NewWriter(retFile)
	if _, err := writer.Write(resp); err != nil {
		return fmt.Errorf("failed to write metric %s: %v", name, err)
	}
	return writer.Flush()
}

func checkAgent(cmd string) bool {
	for _, field := range strings.Fields(cmd) {
		if field == "--no-agent" {
			return false
		}
	}
	return true
}

func geneZipFile(srcPath, destPath string) error {
	zipFile, err := os.Create(destPath)
	if err != nil {
		return err
	}
	defer closeFile(zipFile)
	archive := zip.NewWriter(zipFile)
	defer func() {
		if err := archive.Close(); err != nil {
			logger.Fatalf("error closing zip archive: %v", err)
		}
	}()

	return filepath.Walk(srcPath, func(path string, info os.FileInfo, _ error) error {
		if path == srcPath {
			return nil
		}

		header, err := zip.FileInfoHeader(info)
		if err != nil {
			return err
		}
		header.Name = strings.TrimPrefix(path, srcPath+`/`)
		if info.IsDir() {
			header.Name += `/`
		} else {
			header.Method = zip.Deflate
		}

		writer, err := archive.CreateHeader(header)
		if err != nil {
			return err
		}
		if !info.IsDir() {
			file, err := os.Open(path)
			if err != nil {
				return err
			}
			defer closeFile(file)
			if _, err := io.Copy(writer, file); err != nil {
				return err
			}
		}
		return nil
	})
}

func collectPprof(ctx *cli.Context, cmd string, pid string, amp string, requireRootPrivileges bool, currDir string, wg *sync.WaitGroup) error {
	if !checkAgent(cmd) {
		logger.Warnf("No agent found, the pprof metrics will not be collected")
		return nil
	}

	port, err := getPprofPort(pid, amp, requireRootPrivileges)
	if err != nil {
		return fmt.Errorf("failed to get pprof port: %v", err)
	}
	baseUrl := fmt.Sprintf("http://localhost:%d/debug/pprof/", port)
	logger.Infof("The pprof base url: %s", baseUrl)
	trace := ctx.Uint64("trace-sec")
	profile := ctx.Uint64("profile-sec")
	metrics := map[string]metricItem{
		"allocs":       {name: "allocs.pb.gz", url: baseUrl + "allocs"},
		"blocks":       {name: "block.pb.gz", url: baseUrl + "block"},
		"cmdline":      {name: "cmdline.txt", url: baseUrl + "cmdline"},
		"goroutine":    {name: "goroutine.pb.gz", url: baseUrl + "goroutine"},
		"stack":        {name: "goroutine.stack.txt", url: baseUrl + "goroutine?debug=1"},
		"stack-detail": {name: "goroutine.stack.detail.txt", url: baseUrl + "goroutine?debug=2"},
		"heap":         {name: "heap.pb.gz", url: baseUrl + "heap"},
		"mutex":        {name: "mutex.pb.gz", url: baseUrl + "mutex"},
		"threadcreate": {name: "threadcreate.pb.gz", url: baseUrl + "threadcreate"},
		"trace":        {name: fmt.Sprintf("trace.%ds.pb.gz", trace), url: fmt.Sprintf("%strace?seconds=%d", baseUrl, trace)},
		"profile":      {name: fmt.Sprintf("profile.%ds.pb.gz", profile), url: fmt.Sprintf("%sprofile?seconds=%d", baseUrl, profile)},
	}

	pprofOutDir := filepath.Join(currDir, "pprof")
	if err := os.Mkdir(pprofOutDir, os.ModePerm); err != nil {
		return fmt.Errorf("failed to create out directory: %v", err)
	}

	for name, metric := range metrics {
		wg.Add(1)
		go func(name string, metric metricItem) {
			timeout := 3 * time.Second
			defer wg.Done()
			if name == "profile" {
				logger.Infof("Profile metrics are being sampled, sampling duration: %ds", profile)
				timeout = time.Duration(profile+5) * time.Second
			}
			if name == "trace" {
				logger.Infof("Trace metrics are being sampled, sampling duration: %ds", trace)
				timeout = time.Duration(trace+5) * time.Second
			}
			if err := reqAndSaveMetric(name, metric, pprofOutDir, timeout); err != nil {
				logger.Errorf("Failed to get and save metric %s: %v", name, err)
			}
		}(name, metric)
	}
	return nil
}

func collectLog(ctx *cli.Context, cmd string, requireRootPrivileges bool, currDir string, uid string) error {
	mountdByWinSystem := runtime.GOOS == "windows" && uid == "S-1-5-18" // https://learn.microsoft.com/en-us/windows/win32/secauthz/well-known-sids
	if !(strings.Contains(cmd, "-d") || strings.Contains(cmd, "--background")) && !mountdByWinSystem {
		logger.Warnf("The juicefs mount by foreground, the log will not be collected")
		return nil
	}
	logPath, err := getLogPath(cmd)
	if err != nil {
		return fmt.Errorf("failed to get log path: %v", err)
	}
	limit := ctx.Uint64("limit")
	retLogPath := filepath.Join(currDir, "juicefs.log")

	if runtime.GOOS == "windows" {
		// check powershell is installed
		_, err = exec.LookPath("powershell")
		if err != nil {
			logger.Warnf("Powershell is not installed, the log will not be collected")
			return nil
		}

		copyArgs := []string{"powershell", "-Command", fmt.Sprintf("Get-Content -Tail %d %s > %s", limit, logPath, retLogPath)}
		logger.Infof("The last %d lines of %s will be collected", limit, logPath)
		timeoutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		return exec.CommandContext(timeoutCtx, copyArgs[0], copyArgs[1:]...).Run()
	} else {
		var copyArgs []string
		if requireRootPrivileges {
			copyArgs = append(copyArgs, "sudo")
		}
		copyArgs = append(copyArgs, "/bin/sh", "-c", fmt.Sprintf("tail -n %d %s > %s", limit, logPath, retLogPath))
		logger.Infof("The last %d lines of %s will be collected", limit, logPath)
		timeoutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		return exec.CommandContext(timeoutCtx, copyArgs[0], copyArgs[1:]...).Run()
	}
}

func collectSysInfo(ctx *cli.Context, currDir string) error {
	sysInfo := utils.GetSysInfo()
	result := fmt.Sprintf(`Platform: 
%s %s
%s`, runtime.GOOS, runtime.GOARCH, sysInfo)

	sysPath := filepath.Join(currDir, "system-info.log")
	sysFile, err := os.Create(sysPath)
	if err != nil {
		return fmt.Errorf("failed to create system info file %s: %v", sysPath, err)
	}
	defer closeFile(sysFile)
	if _, err = sysFile.WriteString(result); err != nil {
		return fmt.Errorf("failed to write system info file %s: %v", sysPath, err)
	}

	fmt.Printf("\n%s\n", result)
	return nil
}

func collectSpecialFile(ctx *cli.Context, amp string, currDir string, requireRootPrivileges bool, wg *sync.WaitGroup) error {
	prefixed := true
	configName := ".jfs.config"
	_ = utils.WithTimeout(context.TODO(), func(context.Context) error {
		if !utils.Exists(filepath.Join(amp, configName)) {
			configName = ".config"
			prefixed = false
		}
		return nil
	}, 3*time.Second)
	if err := copyFile(filepath.Join(amp, configName), filepath.Join(currDir, "config.txt"), requireRootPrivileges); err != nil {
		return fmt.Errorf("failed to get volume config %s: %v", configName, err)
	}

	statsName := ".jfs.stats"
	if !prefixed {
		statsName = statsName[4:]
	}
	stats := ctx.Uint64("stats-sec")
	wg.Add(1)
	go func() {
		defer wg.Done()
		srcPath := filepath.Join(amp, statsName)
		destPath := filepath.Join(currDir, "stats.txt")
		if err := copyFile(srcPath, destPath, requireRootPrivileges); err != nil {
			logger.Errorf("Failed to get volume config %s: %v", statsName, err)
		}

		logger.Infof("Stats metrics are being sampled, sampling duration: %ds", stats)
		time.Sleep(time.Second * time.Duration(stats))
		destPath = filepath.Join(currDir, fmt.Sprintf("stats.%ds.txt", stats))
		if err := copyFile(srcPath, destPath, requireRootPrivileges); err != nil {
			logger.Errorf("Failed to get volume config %s: %v", statsName, err)
		}
	}()
	return nil
}

func debug(ctx *cli.Context) error {
	setup(ctx, 1)
	mp := ctx.Args().First()
	var inode uint64
	if err := utils.WithTimeout(context.TODO(), func(context.Context) error {
		var err error
		if inode, err = utils.GetFileInode(mp); err != nil {
			return fmt.Errorf("failed to lookup inode for %s: %s", mp, err)
		}
		return nil
	}, 3*time.Second); err != nil {
		logger.Warnf(err.Error())
		logger.Warnf("assuming the mount point is JuiceFS mount point")
	} else {
		if inode != uint64(meta.RootInode) {
			return fmt.Errorf("path %s is not a mount point", mp)
		}
	}

	amp, err := filepath.Abs(mp)
	if err != nil {
		return fmt.Errorf("failed to get absolute path: %v", err)
	}
	timestamp := time.Now().Format("20060102150405")
	prefix := strings.Trim(strings.Join(strings.Split(amp, "/"), "-"), "-")
	if runtime.GOOS == "windows" {
		prefix = strings.ReplaceAll(prefix, ":", "")
	}
	outDir := ctx.String("out-dir")
	currDir := filepath.Join(outDir, fmt.Sprintf("%s-%s", prefix, timestamp))
	if err := os.MkdirAll(currDir, os.ModePerm); err != nil {
		return fmt.Errorf("failed to create current out dir %s: %v", currDir, err)
	}

	if err := collectSysInfo(ctx, currDir); err != nil {
		logger.Errorf("Failed to collect system info: %v", err)
	}

	uid, pid, cmd, err := getCmdMount(amp)
	logger.Infof("mount point:%s pid:%s uid:%s", amp, pid, uid)
	if err != nil {
		return fmt.Errorf("failed to get mount command: %v", err)
	}
	fmt.Printf("\nMount Command:\n%s\n\n", cmd)

	requireRootPrivileges := false
	if (uid == "0" || uid == "root") && os.Getuid() != 0 {
		fmt.Println("Mount point is mounted by the root user, may ask for root privilege...")
		requireRootPrivileges = true
	}

	var wg sync.WaitGroup
	if err := collectSpecialFile(ctx, amp, currDir, requireRootPrivileges, &wg); err != nil {
		logger.Errorf("Failed to collect special file: %v", err)
	}

	if err := collectLog(ctx, cmd, requireRootPrivileges, currDir, uid); err != nil {
		logger.Errorf("Failed to collect log: %v", err)
	}

	if err := collectPprof(ctx, cmd, pid, amp, requireRootPrivileges, currDir, &wg); err != nil {
		logger.Errorf("Failed to collect pprof: %v", err)
	}

	wg.Wait()
	abs, _ := filepath.Abs(currDir)
	logger.Infof("All files are collected to %s", abs)
	return geneZipFile(currDir, filepath.Join(outDir, fmt.Sprintf("%s-%s.zip", prefix, timestamp)))
}


================================================
FILE: cmd/debug_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestDebug(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	require.NotNil(t, Main([]string{"", "debug", "/jfs/test/mp"}), "mount point does not exist")
	require.NotNil(t, Main([]string{"", "debug", "./"}), "directory is not a mount point")
	require.NotNil(t, Main([]string{"", "debug", "--out-dir", "./debug_test.go", testMountPoint}), "specify a file as out dir")

	cases := []struct {
		arg string
		val string
	}{
		{"--log /var/log/jfs.log", "/var/log/jfs.log"},
		{"--log=/var/log/jfs.log", "/var/log/jfs.log"},
		{"--log   =   /var/log/jfs.log", "/var/log/jfs.log"},
		{"--log=    /var/log/jfs.log", "/var/log/jfs.log"},
		{"--log    =/var/log/jfs.log", "/var/log/jfs.log"},
		{"--log      /var/log/jfs.log", "/var/log/jfs.log"},
	}
	for i, c := range cases {
		require.True(t, logArg.FindStringSubmatch(c.arg)[2] == c.val, fmt.Sprintf("valid log arg %d", i))
	}
}


================================================
FILE: cmd/debug_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"encoding/json"
	"fmt"
	"os/exec"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
)

func getCmdMount(mp string) (uid, pid, cmd string, err error) {
	var tmpPid string
	_ = utils.WithTimeout(context.TODO(), func(context.Context) error {
		content, err := readConfig(mp)
		if err != nil {
			logger.Warnf("failed to read config file: %v", err)
		}
		cfg := vfs.Config{}
		if err := json.Unmarshal(content, &cfg); err != nil {
			logger.Warnf("failed to unmarshal config file: %v", err)
		}
		if cfg.Pid != 0 {
			tmpPid = strconv.Itoa(cfg.Pid)
		}
		return nil
	}, 3*time.Second)

	var psArgs []string
	if tmpPid != "" {
		pid = tmpPid
		psArgs = []string{"/bin/sh", "-c", fmt.Sprintf("ps -f -p %s", pid)}
	} else {
		psArgs = []string{"/bin/sh", "-c", fmt.Sprintf("ps -ef | grep -v grep | grep mount | grep %s", mp)}
	}
	ret, err := exec.Command(psArgs[0], psArgs[1:]...).CombinedOutput()
	if err != nil {
		return "", "", "", fmt.Errorf("failed to execute command `%s`: %v", strings.Join(psArgs, " "), err)
	}
	var find bool
	var ppid string
	lines := strings.Split(string(ret), "\n")
	for i := len(lines) - 1; i >= 0; i-- {
		line := lines[i]
		fields := strings.Fields(line)
		if len(fields) <= 7 {
			continue
		}
		cmdFields := fields[7:]
		for _, arg := range cmdFields {
			if mp == arg {
				if find {
					newCmd := strings.Join(fields[7:], " ")
					newUid, newPid, newPpid := strings.TrimSpace(fields[0]), strings.TrimSpace(fields[1]), strings.TrimSpace(fields[2])
					if newPid == ppid {
						return uid, pid, cmd, nil
					} else if pid == newPpid {
						return newUid, newPid, newCmd, nil
					} else {
						return "", "", "", fmt.Errorf("find more than one mount process for %s", mp)
					}
				}
				cmd = strings.Join(fields[7:], " ")
				uid, pid, ppid = strings.TrimSpace(fields[0]), strings.TrimSpace(fields[1]), strings.TrimSpace(fields[2])
				find = true
			}
		}
	}
	if cmd == "" {
		return "", "", "", fmt.Errorf("no mount command found for %s", mp)
	}
	return uid, pid, cmd, nil
}


================================================
FILE: cmd/debug_windows.go
================================================
package cmd

/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"golang.org/x/sys/windows"
)

func getprocessCommandLine(pid int) (string, error) {
	cmd := exec.Command("wmic", "process", "where", "ProcessID="+strconv.Itoa(pid), "get", "CommandLine")
	out, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to run command line: %s, %v", cmd.String(), err)
	}

	lines := strings.Split(string(out), "\r\n")
	if len(lines) < 2 {
		return "", fmt.Errorf("failed to find command line for pid: %d", pid)
	}

	for _, line := range lines[1:] {
		sline := strings.TrimSpace(line)
		if sline == "" {
			continue
		}
		return sline, nil
	}

	return "", fmt.Errorf("cannot find command line for pid %d. If the juicefs are mounted at background, Please rerun this with the admin permission.", pid)
}

func findMountProcess(mp string) (int, error) {
	processName := filepath.Base(os.Args[0])
	cmd := exec.Command("wmic", "process", "where", fmt.Sprintf("name='%s'", processName), "get", "CommandLine,ProcessId")
	out, err := cmd.CombinedOutput()
	if err != nil {
		return 0, fmt.Errorf("failed to exec command line: %s, %s", cmd.String(), err)
	}

	lines := strings.Split(string(out), "\r\n")
	if len(lines) < 2 {
		return 0, fmt.Errorf("failed to find mount process")
	}

	mp = strings.TrimRight(mp, "\\")
	for _, line := range lines[1:] {
		sline := strings.TrimSpace(line)

		if sline == "" {
			continue
		}

		// the first part of commandline contains 'xxx/mount.exe"'
		slines := strings.SplitN(sline, ".exe\" ", 2)
		if len(slines) < 2 {
			logger.Warnf("failed to split command line: %s", sline)
			continue
		}

		sline = slines[1]
		logger.Infof("sline: %s", sline)

		args := strings.Split(sline, " ")
		if len(args) < 3 {
			continue
		}
		mpFound := false
		mountFound := false
		for _, arg := range args {
			arg = strings.TrimSpace(arg)
			if arg == "" {
				continue
			}

			if arg == "mount" {
				mountFound = true
				continue
			}

			arg = strings.TrimRight(arg, "\\")

			if strings.EqualFold(arg, mp) {
				mpFound = true
			}
		}

		if mpFound && mountFound {
			// THE LAST PART IS PID
			pid, err := strconv.Atoi(args[len(args)-1])
			if err != nil {
				return 0, fmt.Errorf("failed to parse pid: %s", args[len(args)-1])
			}
			return pid, nil
		}
	}

	return 0, fmt.Errorf("cannot find the mount process for %s", mp)
}

func getProcessUserSid(pid int) (string, error) {
	h, err := windows.OpenProcess(windows.PROCESS_QUERY_INFORMATION, false, uint32(pid))
	if err != nil {
		return "", err
	}
	defer windows.CloseHandle(h)

	var token windows.Token
	err = windows.OpenProcessToken(h, windows.TOKEN_QUERY, &token)
	if err != nil {
		return "", err
	}
	defer token.Close()

	user, err := token.GetTokenUser()
	if err != nil {
		return "", err
	}

	return user.User.Sid.String(), nil

}

func getCmdMount(mp string) (uid, pid, cmd string, err error) {
	var tmpPid string
	_ = utils.WithTimeout(context.TODO(), func(context.Context) error {
		content, err := readConfig(mp)
		if err != nil {
			logger.Warnf("failed to read config file: %v", err)
		}
		cfg := vfs.Config{}
		if err := json.Unmarshal(content, &cfg); err != nil {
			logger.Warnf("failed to unmarshal config file: %v", err)
		}
		if cfg.Pid != 0 {
			tmpPid = strconv.Itoa(cfg.Pid)
		}
		return nil
	}, 3*time.Second)

	foundPid := 0
	if tmpPid != "" {
		pid = tmpPid
		foundPid, err = strconv.Atoi(pid)
		if err != nil {
			return "", "", "", fmt.Errorf("failed to parse pid: %s", pid)
		}
	} else {
		foundPid, err = findMountProcess(mp)
		if err != nil {
			return "", "", "", err
		}

		pid = strconv.Itoa(foundPid)
	}

	cmd, err = getprocessCommandLine(foundPid)
	if err != nil {
		return "", "", "", err
	}

	uid, err = getProcessUserSid(foundPid)
	if err != nil {
		return "", "", "", err
	}

	return uid, pid, cmd, nil
}


================================================
FILE: cmd/destroy.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
	"sync"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdDestroy() *cli.Command {
	return &cli.Command{
		Name:      "destroy",
		Action:    destroy,
		Category:  "ADMIN",
		Usage:     "Destroy an existing volume",
		ArgsUsage: "META-URL UUID",
		Description: `
Destroy the target volume, removing all objects in the data storage and all entries in its metadata engine.

WARNING: BE CAREFUL! This operation cannot be undone.

Examples:
$ juicefs destroy redis://localhost e94d66a8-2339-4abd-b8d8-6812df737892

Details: https://juicefs.com/docs/community/administration/destroy`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "yes",
				Aliases: []string{"y"},
				Usage:   "automatically answer 'yes' to all prompts and run non-interactively",
			},
			&cli.BoolFlag{
				Name:  "force",
				Usage: "skip sanity check and force destroy the volume",
			},
		},
	}
}

func printSessions(ss [][3]string) string {
	header := [3]string{"SID", "HostName", "MountPoint"}
	var max [3]int
	for i := 0; i < 3; i++ {
		max[i] = len(header[i])
	}
	for _, s := range ss {
		for i := 0; i < 3; i++ {
			if l := len(s[i]); l > max[i] {
				max[i] = l
			}
		}
	}

	var ret, b strings.Builder
	for i := 0; i < 3; i++ {
		b.WriteByte('+')
		b.WriteString(strings.Repeat("-", max[i]+2))
	}
	b.WriteString("+\n")
	divider := b.String()
	ret.WriteString(divider)

	b.Reset()
	for i := 0; i < 3; i++ {
		b.WriteString(" | ")
		b.WriteString(padding(header[i], max[i], ' '))
	}
	b.WriteString(" |\n")
	ret.WriteString(b.String()[1:])
	ret.WriteString(divider)

	for _, s := range ss {
		b.Reset()
		for i := 0; i < 3; i++ {
			b.WriteString(" | ")
			if spaces := max[i] - len(s[i]); spaces > 0 {
				b.WriteString(strings.Repeat(" ", spaces))
			}
			b.WriteString(s[i])
		}
		b.WriteString(" |\n")
		ret.WriteString(b.String()[1:])
	}
	ret.WriteString(divider)

	return ret.String()
}

func destroy(ctx *cli.Context) error {
	setup(ctx, 2)
	uri := ctx.Args().Get(0)
	if !strings.Contains(uri, "://") {
		uri = "redis://" + uri
	}
	removePassword(uri)
	m := meta.NewClient(uri, meta.DefaultConf())

	format, err := m.Load(true)
	if err != nil {
		logger.Fatalf("load setting: %s", err)
	}
	if uuid := ctx.Args().Get(1); uuid != format.UUID {
		logger.Fatalf("UUID %s != expected %s", uuid, format.UUID)
	}
	blob, err := createStorage(*format)
	if err != nil {
		logger.Fatalf("create object storage: %s", err)
	}

	if !ctx.Bool("force") {
		m.CleanStaleSessions(meta.Background())
		sessions, err := m.ListSessions()
		if err != nil {
			logger.Fatalf("list sessions: %s", err)
		}
		if num := len(sessions); num > 0 {
			ss := make([][3]string, num)
			for i, s := range sessions {
				ss[i] = [3]string{strconv.FormatUint(s.Sid, 10), s.HostName, s.MountPoint}
			}
			logger.Fatalf("%d sessions are active, please disconnect them first:\n%s", num, printSessions(ss))
		}
		var totalSpace, availSpace, iused, iavail uint64
		_ = m.StatFS(meta.Background(), meta.RootInode, &totalSpace, &availSpace, &iused, &iavail)

		fmt.Printf(" volume name: %s\n", format.Name)
		fmt.Printf(" volume UUID: %s\n", format.UUID)
		fmt.Printf("data storage: %s\n", blob)
		fmt.Printf("  used bytes: %d\n", totalSpace-availSpace)
		fmt.Printf(" used inodes: %d\n", iused)
		warn("The target volume will be permanently destroyed, including:")
		warn("1. ALL objects in the data storage: %s", blob)
		warn("2. ALL entries in the metadata engine: %s", utils.RemovePassword(uri))
		if !ctx.Bool("yes") && !userConfirmed() {
			logger.Fatalln("Aborted.")
		}
	}

	objs, err := object.ListAll(ctx.Context, blob, "", "", true, false)
	if err != nil {
		logger.Fatalf("list all objects: %s", err)
	}
	progress := utils.NewProgress(false)
	spin := progress.AddCountSpinner("Deleted objects")
	var failed int
	var dirs []string
	var mu sync.Mutex
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for obj := range objs {
				if obj == nil {
					break // failed listing
				}
				if obj.IsDir() {
					mu.Lock()
					dirs = append(dirs, obj.Key())
					mu.Unlock()
					continue
				}
				if err := blob.Delete(ctx.Context, obj.Key()); err == nil {
					spin.Increment()
				} else {
					failed++
					logger.Warnf("delete %s: %s", obj.Key(), err)
				}
			}
		}()
	}
	wg.Wait()
	sort.Strings(dirs)
	for i := len(dirs) - 1; i >= 0; i-- {
		if err := blob.Delete(ctx.Context, dirs[i]); err == nil {
			spin.Increment()
		} else {
			failed++
			logger.Warnf("delete %s: %s", dirs[i], err)
		}
	}
	progress.Done()
	if progress.Quiet {
		logger.Infof("Deleted %d objects", spin.Current())
	}
	if failed > 0 {
		logger.Errorf("%d objects are failed to delete, please do it manually.", failed)
	}

	if err = m.Reset(); err != nil {
		logger.Fatalf("reset meta: %s", err)
	}

	logger.Infof("The volume has been destroyed! You may need to delete cache directory manually.")
	return nil
}


================================================
FILE: cmd/dump.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"compress/gzip"
	"errors"
	"io"
	"os"
	"strings"

	"github.com/DataDog/zstd"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdDump() *cli.Command {
	return &cli.Command{
		Name:      "dump",
		Action:    dump,
		Category:  "ADMIN",
		Usage:     "Dump metadata into a file",
		ArgsUsage: "META-URL [FILE]",
		Description: `
Supports two formats: JSON format and binary format.
1. Dump metadata of the volume in JSON format so users are able to see its content in an easy way.
Output of this command can be loaded later into an empty database, serving as a method to backup
metadata or to change metadata engine.

Examples:
$ juicefs dump redis://localhost meta-dump.json
$ juicefs dump redis://localhost meta-dump.json.gz

# Dump only a subtree of the volume to STDOUT
$ juicefs dump redis://localhost --subdir /dir/in/jfs

2. Binary format is more compact, faster, and memory-efficient.

Examples:
$ juicefs dump redis://localhost meta-dump.bin --binary

Details: https://juicefs.com/docs/community/metadata_dump_load`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "subdir",
				Usage: "only dump a sub-directory",
			},
			&cli.BoolFlag{
				Name:  "keep-secret-key",
				Usage: "keep secret keys intact (WARNING: Be careful as they may be leaked)",
			},
			&cli.IntFlag{
				Name:  "threads",
				Value: 10,
				Usage: "number of threads to dump metadata",
			},
			&cli.BoolFlag{
				Name:  "fast",
				Usage: "speedup dump by load all metadata into memory (only works with JSON format and DB/KV engine)",
			},
			&cli.BoolFlag{
				Name:  "skip-trash",
				Usage: "skip files in trash",
			},
			&cli.BoolFlag{
				Name:  "binary",
				Usage: "dump metadata into a binary file (different from original JSON format, subdir/fast/skip-trash will be ignored)",
			},
		},
	}
}

func dumpMeta(m meta.Meta, dst string, threads int, keepSecret, fast, skipTrash, isBinary bool) (err error) {
	var w io.WriteCloser
	if dst == "" {
		w = os.Stdout
	} else {
		tmp := dst + ".tmp"
		fp, e := os.Create(tmp)
		if e != nil {
			return e
		}
		defer func() {
			err = errors.Join(err, fp.Close())
			if err == nil {
				err = os.Rename(tmp, dst)
			} else {
				_ = os.Remove(tmp)
			}
		}()

		if strings.HasSuffix(dst, ".gz") {
			w, _ = gzip.NewWriterLevel(fp, gzip.BestSpeed)
			defer func() {
				err = errors.Join(err, w.Close())
			}()
		} else if strings.HasSuffix(dst, ".zstd") {
			w = zstd.NewWriterLevel(fp, zstd.BestSpeed)
			defer func() {
				err = errors.Join(err, w.Close())
			}()
		} else {
			w = fp
		}
	}
	if isBinary {
		progress := utils.NewProgress(false)
		defer progress.Done()

		bars := make(map[string]*utils.Bar)
		for _, name := range meta.SegType2Name {
			bars[name] = progress.AddCountSpinner(name)
		}

		return m.DumpMetaV2(meta.Background(), w, &meta.DumpOption{
			KeepSecret: keepSecret,
			Threads:    threads,
			Progress: func(name string, cnt int) {
				bars[name].IncrBy(cnt)
			},
		})
	}
	return m.DumpMeta(w, 1, threads, keepSecret, fast, skipTrash)
}

func dump(ctx *cli.Context) error {
	setup0(ctx, 1, 2)
	metaUri := ctx.Args().Get(0)
	var dst string
	if ctx.Args().Len() > 1 {
		dst = ctx.Args().Get(1)
	}
	removePassword(metaUri)

	metaConf := meta.DefaultConf()
	metaConf.Subdir = ctx.String("subdir")
	m := meta.NewClient(metaUri, metaConf)
	if _, err := m.Load(true); err != nil {
		return err
	}
	if st := m.Chroot(meta.Background(), metaConf.Subdir); st != 0 {
		return st
	}

	threads := ctx.Int("threads")
	if threads <= 0 {
		logger.Warnf("Invalid threads number %d, reset to 1", threads)
		threads = 1
	}

	err := dumpMeta(m, dst, threads, ctx.Bool("keep-secret-key"), ctx.Bool("fast"), ctx.Bool("skip-trash"), ctx.Bool("binary"))
	if err == nil {
		if dst == "" {
			dst = "STDOUT"
		}
		logger.Infof("Dump metadata into %s succeed", dst)
	}
	return err
}


================================================
FILE: cmd/dump_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"os"
	"testing"

	"github.com/redis/go-redis/v9"
)

func TestDumpAndLoad(t *testing.T) {
	metaUrl := "redis://127.0.0.1:6379/15"
	opt, err := redis.ParseURL(metaUrl)
	if err != nil {
		t.Fatalf("ParseURL: %v", err)
	}
	rdb := redis.NewClient(opt)
	rdb.FlushDB(context.Background())

	t.Run("Test Load", func(t *testing.T) {
		loadArgs := []string{"", "load", metaUrl, "./../pkg/meta/metadata.sample"}
		err = Main(loadArgs)
		if err != nil {
			t.Fatalf("load failed: %v", err)
		}
		if rdb.DBSize(context.Background()).Val() == 0 {
			t.Fatalf("load error: %v", err)
		}
	})
	t.Run("Test dump", func(t *testing.T) {
		dumpArgs := []string{"", "dump", metaUrl, "/tmp/dump_test.json.gz"}
		err := Main(dumpArgs)
		if err != nil {
			t.Fatalf("dump error: %v", err)
		}
		_, err = os.Stat("/tmp/dump_test.json.gz")
		if err != nil {
			t.Fatalf("dump error: %v", err)
		}
	})

	rdb.FlushDB(context.Background())
	t.Run("Test load compressed", func(t *testing.T) {
		loadArgs := []string{"", "load", metaUrl, "/tmp/dump_test.json.gz"}
		err := Main(loadArgs)
		if err != nil {
			t.Fatalf("load error: %v", err)
		}
		if rdb.DBSize(context.Background()).Val() == 0 {
			t.Fatalf("load error: %v", err)
		}
	})

	t.Run("Test dump with subdir", func(t *testing.T) {
		dumpArgs := []string{"", "dump", metaUrl, "/tmp/dump_subdir_test.json", "--subdir", "d1"}
		err := Main(dumpArgs)
		if err != nil {
			t.Fatalf("dump error: %v", err)
		}
		_, err = os.Stat("/tmp/dump_subdir_test.json")
		if err != nil {
			t.Fatalf("dump error: %v", err)
		}
	})
	rdb.FlushDB(context.Background())
}


================================================
FILE: cmd/flags.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"path"
	"runtime"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/urfave/cli/v2"
)

func globalFlags() []cli.Flag {
	return []cli.Flag{
		&cli.BoolFlag{
			Name:    "verbose",
			Aliases: []string{"debug", "v"},
			Usage:   "enable debug log",
		},
		&cli.BoolFlag{
			Name:    "quiet",
			Aliases: []string{"q"},
			Usage:   "show warning and errors only",
		},
		&cli.BoolFlag{
			Name:  "trace",
			Usage: "enable trace log",
		},
		&cli.StringFlag{
			Name:   "log-level",
			Usage:  "set log level (trace, debug, info, warn, error, fatal, panic)",
			Hidden: true,
		},
		&cli.StringFlag{
			Name:  "log-id",
			Usage: "append the given log id in log, use \"random\" to use random uuid",
		},
		&cli.BoolFlag{
			Name:  "no-agent",
			Usage: "disable pprof (:6060) agent",
		},
		&cli.StringFlag{
			Name:  "pyroscope",
			Usage: "pyroscope address",
		},
		&cli.BoolFlag{
			Name:  "no-color",
			Usage: "disable colors",
		},
	}
}

func addCategory(f cli.Flag, cat string) {
	switch ff := f.(type) {
	case *cli.StringFlag:
		ff.Category = cat
	case *cli.BoolFlag:
		ff.Category = cat
	case *cli.IntFlag:
		ff.Category = cat
	case *cli.Int64Flag:
		ff.Category = cat
	case *cli.Uint64Flag:
		ff.Category = cat
	case *cli.Float64Flag:
		ff.Category = cat
	case *cli.StringSliceFlag:
		ff.Category = cat
	default:
		panic(f)
	}
}

func addCategories(cat string, flags []cli.Flag) []cli.Flag {
	for _, f := range flags {
		addCategory(f, cat)
	}
	return flags
}

func storageFlags() []cli.Flag {
	return addCategories("DATA STORAGE", []cli.Flag{
		&cli.StringFlag{
			Name:  "storage",
			Usage: "customized storage type (e.g. s3, gs, oss, cos) to access object store",
		},
		&cli.StringFlag{
			Name:  "bucket",
			Usage: "customized endpoint to access object store",
		},
		&cli.StringFlag{
			Name:  "storage-class",
			Usage: "the storage class for data written by current client",
		},
		&cli.StringFlag{
			Name:  "get-timeout",
			Value: "60s",
			Usage: "the timeout to download an object",
		},
		&cli.StringFlag{
			Name:  "put-timeout",
			Value: "60s",
			Usage: "the timeout to upload an object",
		},
		&cli.IntFlag{
			Name:  "io-retries",
			Value: 10,
			Usage: "number of retries after network failure",
		},
		&cli.IntFlag{
			Name:  "max-uploads",
			Value: 20,
			Usage: "number of connections to upload",
		},
		&cli.IntFlag{
			Name:  "max-downloads",
			Value: 200,
			Usage: "number of connections to download",
		},
		&cli.IntFlag{
			Name:  "max-stage-write",
			Value: 1000, // large enough for normal cases, also prevents unlimited concurrency in abnormal cases
			Usage: "number of threads allowed to write staged files, other requests will be uploaded directly (this option is only effective when 'writeback' mode is enabled)",
		},
		&cli.IntFlag{
			Name:  "max-deletes",
			Value: 10,
			Usage: "number of threads to delete objects",
		},
		&cli.StringFlag{
			Name:  "upload-limit",
			Usage: "bandwidth limit for upload in Mbps",
		},
		&cli.StringFlag{
			Name:  "download-limit",
			Usage: "bandwidth limit for download in Mbps",
		},
		&cli.BoolFlag{
			Name: "check-storage",
			// AK/SK should have been checked before creating volume, here checks client access to the storage
			Usage: "test storage before mounting to expose access issues early",
		},
	})
}

func getDefaultCacheDir() string {
	var defaultCacheDir = "/var/jfsCache"
	switch runtime.GOOS {
	case "linux":
		if os.Getuid() == 0 {
			break
		}
		fallthrough
	case "darwin":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			logger.Warnf("%v", err)
			homeDir = defaultCacheDir
		}
		defaultCacheDir = path.Join(homeDir, ".juicefs", "cache")
	case "windows":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			logger.Fatalf("%v", err)
			return ""
		}
		defaultCacheDir = path.Join(homeDir, ".juicefs", "cache")
	}
	return defaultCacheDir
}

func dataCacheFlags() []cli.Flag {
	var defaultCacheDir = getDefaultCacheDir()
	return addCategories("DATA CACHE", []cli.Flag{
		&cli.StringFlag{
			Name:  "buffer-size",
			Value: "300M",
			Usage: "total read/write buffering in MiB",
		},
		&cli.StringFlag{
			Name:  "max-readahead",
			Usage: "max buffering for read ahead in MiB per read session",
		},
		&cli.IntFlag{
			Name:  "prefetch",
			Value: 1,
			Usage: "prefetch N blocks in parallel",
		},
		&cli.BoolFlag{
			Name:  "writeback",
			Usage: "upload blocks in background",
		},
		&cli.StringFlag{
			Name:  "writeback-threshold-size",
			Value: "0",
			Usage: "blocks smaller than this size will be staged, 0 means all staged.",
		},
		&cli.StringFlag{
			Name:  "upload-delay",
			Value: "0s",
			Usage: "delayed duration for uploading blocks",
		},
		&cli.StringFlag{
			Name:  "upload-hours",
			Usage: "(start-end) hour of a day between which the delayed blocks can be uploaded",
		},
		&cli.StringFlag{
			Name:  "cache-dir",
			Value: defaultCacheDir,
			Usage: "directory paths of local cache, use colon to separate multiple paths",
		},
		&cli.StringFlag{
			Name:  "cache-mode",
			Value: "0600", // only owner can read/write cache
			Usage: "file permissions for cached blocks",
		},
		&cli.StringFlag{
			Name:  "cache-size",
			Value: "100G",
			Usage: "size of cached object for read in MiB",
		},
		&cli.Int64Flag{
			Name:  "cache-items",
			Value: 0,
			Usage: "max number of cached items (0 will be automatically calculated based on the `free‑space‑ratio`.)",
		},
		&cli.Float64Flag{
			Name:  "free-space-ratio",
			Value: 0.1,
			Usage: "min free space (ratio)",
		},
		&cli.BoolFlag{
			Name:  "cache-partial-only",
			Usage: "cache only random/small read",
		},
		&cli.BoolFlag{
			Name:  "cache-large-write",
			Usage: "cache full blocks after uploading",
		},
		&cli.StringFlag{
			Name:  "verify-cache-checksum",
			Value: "extend",
			Usage: "checksum level (none, full, shrink, extend)",
		},
		&cli.StringFlag{
			Name:  "cache-eviction",
			Value: chunk.Eviction2Random,
			Usage: fmt.Sprintf("cache eviction policy [%s, %s, %s]", chunk.EvictionNone, chunk.Eviction2Random, chunk.EvictionLRU),
		},
		&cli.StringFlag{
			Name:  "cache-scan-interval",
			Value: "1h",
			Usage: "interval to scan cache-dir to rebuild in-memory index",
		},
		&cli.StringFlag{
			Name:  "cache-expire",
			Value: "0s",
			Usage: "cached blocks not accessed for longer than this option will be automatically evicted (0 means never)",
		},
	})
}

func metaFlags() []cli.Flag {
	return addCategories("META", []cli.Flag{
		&cli.StringFlag{
			Name:  "subdir",
			Usage: "mount a sub-directory as root",
		},
		&cli.StringFlag{
			Name:  "backup-meta",
			Value: "1h",
			Usage: "interval to automatically backup metadata in the object storage (0 means disable backup)",
		},
		&cli.BoolFlag{
			Name:  "backup-skip-trash",
			Usage: "skip files in trash when backup metadata",
		},
		&cli.StringFlag{
			Name:  "heartbeat",
			Value: "12s",
			Usage: "interval to send heartbeat; it's recommended that all clients use the same heartbeat value",
		},
		&cli.BoolFlag{
			Name:  "read-only",
			Usage: "allow lookup/read operations only",
		},
		&cli.BoolFlag{
			Name:  "no-bgjob",
			Usage: "disable background jobs (clean-up, backup, etc.)",
		},
		&cli.StringFlag{
			Name:  "atime-mode",
			Value: "noatime",
			Usage: "when to update atime, supported mode includes: noatime, relatime, strictatime",
		},
		&cli.IntFlag{
			Name:  "skip-dir-nlink",
			Value: 20,
			Usage: "number of retries after which the update of directory nlink will be skipped (used for tkv only, 0 means never)",
		},
		&cli.StringFlag{
			Name:  "skip-dir-mtime",
			Value: "100ms",
			Usage: "skip updating attribute of a directory if the mtime difference is smaller than this value",
		},
		&cli.BoolFlag{
			Name:  "sort-dir",
			Usage: "sort entries within a directory by name",
		},
		&cli.BoolFlag{
			Name:  "fast-statfs",
			Value: false,
			Usage: "Use local counters for statfs instead of querying metadata service",
		},
		&cli.StringFlag{
			Name:  "network-interfaces",
			Usage: "comma-separated list of network interfaces to use for IP discovery (e.g. eth0,en0), empty means all",
		},
	})
}

func clientFlags(defaultEntryCache float64) []cli.Flag {
	return expandFlags(
		metaFlags(),
		metaCacheFlags(defaultEntryCache),
		storageFlags(),
		dataCacheFlags(),
	)
}

func shareInfoFlags() []cli.Flag {
	return addCategories("METRICS", []cli.Flag{
		&cli.StringFlag{
			Name:  "metrics",
			Value: "127.0.0.1:9567",
			Usage: "address to export metrics",
		},
		&cli.StringFlag{
			Name:  "custom-labels",
			Usage: "custom labels for metrics",
		},
		&cli.StringFlag{
			Name:  "consul",
			Value: "127.0.0.1:8500",
			Usage: "consul address to register",
		},
		&cli.BoolFlag{
			Name:  "no-usage-report",
			Usage: "do not send usage report",
		},
	})
}

func metaCacheFlags(defaultEntryCache float64) []cli.Flag {
	return addCategories("META CACHE", []cli.Flag{
		&cli.StringFlag{
			Name:  "attr-cache",
			Value: "1.0s",
			Usage: "attributes cache timeout",
		},
		&cli.StringFlag{
			Name:  "entry-cache",
			Value: fmt.Sprintf("%.1fs", defaultEntryCache),
			Usage: "file entry cache timeout",
		},
		&cli.StringFlag{
			Name:  "dir-entry-cache",
			Value: "1.0s",
			Usage: "dir entry cache timeout",
		},
		&cli.StringFlag{
			Name:  "negative-entry-cache",
			Usage: "cache timeout for negative entry lookups",
		},
		&cli.BoolFlag{
			Name:  "readdir-cache",
			Usage: "enable kernel caching of readdir entries, with timeout controlled by attr-cache flag (require linux kernel 4.20+)",
		},
		&cli.StringFlag{
			Name:  "open-cache",
			Value: "0s",
			Usage: "The cache time to reuse open file without checking update (0 means disable this feature)",
		},
		&cli.Uint64Flag{
			Name:  "open-cache-limit",
			Value: 10000,
			Usage: "max number of open files to cache (soft limit, 0 means unlimited)",
		},
	})
}

func expandFlags(compoundFlags ...[]cli.Flag) []cli.Flag {
	var flags []cli.Flag
	for _, flag := range compoundFlags {
		flags = append(flags, flag...)
	}
	return flags
}


================================================
FILE: cmd/flags_test.go
================================================
package cmd

import (
	"github.com/juicedata/juicefs/pkg/utils"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

func Test_duration(t *testing.T) {
	type args struct {
		s string
	}
	tests := []struct {
		name string
		args args
		want time.Duration
	}{
		{
			name: "DurationWithSeconds",
			args: args{s: "60"},
			want: time.Minute,
		},
		{
			name: "DurationWithHours",
			args: args{s: "2h"},
			want: 2 * time.Hour,
		},
		{
			name: "DurationWithDays",
			args: args{s: "1d"},
			want: 24 * time.Hour,
		},
		{
			name: "DurationWithDaysAndTime",
			args: args{s: "1d2h"},
			want: 26 * time.Hour,
		},
		{
			name: "DurationWithInvalidInput",
			args: args{s: "invalid"},
			want: 0,
		},
		{
			name: "DurationWithEmptyString",
			args: args{s: ""},
			want: 0,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equalf(t, tt.want, utils.Duration(tt.args.s), "duration(%v)", tt.args.s)
		})
	}
}


================================================
FILE: cmd/format.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"crypto/tls"
	"crypto/x509"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	_ "net/http/pprof"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/google/uuid"
	"github.com/juicedata/juicefs/pkg/compress"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/pkg/errors"
	"github.com/urfave/cli/v2"
)

func cmdFormat() *cli.Command {
	return &cli.Command{
		Name:      "format",
		Action:    format,
		Category:  "ADMIN",
		Usage:     "Format a volume",
		ArgsUsage: "META-URL NAME",
		Description: `
Create a new JuiceFS volume. Here META-URL is used to set up the metadata engine (Redis, TiKV, MySQL, etc.),
and NAME is the prefix of all objects in data storage.

DEPRECATED: It was also used to change configuration of an existing volume, but now this function is
deprecated, instead please use the "config" command.

Examples:
# Create a simple test volume (data will be stored in a local directory)
$ juicefs format sqlite3://myjfs.db myjfs

# Create a volume with Redis and S3
$ juicefs format redis://localhost myjfs --storage s3 --bucket https://mybucket.s3.us-east-2.amazonaws.com

# Create a volume with password protected MySQL
$ juicefs format mysql://jfs:mypassword@(127.0.0.1:3306)/juicefs myjfs
# A safer alternative
$ META_PASSWORD=mypassword juicefs format mysql://jfs:@(127.0.0.1:3306)/juicefs myjfs

# Create a volume with "quota" enabled
$ juicefs format sqlite3://myjfs.db myjfs --inodes 1000000 --capacity 102400

# Create a volume with "trash" disabled
$ juicefs format sqlite3://myjfs.db myjfs --trash-days 0

Details: https://juicefs.com/docs/community/quick_start_guide`,
		Flags: expandFlags(
			formatStorageFlags(),
			formatFlags(),
			formatManagementFlags(),
			[]cli.Flag{
				&cli.BoolFlag{
					Name:  "force",
					Usage: "overwrite existing format",
				},
				&cli.BoolFlag{
					Name:  "no-update",
					Usage: "don't update existing volume",
				},
			}),
	}
}

func formatStorageFlags() []cli.Flag {
	var defaultBucket = "/var/jfs"
	switch runtime.GOOS {
	case "linux":
		if os.Getuid() == 0 {
			break
		}
		fallthrough
	case "darwin":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			logger.Warn(err)
			homeDir = defaultBucket
		}
		defaultBucket = path.Join(homeDir, ".juicefs", "local")
	case "windows":
		defaultBucket = path.Join("C:/jfs/local")
	}
	return addCategories("DATA STORAGE", []cli.Flag{
		&cli.StringFlag{
			Name:  "storage",
			Value: "file",
			Usage: "object storage type (e.g. s3, gs, oss, cos)",
		},
		&cli.StringFlag{
			Name:  "bucket",
			Value: defaultBucket,
			Usage: "the bucket URL of object storage to store data",
		},
		&cli.StringFlag{
			Name:  "access-key",
			Usage: "access key for object storage (env ACCESS_KEY)",
		},
		&cli.StringFlag{
			Name:  "secret-key",
			Usage: "secret key for object storage (env SECRET_KEY)",
		},
		&cli.StringFlag{
			Name:  "session-token",
			Usage: "session token for object storage",
		},
		&cli.StringFlag{
			Name:  "storage-class",
			Usage: "the default storage class",
		},
	})
}

func formatFlags() []cli.Flag {
	return addCategories("DATA FORMAT", []cli.Flag{
		&cli.StringFlag{
			Name:  "block-size",
			Value: "4M",
			Usage: "size of block in KiB",
		},
		&cli.StringFlag{
			Name:  "compress",
			Value: "none",
			Usage: "compression algorithm (lz4, zstd, none)",
		},
		&cli.StringFlag{
			Name:  "encrypt-rsa-key",
			Usage: "a path to RSA private key (PEM)",
		},
		&cli.StringFlag{
			Name:  "encrypt-algo",
			Usage: "encrypt algorithm (aes256gcm-rsa, chacha20-rsa)",
			Value: object.AES256GCM_RSA,
		},
		&cli.BoolFlag{
			Name:  "hash-prefix",
			Usage: "add a hash prefix to name of objects",
		},
		&cli.IntFlag{
			Name:  "shards",
			Usage: "store the blocks into N buckets by hash of key",
		},
	})
}

func formatManagementFlags() []cli.Flag {
	return addCategories("MANAGEMENT", []cli.Flag{
		&cli.StringFlag{
			Name:  "capacity",
			Usage: "hard quota of the volume limiting its usage of space in GiB",
		},
		&cli.Uint64Flag{
			Name:  "inodes",
			Usage: "hard quota of the volume limiting its number of inodes",
		},
		&cli.IntFlag{
			Name:  "trash-days",
			Value: 1,
			Usage: "number of days after which removed files will be permanently deleted",
		},
		&cli.BoolFlag{
			Name:  "enable-acl",
			Usage: "enable POSIX ACL (this flag is irreversible once enabled)",
		},
		&cli.StringFlag{
			Name:  "ranger-rest-url",
			Usage: "URL of the RangerAdmin",
		},
		&cli.StringFlag{
			Name:  "ranger-service",
			Usage: "Name of the Ranger service used For JuiceFS",
		},
		&cli.StringFlag{
			Name:  "kerberos-config-file",
			Usage: "Path to Kerberos configuration file",
		},
	})
}

func fixObjectSize(s uint64) uint64 {
	const min, max = 64 << 10, 16 << 20
	var bits uint
	for s > 1 {
		bits++
		s >>= 1
	}
	s = s << bits
	if s < min {
		logger.Warnf("block size is too small: %s, use %s instead", humanize.IBytes(s), humanize.IBytes(min))
		s = min
	} else if s > max {
		logger.Warnf("block size is too large: %s, use %s instead", humanize.IBytes(s), humanize.IBytes(max))
		s = max
	}
	return s
}

func createStorage(format meta.Format) (object.ObjectStorage, error) {

	if err := format.Decrypt(); err != nil {
		return nil, fmt.Errorf("format decrypt: %s", err)
	}
	object.UserAgent = "JuiceFS-" + version.Version()
	var blob object.ObjectStorage
	var err error
	if u, err := url.Parse(format.Bucket); err == nil {
		values := u.Query()
		if values.Get("tls-insecure-skip-verify") != "" {
			var tlsSkipVerify bool
			if tlsSkipVerify, err = strconv.ParseBool(values.Get("tls-insecure-skip-verify")); err != nil {
				return nil, err
			}
			object.GetHttpClient().Transport.(*http.Transport).TLSClientConfig.InsecureSkipVerify = tlsSkipVerify
			values.Del("tls-insecure-skip-verify")
			u.RawQuery = values.Encode()
			format.Bucket = u.String()
		}

		// Configure client TLS when params are provided
		if values.Get("ca-certs") != "" && values.Get("ssl-cert") != "" && values.Get("ssl-key") != "" {

			clientTLSCert, err := tls.LoadX509KeyPair(values.Get("ssl-cert"), values.Get("ssl-key"))
			if err != nil {
				return nil, fmt.Errorf("error loading certificate and key file: %s", err.Error())
			}

			certPool := x509.NewCertPool()
			caCertPEM, err := os.ReadFile(values.Get("ca-certs"))
			if err != nil {
				return nil, fmt.Errorf("error loading CA cert file: %s", err.Error())
			}

			if certAdded := certPool.AppendCertsFromPEM(caCertPEM); !certAdded {
				return nil, fmt.Errorf("error appending CA cert to pool")
			}

			object.GetHttpClient().Transport.(*http.Transport).TLSClientConfig.RootCAs = certPool
			object.GetHttpClient().Transport.(*http.Transport).TLSClientConfig.Certificates = []tls.Certificate{clientTLSCert}
		}
	}

	if format.Shards > 1 {
		blob, err = object.NewSharded(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey, format.SessionToken, format.Shards)
	} else {
		blob, err = object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey, format.SessionToken)
	}
	if err != nil {
		return nil, err
	}
	blob = object.WithPrefix(blob, format.Name+"/")
	if format.StorageClass != "" {
		if os, ok := blob.(object.SupportStorageClass); ok {
			err := os.SetStorageClass(format.StorageClass)
			if err != nil {
				logger.Warnf("set storage class %q: %v", format.StorageClass, err)
			}
		} else {
			logger.Warnf("Storage class is not supported by %q, will ignore", format.Storage)
		}
	}
	if format.EncryptKey != "" {
		privKey, err := object.ParsePrivateKeyFromPem([]byte(format.EncryptKey), []byte(os.Getenv("JFS_RSA_PASSPHRASE")))
		if err != nil {
			if errors.Is(err, object.ErrKeyNeedPasswd) {
				return nil, fmt.Errorf("%w: please set the 'JFS_RSA_PASSPHRASE' environment variable", err)
			}
			return nil, fmt.Errorf("parse private key: %s", err)
		}
		encryptor, err := object.NewDataEncryptor(object.NewKeyEncryptor(privKey), format.EncryptAlgo)
		if err != nil {
			return nil, err
		}
		blob = object.NewEncrypted(blob, encryptor)
	}
	return blob, nil
}

var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

func randSeq(n int) string {
	b := make([]rune, n)
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for i := range b {
		b[i] = letters[r.Intn(len(letters))]
	}
	return string(b)
}

func doTesting(store object.ObjectStorage, key string, data []byte) error {
	ctx := context.Background()
	if err := store.Put(ctx, key, bytes.NewReader(data)); err != nil {
		if strings.Contains(strings.ToLower(err.Error()), "denied") {
			return fmt.Errorf("Failed to put: %s", err)
		}
		if err2 := store.Create(ctx); err2 != nil {
			if strings.Contains(err.Error(), "NoSuchBucket") {
				return fmt.Errorf("Failed to create bucket %s: %s, previous error: %s\nPlease create bucket %s manually, then format again.",
					store, err2, err, store)
			} else {
				return fmt.Errorf("Failed to create bucket %s: %s, previous error: %s",
					store, err2, err)
			}
		}
		if err := store.Put(ctx, key, bytes.NewReader(data)); err != nil {
			return fmt.Errorf("Failed to put: %s", err)
		}
	}
	p, err := store.Get(ctx, key, 0, -1)
	if err != nil {
		return fmt.Errorf("Failed to get: %s", err)
	}
	data2, err := io.ReadAll(p)
	_ = p.Close()
	if err != nil {
		return err
	}
	if !bytes.Equal(data, data2) {
		return fmt.Errorf("read wrong data: expected %x, got %x", data, data2)
	}
	err = store.Delete(ctx, key)
	if err != nil {
		// it's OK to don't have delete permission, but we should warn user explicitly
		logger.Warnf("Failed to delete, err: %s", err)
	}
	return nil
}

func test(store object.ObjectStorage) error {
	key := "testing/" + randSeq(10)
	data := make([]byte, 100)
	utils.RandRead(data)
	nRetry := 3
	var err error
	for i := 0; i < nRetry; i++ {
		err = doTesting(store, key, data)
		if err == nil {
			break
		}
		logger.Warnf("Test storage %s failed: %s, tries: #%d", store, err, i+1)
		time.Sleep(time.Second * time.Duration(i*3+1))
	}
	if err == nil {
		_ = store.Delete(ctx, "testing/")
	}
	return err
}

func loadEncrypt(keyPath string) string {
	if keyPath == "" {
		return ""
	}
	pem, err := os.ReadFile(keyPath)
	if err != nil {
		logger.Fatalf("load RSA key from %s: %s", keyPath, err)
	}
	return string(pem)
}

func readKerbConf(file string) string {
	if file == "" {
		return ""
	}
	data, err := os.ReadFile(file)
	if err != nil {
		logger.Fatalf("load Kerberos config from %s: %s", file, err)
	}
	return string(data)
}

func format(c *cli.Context) error {
	setup(c, 2)
	removePassword(c.Args().Get(0))
	m := meta.NewClient(c.Args().Get(0), nil)
	name := c.Args().Get(1)
	validName := regexp.MustCompile(`^[a-z0-9][a-z0-9\-]{1,61}[a-z0-9]$`)
	if !validName.MatchString(name) {
		logger.Fatalf("invalid name: %s, only alphabet, number and - are allowed, and the length should be 3 to 63 characters.", name)
	}
	if v := c.String("compress"); compress.NewCompressor(v) == nil {
		logger.Fatalf("Unsupported compress algorithm: %s", v)
	}
	if v := c.Int("trash-days"); v < 0 {
		logger.Fatalf("Invalid trash days: %d", v)
	}
	if v := c.Int("shards"); v > 256 {
		logger.Fatalf("too many shards: %d", v)
	}

	var create, encrypted bool
	format, err := m.Load(false)
	if err == nil {
		if c.Bool("no-update") {
			return nil
		}
		format.Name = name
		for _, flag := range c.LocalFlagNames() {
			switch flag {
			case "capacity":
				format.Capacity = utils.ParseBytes(c, flag, 'G')
			case "inodes":
				format.Inodes = c.Uint64(flag)
			case "bucket":
				format.Bucket = c.String(flag)
			case "access-key":
				format.AccessKey = c.String(flag)
			case "secret-key":
				encrypted = format.KeyEncrypted
				if err := format.Decrypt(); err != nil && strings.Contains(err.Error(), "secret was removed") {
					logger.Warnf("decrypt secrets: %s", err)
				}
				format.SecretKey = c.String(flag)
			case "session-token":
				encrypted = format.KeyEncrypted
				if err := format.Decrypt(); err != nil && strings.Contains(err.Error(), "secret was removed") {
					logger.Warnf("decrypt secrets: %s", err)
				}
				format.SessionToken = c.String(flag)
			case "trash-days":
				format.TrashDays = c.Int(flag)
			case "block-size":
				format.BlockSize = int(fixObjectSize(utils.ParseBytes(c, flag, 'K')) >> 10)
			case "compress":
				format.Compression = c.String(flag)
			case "shards":
				format.Shards = c.Int(flag)
			case "hash-prefix":
				format.HashPrefix = c.Bool(flag)
			case "storage":
				format.Storage = c.String(flag)
			case "encrypt-rsa-key", "encrypt-algo":
				logger.Warnf("Flag %s is ignored since it cannot be updated", flag)
			case "ranger-rest-url":
				format.RangerRestUrl = c.String(flag)
			case "ranger-service":
				format.RangerService = c.String(flag)
			case "kerberos-config-file":
				format.KerbConf = readKerbConf(c.String(flag))
			}
		}
	} else if strings.HasPrefix(err.Error(), "database is not formatted") {
		create = true
		format = &meta.Format{
			Name:             name,
			UUID:             uuid.New().String(),
			Storage:          c.String("storage"),
			StorageClass:     c.String("storage-class"),
			Bucket:           c.String("bucket"),
			AccessKey:        c.String("access-key"),
			SecretKey:        c.String("secret-key"),
			SessionToken:     c.String("session-token"),
			EncryptKey:       loadEncrypt(c.String("encrypt-rsa-key")),
			EncryptAlgo:      c.String("encrypt-algo"),
			Shards:           c.Int("shards"),
			HashPrefix:       c.Bool("hash-prefix"),
			Capacity:         utils.ParseBytes(c, "capacity", 'G'),
			Inodes:           c.Uint64("inodes"),
			BlockSize:        int(fixObjectSize(utils.ParseBytes(c, "block-size", 'K')) >> 10),
			Compression:      c.String("compress"),
			TrashDays:        c.Int("trash-days"),
			DirStats:         true,
			UserGroupQuota:   false,
			MetaVersion:      meta.MaxVersion,
			MinClientVersion: "1.1.0-A",
			EnableACL:        c.Bool("enable-acl"),
			RangerRestUrl:    c.String("ranger-rest-url"),
			RangerService:    c.String("ranger-service"),
			KerbConf:         readKerbConf(c.String("kerberos-config-file")),
		}
		if format.EnableACL {
			format.MinClientVersion = "1.2.0-A"
		}
		if format.RangerRestUrl != "" || format.RangerService != "" {
			format.MinClientVersion = "1.3.0-A"
		}
		if format.KerbConf != "" {
			format.MinClientVersion = "1.4.0-A"
		}

		if format.AccessKey == "" && os.Getenv("ACCESS_KEY") != "" {
			format.AccessKey = os.Getenv("ACCESS_KEY")
			_ = os.Unsetenv("ACCESS_KEY")
		}
		if format.SecretKey == "" && os.Getenv("SECRET_KEY") != "" {
			format.SecretKey = os.Getenv("SECRET_KEY")
			_ = os.Unsetenv("SECRET_KEY")
		}
		if format.SessionToken == "" && os.Getenv("SESSION_TOKEN") != "" {
			format.SessionToken = os.Getenv("SESSION_TOKEN")
			_ = os.Unsetenv("SESSION_TOKEN")
		}
	} else {
		logger.Fatalf("Load metadata: %s", err)
	}
	if format.Storage == "file" || format.Storage == "sqlite3" {
		p, err := filepath.Abs(format.Bucket)
		if err == nil {
			format.Bucket = p
		} else {
			logger.Fatalf("Failed to get absolute path of %s: %s", format.Bucket, err)
		}
		if format.Storage == "file" {
			format.Bucket += "/"
		}
	}

	blob, err := createStorage(*format)
	if err != nil {
		logger.Fatalf("object storage: %s", err)
	}
	logger.Infof("Data use %s", blob)
	if os.Getenv("JFS_NO_CHECK_OBJECT_STORAGE") == "" {
		if err := test(blob); err != nil {
			logger.Fatalf("Storage %s is not configured correctly: %s", blob, err)
		}
		if create {
			if objs, err := object.ListAll(c.Context, blob, "", "", true, false); err == nil {
				for o := range objs {
					if o == nil {
						logger.Warnf("List storage %s failed", blob)
						break
					} else if o.IsDir() || o.Size() == 0 {
						continue
					} else if o.Key() != "testing" && !strings.HasPrefix(o.Key(), "testing/") {
						logger.Fatalf("Storage %s is not empty; please clean it up or pick another volume name", blob)
					}
				}
			} else {
				logger.Warnf("List storage %s failed: %s", blob, err)
			}
			if err = blob.Put(ctx, "juicefs_uuid", strings.NewReader(format.UUID)); err != nil {
				logger.Warnf("Put uuid object: %s", err)
			}
		}
	}

	if create || encrypted {
		if err = format.Encrypt(); err != nil {
			logger.Fatalf("Format encrypt: %s", err)
		}
	}
	if err = m.Init(format, c.Bool("force")); err != nil {
		if create {
			_ = blob.Delete(ctx, "juicefs_uuid")
		}
		logger.Fatalf("format: %s", err)
	}
	logger.Infof("Volume is formatted as %s", format)
	return nil
}


================================================
FILE: cmd/format_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"encoding/json"
	"testing"

	"github.com/juicedata/juicefs/pkg/meta"
)

func TestFixObjectSize(t *testing.T) {
	t.Run("Should make sure the size is in range", func(t *testing.T) {
		cases := []struct {
			input, expected uint64
		}{
			{30 << 10, 64 << 10},
			{0, 64 << 10},
			{2 << 40, 16 << 20},
			{16 << 21, 16 << 20},
		}
		for _, c := range cases {
			if size := fixObjectSize(c.input); size != c.expected {
				t.Fatalf("Expected %d, got %d", c.expected, size)
			}
		}
	})
	t.Run("Should use powers of two", func(t *testing.T) {
		cases := []struct {
			input, expected uint64
		}{
			{150 << 10, 128 << 10},
			{99 << 10, 64 << 10},
			{1077 << 10, 1024 << 10},
		}
		for _, c := range cases {
			if size := fixObjectSize(c.input); size != c.expected {
				t.Fatalf("Expected %d, got %d", c.expected, size)
			}
		}
	})
}

func TestFormat(t *testing.T) {
	rdb := resetTestMeta()
	if err := Main([]string{"", "format", "--bucket", t.TempDir(), testMeta, testVolume}); err != nil {
		t.Fatalf("format error: %s", err)
	}
	body, err := rdb.Get(context.Background(), "setting").Bytes()
	if err != nil {
		t.Fatalf("get setting: %s", err)
	}
	f := meta.Format{}
	if err = json.Unmarshal(body, &f); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if f.Name != testVolume {
		t.Fatalf("volume name %s != expected %s", f.Name, testVolume)
	}

	if err = Main([]string{"", "format", testMeta, testVolume, "--capacity", "1", "--inodes", "1000"}); err != nil {
		t.Fatalf("format error: %s", err)
	}
	if body, err = rdb.Get(context.Background(), "setting").Bytes(); err != nil {
		t.Fatalf("get setting: %s", err)
	}
	if err = json.Unmarshal(body, &f); err != nil {
		t.Fatalf("json unmarshal: %s", err)
	}
	if f.Capacity != 1<<30 || f.Inodes != 1000 {
		t.Fatalf("unexpected volume: %+v", f)
	}
}


================================================
FILE: cmd/fsck.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"sort"
	"strconv"
	"strings"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"

	"github.com/urfave/cli/v2"
)

func cmdFsck() *cli.Command {
	return &cli.Command{
		Name:      "fsck",
		Action:    fsck,
		Category:  "ADMIN",
		Usage:     "Check consistency of a volume",
		ArgsUsage: "META-URL",
		Description: `
It scans all objects in data storage and slices in metadata, comparing them to see if there is any
lost object or broken file.

Examples:
$ juicefs fsck redis://localhost

# Repair broken directories
$ juicefs fsck redis://localhost --path /d1/d2 --repair

# recursively check
$ juicefs fsck redis://localhost --path /d1/d2 --recursive`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "path",
				Usage: "absolute path within JuiceFS to check",
			},
			&cli.BoolFlag{
				Name:  "repair",
				Usage: "repair specified path if it's broken",
			},
			&cli.BoolFlag{
				Name:    "recursive",
				Aliases: []string{"r"},
				Usage:   "recursively check or repair",
			},
			&cli.BoolFlag{
				Name:  "sync-dir-stat",
				Usage: "sync stat of all directories, even if they are existed and not broken (NOTE: it may take a long time for huge trees)",
			},
			&cli.StringFlag{
				Name:  "repair-dir-mode",
				Value: "0755",
				Usage: "permission mode for repaired directories (octal, e.g., 0755)",
			},
		},
	}
}

func fsck(ctx *cli.Context) error {
	setup(ctx, 1)
	if ctx.Bool("repair") && ctx.String("path") == "" {
		logger.Fatalf("Please provide the path to repair with `--path` option")
	}
	removePassword(ctx.Args().Get(0))
	m := meta.NewClient(ctx.Args().Get(0), nil)
	format, err := m.Load(true)
	if err != nil {
		logger.Fatalf("load setting: %s", err)
	}
	var c = meta.NewContext(0, 0, []uint32{0})
	progress := utils.NewProgress(false)
	// prepare slices
	sliceCSpin := progress.AddCountSpinner("Listed slices")
	slices := make(map[meta.Ino][]meta.Slice)
	path := ctx.String("path")
	repairDirMode, err := strconv.ParseUint(ctx.String("repair-dir-mode"), 8, 16) // base 8 (octal), 16-bit result
	if err != nil {
		logger.Fatalf("invalid repair-dir-mode: %s", err)
	}
	if path != "" {
		if !strings.HasPrefix(path, "/") {
			logger.Fatalf("File path should be the absolute path within JuiceFS")
		}
		err := m.Check(c, path, &meta.CheckOpt{
			Repair:        ctx.Bool("repair"),
			Recursive:     ctx.Bool("recursive"),
			SyncDirStat:   ctx.Bool("sync-dir-stat"),
			RepairDirMode: uint16(repairDirMode),
			ShowProgress:  sliceCSpin.IncrBy,
			Slices:        slices,
		})
		if err != nil {
			logger.Fatalf("check: %s", err)
		}
	} else {
		r := m.ListSlices(c, slices, false, false, sliceCSpin.Increment)
		if r != 0 {
			logger.Fatalf("list all slices: %s", r)
		}
	}
	sliceCSpin.Done()

	chunkConf := *getDefaultChunkConf(format)
	chunkConf.CacheDir = "memory"

	blob, err := createStorage(*format)
	if err != nil {
		logger.Fatalf("object storage: %s", err)
	}
	logger.Infof("Data use %s", blob)
	blob = object.WithPrefix(blob, "chunks/")

	// Find all blocks in object storage
	blockDSpin := progress.AddDoubleSpinner("Found blocks")
	var blocks = make(map[string]int64)
	if path == "" {
		objs, err := object.ListAll(ctx.Context, blob, "", "", true, false)
		if err != nil {
			logger.Fatalf("list all blocks: %s", err)
		}
		for obj := range objs {
			if obj == nil {
				break // failed listing
			}
			if obj.IsDir() {
				continue
			}

			logger.Debugf("found block %s", obj.Key())
			parts := strings.Split(obj.Key(), "/")
			if len(parts) != 3 {
				continue
			}
			name := parts[2]
			blocks[name] = obj.Size()
			blockDSpin.IncrInt64(obj.Size())
		}
		blockDSpin.Done()
	}
	delfiles := make(map[meta.Ino]bool)
	err = m.ScanDeletedObject(c, nil, nil, nil, func(ino meta.Ino, size uint64, ts int64) (clean bool, err error) {
		delfiles[ino] = true
		return false, nil
	})
	if err != nil {
		logger.Warnf("scan deleted objects: %s", err)
	}
	// Scan all slices to find lost blocks
	delfilesSpin := progress.AddCountSpinner("Deleted files")
	skippedSlices := progress.AddCountSpinner("Skipped slices")
	sliceCBar := progress.AddCountBar("Scanned slices", sliceCSpin.Current())
	sliceBSpin := progress.AddByteSpinner("Scanned slices")
	lostDSpin := progress.AddDoubleSpinner("Lost blocks")
	brokens := make(map[meta.Ino]string)
	for inode, ss := range slices {
		if delfiles[inode] {
			delfilesSpin.Increment()
			skippedSlices.IncrBy(len(ss))
			continue
		}
		for _, s := range ss {
			n := (s.Size - 1) / uint32(chunkConf.BlockSize)
			for i := uint32(0); i <= n; i++ {
				sz := chunkConf.BlockSize
				if i == n {
					sz = int(s.Size) - int(i)*chunkConf.BlockSize
				}
				key := fmt.Sprintf("%d_%d_%d", s.Id, i, sz)
				if _, ok := blocks[key]; !ok {
					var objKey string
					if format.HashPrefix {
						objKey = fmt.Sprintf("%02X/%v/%s", s.Id%256, s.Id/1000/1000, key)
					} else {
						objKey = fmt.Sprintf("%v/%v/%s", s.Id/1000/1000, s.Id/1000, key)
					}
					obj, err := blob.Head(ctx.Context, objKey)
					if err != nil {
						if _, ok := brokens[inode]; !ok {
							if ps := m.GetPaths(meta.Background(), inode); len(ps) > 0 {
								brokens[inode] = ps[0]
							} else {
								brokens[inode] = fmt.Sprintf("inode:%d", inode)
							}
						}
						logger.Errorf("can't find block %s for file %s: %s", objKey, brokens[inode], err)
						lostDSpin.IncrInt64(int64(sz))
						continue
					}
					blockDSpin.IncrInt64(obj.Size())
				}
			}
			sliceCBar.Increment()
			sliceBSpin.IncrInt64(int64(s.Size))
		}
	}
	progress.Done()
	if progress.Quiet {
		c, b := blockDSpin.Current()
		logger.Infof("Found %d blocks (%d bytes)", c, b)
		logger.Infof("Used by %d slices (%d bytes)", sliceCBar.Current(), sliceBSpin.Current())
	}
	if lc, lb := lostDSpin.Current(); lc > 0 {
		msg := fmt.Sprintf("%d objects are lost (%d bytes), %d broken files:\n", lc, lb, len(brokens))
		msg += fmt.Sprintf("%13s: PATH\n", "INODE")
		var fileList []string
		for i, p := range brokens {
			fileList = append(fileList, fmt.Sprintf("%13d: %s", i, p))
		}
		sort.Strings(fileList)
		msg += strings.Join(fileList, "\n")
		logger.Fatal(msg)
	}

	return nil
}


================================================
FILE: cmd/fsck_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"testing"
)

func TestFsck(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	for i := 0; i < 10; i++ {
		filename := fmt.Sprintf("%s/f%d.txt", testMountPoint, i)
		if err := os.WriteFile(filename, []byte("test"), 0644); err != nil {
			t.Fatalf("write file failed: %s", err)
		}
	}
	if err := Main([]string{"", "fsck", testMeta}); err != nil {
		t.Fatalf("fsck failed: %s", err)
	}
	if err := Main([]string{"", "fsck", testMeta, "--path", "/f3.txt"}); err != nil {
		t.Fatalf("fsck failed: %s", err)
	}
}

func TestFsckRepairDirMode(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	if err := os.MkdirAll(testMountPoint+"/testdir", 0755); err != nil {
		t.Fatalf("mkdir failed: %s", err)
	}

	if err := Main([]string{"", "fsck", testMeta, "--path", "/testdir", "--repair-dir-mode", "0700"}); err != nil {
		t.Fatalf("fsck with repair-dir-mode 0700 failed: %s", err)
	}

	if err := Main([]string{"", "fsck", testMeta, "--path", "/testdir", "--repair-dir-mode", "0755"}); err != nil {
		t.Fatalf("fsck with repair-dir-mode 0755 failed: %s", err)
	}
}


================================================
FILE: cmd/gateway.go
================================================
//go:build !nogateway
// +build !nogateway

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"errors"
	_ "net/http/pprof"
	"os"
	"os/signal"
	"path"
	"strconv"
	"syscall"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"

	jfsgateway "github.com/juicedata/juicefs/pkg/gateway"
	"github.com/urfave/cli/v2"

	mcli "github.com/minio/cli"
	minio "github.com/minio/minio/cmd"
)

func cmdGateway() *cli.Command {
	selfFlags := []cli.Flag{
		&cli.StringFlag{
			Name:  "log",
			Usage: "path for gateway log",
			Value: path.Join(getDefaultLogDir(), "juicefs-gateway.log"),
		},
		&cli.StringFlag{
			Name:  "access-log",
			Usage: "path for JuiceFS access log",
		},
		&cli.BoolFlag{
			Name:    "background",
			Aliases: []string{"d"},
			Usage:   "run in background",
		},
		&cli.BoolFlag{
			Name:  "no-banner",
			Usage: "disable MinIO startup information",
		},
		&cli.BoolFlag{
			Name:  "multi-buckets",
			Usage: "use top level of directories as buckets",
		},
		&cli.BoolFlag{
			Name:  "keep-etag",
			Usage: "keep the ETag for uploaded objects",
		},
		&cli.StringFlag{
			Name:  "umask",
			Value: "022",
			Usage: "umask for new files and directories in octal",
		},
		&cli.BoolFlag{
			Name:  "object-tag",
			Usage: "enable object tagging api",
		},
		&cli.BoolFlag{
			Name:  "object-meta",
			Usage: "enable object metadata api",
		},
		&cli.BoolFlag{
			Name:  "head-dir",
			Usage: "allow HEAD request on directories",
		},
		&cli.BoolFlag{
			Name:  "hide-dir-object",
			Usage: "hide the directories created by PUT Object API",
		},
		&cli.StringFlag{
			Name:  "domain",
			Usage: "domain for virtual-host-style requests",
		},
		&cli.StringFlag{
			Name:  "refresh-iam-interval",
			Value: "5m",
			Usage: "interval to reload gateway IAM from configuration",
		},
		&cli.StringFlag{
			Name:  "mountpoint",
			Value: "s3gateway",
			Usage: "the mount point for current volume (to follow symlink)",
		},
	}

	return &cli.Command{
		Name:      "gateway",
		Action:    gateway,
		Category:  "SERVICE",
		Usage:     "Start an S3-compatible gateway",
		ArgsUsage: "META-URL ADDRESS",
		Description: `
It is implemented based on the MinIO S3 Gateway. Before starting the gateway, you need to set
MINIO_ROOT_USER and MINIO_ROOT_PASSWORD environment variables, which are the access key and secret
key used for accessing S3 APIs.

Examples:
$ export MINIO_ROOT_USER=admin
$ export MINIO_ROOT_PASSWORD=12345678
$ juicefs gateway redis://localhost localhost:9000

Details: https://juicefs.com/docs/community/s3_gateway`,
		Flags: expandFlags(selfFlags, clientFlags(0), shareInfoFlags()),
	}
}

func gateway(c *cli.Context) error {
	setup(c, 2)
	ak := os.Getenv("MINIO_ROOT_USER")
	if ak == "" {
		ak = os.Getenv("MINIO_ACCESS_KEY")
	}
	if len(ak) < 3 {
		logger.Fatalf("MINIO_ROOT_USER should be specified as an environment variable with at least 3 characters")
	}
	sk := os.Getenv("MINIO_ROOT_PASSWORD")
	if sk == "" {
		sk = os.Getenv("MINIO_SECRET_KEY")
	}
	if len(sk) < 8 {
		logger.Fatalf("MINIO_ROOT_PASSWORD should be specified as an environment variable with at least 8 characters")
	}
	if c.IsSet("domain") {
		os.Setenv("MINIO_DOMAIN", c.String("domain"))
	}

	if c.IsSet("refresh-iam-interval") {
		os.Setenv("MINIO_REFRESH_IAM_INTERVAL", c.String("refresh-iam-interval"))
	}

	metaAddr := c.Args().Get(0)
	listenAddr := c.Args().Get(1)
	conf, jfs := initForSvc(c, c.String("mountpoint"), "s3gateway", metaAddr, listenAddr)

	umask, err := strconv.ParseUint(c.String("umask"), 8, 16)
	if err != nil {
		logger.Fatalf("invalid umask %s: %s", c.String("umask"), err)
	}

	readonly := c.Bool("read-only")
	jfsGateway, err = jfsgateway.NewJFSGateway(
		jfs,
		conf,
		&jfsgateway.Config{
			MultiBucket: c.Bool("multi-buckets"),
			KeepEtag:    c.Bool("keep-etag"),
			Umask:       uint16(umask),
			ObjTag:      c.Bool("object-tag"),
			ObjMeta:     c.Bool("object-meta"),
			HeadDir:     c.Bool("head-dir"),
			HideDir:     c.Bool("hide-dir-object"),
			ReadOnly:    readonly,
		},
	)
	if err != nil {
		return err
	}
	if readonly {
		os.Setenv("JUICEFS_META_READ_ONLY", "1")
	} else {
		if _, err := jfsGateway.GetBucketInfo(context.Background(), minio.MinioMetaBucket); errors.As(err, &minio.BucketNotFound{}) {
			if err := jfsGateway.MakeBucketWithLocation(context.Background(), minio.MinioMetaBucket, minio.BucketOptions{}); err != nil {
				logger.Fatalf("init MinioMetaBucket error %s: %s", minio.MinioMetaBucket, err)
			}
		}
	}

	args := []string{"server", "--address", listenAddr, "--anonymous"}
	if c.Bool("no-banner") {
		args = append(args, "--quiet")
	}
	app := &mcli.App{
		Action: gateway2,
		Flags: []mcli.Flag{
			mcli.StringFlag{
				Name:  "address",
				Value: ":9000",
				Usage: "bind to a specific ADDRESS:PORT, ADDRESS can be an IP or hostname",
			},
			mcli.BoolFlag{
				Name:  "anonymous",
				Usage: "hide sensitive information from logging",
			},
			mcli.BoolFlag{
				Name:  "json",
				Usage: "output server logs and startup information in json format",
			},
			mcli.BoolFlag{
				Name:  "quiet",
				Usage: "disable MinIO startup information",
			},
		},
	}
	return app.Run(args)
}

var jfsGateway minio.ObjectLayer

func gateway2(ctx *mcli.Context) error {
	minio.ServerMainForJFS(ctx, jfsGateway)
	return nil
}

func initForSvc(c *cli.Context, mp string, svcType, metaUrl, listenAddr string) (*vfs.Config, *fs.FileSystem) {
	removePassword(metaUrl)
	metaConf := getMetaConf(c, mp, c.Bool("read-only"))
	metaCli := meta.NewClient(metaUrl, metaConf)
	if c.Bool("background") {
		if err := makeDaemonForSvc(c, metaCli, metaUrl, listenAddr); err != nil {
			logger.Fatalf("make daemon: %s", err)
		}
	}

	format, err := metaCli.Load(true)
	if err != nil {
		logger.Fatalf("load setting: %s", err)
	}
	if st := metaCli.Chroot(meta.Background(), metaConf.Subdir); st != 0 {
		logger.Fatalf("Chroot to %s: %s", metaConf.Subdir, st)
	}
	registerer, registry := wrapRegister(c, svcType, format.Name)

	blob, err := NewReloadableStorage(format, metaCli, updateFormat(c))
	if err != nil {
		logger.Fatalf("object storage: %s", err)
	}
	logger.Infof("Data use %s", blob)

	chunkConf := getChunkConf(c, format)
	store := chunk.NewCachedStore(blob, *chunkConf, registerer)
	registerMetaMsg(metaCli, store, chunkConf)

	err = metaCli.NewSession(true)
	if err != nil {
		logger.Fatalf("new session: %s", err)
	}
	metaCli.OnReload(func(fmt *meta.Format) {
		updateFormat(c)(fmt)
		store.UpdateLimit(fmt.UploadLimit, fmt.DownloadLimit)
	})

	// Go will catch all the signals
	signal.Ignore(syscall.SIGPIPE)
	signalChan := make(chan os.Signal, 1)
	signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
	go func() {
		sig := <-signalChan
		logger.Infof("Received signal %s, exiting...", sig.String())
		if err := metaCli.CloseSession(); err != nil {
			logger.Fatalf("close session failed: %s", err)
		}
		object.Shutdown(blob)
		os.Exit(0)
	}()
	vfsConf := getVfsConf(c, metaConf, format, chunkConf)
	vfsConf.AccessLog = c.String("access-log")
	vfsConf.AttrTimeout = utils.Duration(c.String("attr-cache"))
	vfsConf.EntryTimeout = utils.Duration(c.String("entry-cache"))
	vfsConf.DirEntryTimeout = utils.Duration(c.String("dir-entry-cache"))
	vfsConf.Mountpoint = mp

	initBackgroundTasks(c, vfsConf, metaConf, metaCli, blob, registerer, registry)
	jfs, err := fs.NewFileSystem(vfsConf, metaCli, store, registry)
	if err != nil {
		logger.Fatalf("Initialize failed: %s", err)
	}
	jfs.InitMetrics(registerer)

	return vfsConf, jfs
}


================================================
FILE: cmd/gateway_noop.go
================================================
//go:build nogateway
// +build nogateway

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cmd

import (
	"errors"

	"github.com/urfave/cli/v2"
)

func cmdGateway() *cli.Command {
	return &cli.Command{
		Name:        "gateway",
		Category:    "SERVICE",
		Usage:       "Start an S3-compatible gateway (not included)",
		Description: `This feature is not included. If you want it, recompile juicefs without "nogateway" flag`,
		Action: func(*cli.Context) error {
			return errors.New("not supported")
		},
	}
}


================================================
FILE: cmd/gc.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/pkg/errors"

	"github.com/urfave/cli/v2"
)

func cmdGC() *cli.Command {
	return &cli.Command{
		Name:      "gc",
		Action:    gc,
		Category:  "ADMIN",
		Usage:     "Garbage collector of objects in data storage",
		ArgsUsage: "META-URL",
		Description: `
It scans all objects in data storage and slices in metadata, comparing them to see if there is any
leaked object. It can also actively trigger compaction of slices and the cleanup of delayed deleted slices or files.
Use this command if you find that data storage takes more than expected.

Examples:
# Check only, no writable change
$ juicefs gc redis://localhost

# Trigger compaction of all slices
$ juicefs gc redis://localhost --compact

# Delete leaked objects or metadata and delayed deleted slices or files
$ juicefs gc redis://localhost --delete`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:  "compact",
				Usage: "compact small slices into bigger ones",
			},
			&cli.BoolFlag{
				Name:  "delete",
				Usage: "delete leaked objects or metadata and delayed deleted slices or files",
			},
			&cli.IntFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   10,
				Usage:   "number threads to delete leaked objects",
			},
		},
	}
}

func gc(ctx *cli.Context) error {
	setup(ctx, 1)
	removePassword(ctx.Args().Get(0))
	metaConf := meta.DefaultConf()
	metaConf.MaxDeletes = ctx.Int("threads")
	metaConf.NoBGJob = true
	m := meta.NewClient(ctx.Args().Get(0), metaConf)
	format, err := m.Load(true)
	if err != nil {
		logger.Fatalf("load setting: %s", err)
	}
	if err = m.NewSession(false); err == nil { // To sync all stats periodically
		defer m.CloseSession() //nolint:errcheck
	} else {
		logger.Fatalf("create session: %v", err)
	}

	chunkConf := *getDefaultChunkConf(format)
	chunkConf.CacheDir = "memory"

	blob, err := createStorage(*format)
	if err != nil {
		logger.Fatalf("object storage: %s", err)
	}
	logger.Infof("Data use %s", blob)
	store := chunk.NewCachedStore(blob, chunkConf, nil)

	// Scan all chunks first and do compaction if necessary
	progress := utils.NewProgress(false)
	// Delete pending slices while listing all slices
	delete := ctx.Bool("delete")
	threads := ctx.Int("threads")
	compact := ctx.Bool("compact")
	if (delete || compact) && threads <= 0 {
		logger.Fatal("threads should be greater than 0 to delete or compact objects")
	}
	maxMtime := time.Now().Add(time.Hour * -1)
	strDuration := os.Getenv("JFS_GC_SKIPPEDTIME")
	if strDuration != "" {
		iDuration, err := strconv.Atoi(strDuration)
		if err == nil {
			maxMtime = time.Now().Add(time.Second * -1 * time.Duration(iDuration))
		} else {
			logger.Errorf("parse JFS_GC_SKIPPEDTIME=%s: %s", strDuration, err)
		}
	}

	var wg sync.WaitGroup
	var delSpin *utils.Bar

	if delete || compact {
		delSpin = progress.AddCountSpinner("Cleaned pending slices")
		m.OnMsg(meta.DeleteSlice, func(args ...interface{}) error {
			delSpin.Increment()
			return store.Remove(args[0].(uint64), int(args[1].(uint32)))
		})
	}

	c := meta.WrapContext(ctx.Context)
	delayedFileSpin := progress.AddDoubleSpinnerTwo("Pending deleted files", "Pending deleted data")
	cleanedFileSpin := progress.AddDoubleSpinnerTwo("Cleaned pending files", "Cleaned pending data")
	edge := time.Now().Add(-time.Duration(format.TrashDays) * 24 * time.Hour)
	if delete {
		cleanTrashSpin := progress.AddCountSpinner("Cleaned trash")
		_ = m.CleanupTrashBefore(c, edge, cleanTrashSpin.IncrBy, nil)
		cleanTrashSpin.Done()

		cleanDetachedNodeSpin := progress.AddCountSpinner("Cleaned detached nodes")
		m.CleanupDetachedNodesBefore(c, time.Now().Add(-time.Hour*24), cleanDetachedNodeSpin.Increment)
		cleanDetachedNodeSpin.Done()
	}

	err = m.ScanDeletedObject(
		c,
		nil, nil, nil,
		func(_ meta.Ino, size uint64, ts int64) (bool, error) {
			delayedFileSpin.IncrInt64(int64(size))
			if delete {
				cleanedFileSpin.IncrInt64(int64(size))
				return true, nil
			}
			return false, nil
		},
	)
	if err != nil {
		logger.Fatalf("scan deleted object: %s", err)
	}
	delayedFileSpin.Done()
	cleanedFileSpin.Done()

	if compact {
		bar := progress.AddCountBar("Compacted chunks", 0)
		spin := progress.AddDoubleSpinnerTwo("Compacted slices", "Compacted data")
		m.OnMsg(meta.CompactChunk, func(args ...interface{}) error {
			slices := args[0].([]meta.Slice)
			err := vfs.Compact(chunkConf, store, slices, args[1].(uint64))
			for _, s := range slices {
				spin.IncrInt64(int64(s.Len))
			}
			return err
		})
		if st := m.CompactAll(meta.Background(), ctx.Int("threads"), bar); st == 0 {
			if progress.Quiet {
				c, b := spin.Current()
				logger.Infof("Compacted %d chunks (%d slices, %d bytes).", bar.Current(), c, b)
			}
		} else {
			logger.Errorf("compact all chunks: %s", st)
		}
		bar.Done()
		spin.Done()
	} else {
		m.OnMsg(meta.CompactChunk, func(args ...interface{}) error {
			return nil // ignore compaction
		})
	}

	// put it above delete count spinner
	sliceCSpin := progress.AddCountSpinner("Listed slices")

	// List all slices in metadata engine
	slices := make(map[meta.Ino][]meta.Slice)
	r := m.ListSlices(c, slices, true, delete, sliceCSpin.Increment)
	if r != 0 {
		logger.Fatalf("list all slices: %s", r)
	}

	delayedSliceSpin := progress.AddDoubleSpinnerTwo("Trash slices", "Trash data")
	cleanedSliceSpin := progress.AddDoubleSpinnerTwo("Cleaned trash slices", "Cleaned trash data")

	err = m.ScanDeletedObject(
		c,
		func(ss []meta.Slice, ts int64) (bool, error) {
			for _, s := range ss {
				delayedSliceSpin.IncrInt64(int64(s.Size))
				if delete && ts < edge.Unix() {
					cleanedSliceSpin.IncrInt64(int64(s.Size))
				}
			}
			if delete && ts < edge.Unix() {
				return true, nil
			}
			return false, nil
		},
		nil, nil, nil,
	)
	if err != nil {
		logger.Fatalf("statistic: %s", err)
	}
	delayedSliceSpin.Done()
	cleanedSliceSpin.Done()

	// Scan all objects to find leaked ones
	blob = object.WithPrefix(blob, "chunks/")
	objs, err := object.ListAll(ctx.Context, blob, "", "", true, false)
	if err != nil {
		logger.Fatalf("list all blocks: %s", err)
	}
	vkeys := make(map[uint64]uint32)
	pkeys := make(map[uint64]uint32)
	ckeys := make(map[uint64]uint32)
	var total int64
	var totalBytes uint64
	for _, s := range slices[0] {
		pkeys[s.Id] = s.Size
		total += int64(int(s.Size-1)/chunkConf.BlockSize) + 1
		totalBytes += uint64(s.Size)
	}
	slices[0] = nil
	for _, s := range slices[1] {
		ckeys[s.Id] = s.Size
		total += int64(int(s.Size-1)/chunkConf.BlockSize) + 1
		totalBytes += uint64(s.Size)
	}
	slices[1] = nil
	for _, ss := range slices {
		for _, s := range ss {
			vkeys[s.Id] = s.Size
			total += int64(int(s.Size-1)/chunkConf.BlockSize) + 1 // s.Size should be > 0
			totalBytes += uint64(s.Size)
		}
	}
	if progress.Quiet {
		logger.Infof("using %d slices (%d bytes)", len(vkeys)+len(ckeys), totalBytes)
	}

	bar := progress.AddCountBar("Scanned objects", total)
	valid := progress.AddDoubleSpinnerTwo("Valid objects", "Valid data")
	pending := progress.AddDoubleSpinnerTwo("Pending delete objects", "Pending delete data")
	compacted := progress.AddDoubleSpinnerTwo("Compacted objects", "Compacted data")
	leaked := progress.AddDoubleSpinnerTwo("Leaked objects", "Leaked data")
	skipped := progress.AddDoubleSpinnerTwo("Skipped objects", "Skipped data")

	var leakedObj = make(chan string, 10240)
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for key := range leakedObj {
				if err := blob.Delete(ctx.Context, key); err != nil {
					logger.Warnf("delete %s: %s", key, err)
				}
			}
		}()
	}

	foundLeaked := func(obj object.Object) {
		bar.IncrTotal(1)
		leaked.IncrInt64(obj.Size())
		if delete {
			leakedObj <- obj.Key()
		}
	}

	for obj := range objs {
		if obj == nil {
			break // failed listing
		}
		if obj.IsDir() {
			continue
		}
		if obj.Mtime().After(maxMtime) || obj.Mtime().Unix() == 0 {
			logger.Debugf("ignore new block: %s %s", obj.Key(), obj.Mtime())
			bar.Increment()
			skipped.IncrInt64(obj.Size())
			continue
		}

		logger.Debugf("found block %s", obj.Key())
		parts := strings.Split(obj.Key(), "/")
		if len(parts) != 3 {
			continue
		}
		name := parts[2]
		parts = strings.Split(name, "_")
		if len(parts) != 3 {
			continue
		}
		bar.Increment()
		cid, _ := strconv.Atoi(parts[0])
		size := vkeys[uint64(cid)]
		var pobj, cobj bool
		if size == 0 {
			size, pobj = pkeys[uint64(cid)]
		}
		if size == 0 {
			size, cobj = ckeys[uint64(cid)]
		}
		if size == 0 {
			logger.Debugf("find leaked object: %s, size: %d", obj.Key(), obj.Size())
			foundLeaked(obj)
			continue
		}
		indx, _ := strconv.Atoi(parts[1])
		csize, _ := strconv.Atoi(parts[2])
		if csize == chunkConf.BlockSize {
			if (indx+1)*csize > int(size) {
				logger.Warnf("size of slice %d is larger than expected: %d > %d", cid, indx*chunkConf.BlockSize+csize, size)
				foundLeaked(obj)
			} else if pobj {
				pending.IncrInt64(obj.Size())
			} else if cobj {
				compacted.IncrInt64(obj.Size())
			} else {
				valid.IncrInt64(obj.Size())
			}
		} else {
			if indx*chunkConf.BlockSize+csize != int(size) {
				logger.Warnf("size of slice %d is %d, but expect %d", cid, indx*chunkConf.BlockSize+csize, size)
				foundLeaked(obj)
			} else if pobj {
				pending.IncrInt64(obj.Size())
			} else if cobj {
				compacted.IncrInt64(obj.Size())
			} else {
				valid.IncrInt64(obj.Size())
			}
		}
	}
	m.OnMsg(meta.DeleteSlice, func(args ...interface{}) error {
		return errors.New("stop deleting slice")
	})
	close(leakedObj)
	wg.Wait()
	if delete || compact {
		delSpin.Done()
		if progress.Quiet {
			logger.Infof("Deleted %d pending slices", delSpin.Current())
		}
	}
	sliceCSpin.Done()
	progress.Done()

	vc, _ := valid.Current()
	pc, pb := pending.Current()
	cc, cb := compacted.Current()
	lc, lb := leaked.Current()
	sc, sb := skipped.Current()
	dsc, dsb := cleanedSliceSpin.Current()
	fc, fb := cleanedFileSpin.Current()
	logger.Infof("scanned %d objects, %d valid, %d pending delete (%d bytes), %d compacted (%d bytes), %d leaked (%d bytes), %d delslices (%d bytes), %d delfiles (%d bytes), %d skipped (%d bytes)",
		bar.Current(), vc, pc, pb, cc, cb, lc, lb, dsc, dsb, fc, fb, sc, sb)
	if lc > 0 && !delete {
		logger.Infof("Please add `--delete` to clean leaked objects")
	}
	return nil
}


================================================
FILE: cmd/gc_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/stretchr/testify/require"
)

func writeSmallBlocks(mountDir string) error {
	file, err := os.OpenFile(
		filepath.Join(mountDir, "test.txt"),
		os.O_WRONLY|os.O_TRUNC|os.O_CREATE,
		0666,
	)
	if err != nil {
		return err
	}
	defer file.Close()

	content := []byte(strings.Repeat("aaaaaaaabbbbbbbb", 256))
	for k := 0; k < 64; k++ {
		if _, err = file.Write(content); err != nil {
			return err
		}
		if err = file.Sync(); err != nil {
			return err
		}
	}

	return nil
}

func getFileCount(dir string) int {
	files, _ := os.ReadDir(dir)
	count := 0
	for _, f := range files {
		if f.IsDir() {
			count += getFileCount(filepath.Join(dir, f.Name()))
		} else {
			count++
		}
	}

	return count
}

func TestGc(t *testing.T) {
	var bucket string
	mountTemp(t, &bucket, []string{"--trash-days=0", "--hash-prefix"}, nil)
	defer umountTemp(t)

	if err := writeSmallBlocks(testMountPoint); err != nil {
		t.Fatalf("write small blocks failed: %s", err)
	}
	dataDir := filepath.Join(bucket, testVolume, "chunks")
	beforeCompactFileNum := getFileCount(dataDir)
	if err := Main([]string{"", "gc", "--compact", testMeta}); err != nil {
		t.Fatalf("gc compact failed: %s", err)
	}
	afterCompactFileNum := getFileCount(dataDir)
	if beforeCompactFileNum <= afterCompactFileNum {
		t.Fatalf("blocks before gc compact %d <= after %d", beforeCompactFileNum, afterCompactFileNum)
	}

	for i := 0; i < 10; i++ {
		filename := fmt.Sprintf("%s/f%d.txt", testMountPoint, i)
		if err := os.WriteFile(filename, []byte("test"), 0644); err != nil {
			t.Fatalf("write file failed: %s", err)
		}
	}

	os.Setenv("JFS_GC_SKIPPEDTIME", "0")
	defer os.Unsetenv("JFS_GC_SKIPPEDTIME")
	t.Logf("JFS_GC_SKIPPEDTIME is %s", os.Getenv("JFS_GC_SKIPPEDTIME"))

	leaked := filepath.Join(dataDir, "0", "0", "123456789_0_1048576")
	os.WriteFile(leaked, []byte(strings.Repeat("aaaaaaaabbbbbbbb", 64*1024)), 0644)
	time.Sleep(time.Second * 3)

	if err := Main([]string{"", "gc", "--delete", testMeta}); err != nil {
		t.Fatalf("gc delete failed: %s", err)
	}

	require.False(t, utils.Exists(leaked))

	if err := Main([]string{"", "gc", testMeta}); err != nil {
		t.Fatalf("gc failed: %s", err)
	}
}


================================================
FILE: cmd/info.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/urfave/cli/v2"
)

func cmdInfo() *cli.Command {
	return &cli.Command{
		Name:      "info",
		Action:    info,
		Category:  "INSPECTOR",
		Usage:     "Show internal information of a path or inode",
		ArgsUsage: "PATH/INODE",
		Description: `
It is used to inspect internal metadata values of the target file.

Examples:
$ Check a path
$ juicefs info /mnt/jfs/foo

# Check an inode
$ cd /mnt/jfs
$ juicefs info -i 100`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "inode",
				Aliases: []string{"i"},
				Usage:   "use inode instead of path (current dir should be inside JuiceFS)",
			},
			&cli.BoolFlag{
				Name:    "recursive",
				Aliases: []string{"r"},
				Usage:   "get summary of directories recursively (NOTE: it may be inaccurate, use --strict to get accurate result)",
			},
			&cli.BoolFlag{
				Name:  "strict",
				Usage: "get accurate summary of directories (NOTE: it may take a long time for huge trees)",
			},
			&cli.BoolFlag{
				Name:  "raw",
				Usage: "show internal raw information",
			},
		},
	}
}

func info(ctx *cli.Context) error {
	setup0(ctx, 1, 0)
	var recursive, strict, raw uint8
	if ctx.Bool("recursive") {
		recursive = 1
	}
	if ctx.Bool("strict") {
		strict = 1
	}
	if ctx.Bool("raw") {
		raw = 1
	}
	for i := 0; i < ctx.Args().Len(); i++ {
		progress := utils.NewProgress(recursive == 0) // only show progress for recursive info
		path := ctx.Args().Get(i)
		dspin := progress.AddDoubleSpinner(path)
		var d string
		var inode uint64
		var err error
		if ctx.Bool("inode") {
			inode, err = strconv.ParseUint(path, 10, 64)
			d, _ = os.Getwd()
		} else {
			d, err = filepath.Abs(path)
			if err != nil {
				logger.Fatalf("abs of %s: %s", path, err)
			}
			inode, err = utils.GetFileInode(d)
		}
		if err != nil {
			logger.Errorf("lookup inode for %s: %s", path, err)
			continue
		}
		if inode < uint64(meta.RootInode) {
			logger.Fatalf("inode number shouldn't be less than %d", meta.RootInode)
		}
		f, err := openController(d)
		if err != nil {
			logger.Errorf("Open control file for %s: %s", d, err)
			continue
		}

		wb := utils.NewBuffer(8 + 11)
		wb.Put32(meta.InfoV2)
		wb.Put32(11)
		wb.Put64(inode)
		wb.Put8(recursive)
		wb.Put8(raw)
		wb.Put8(strict)
		_, err = f.Write(wb.Bytes())
		if err != nil {
			logger.Fatalf("write message: %s", err)
		}
		data, errno := readProgress(f, func(count, size uint64) {
			dspin.SetCurrent(int64(count), int64(size))
		})
		if errno == syscall.EINVAL {
			legacyInfo(d, path, inode, recursive, raw)
			continue
		} else if errno != 0 {
			logger.Errorf("failed to get info: %s", syscall.Errno(errno))
		}
		dspin.Done()
		progress.Done()

		var resp vfs.InfoResponse
		err = json.Unmarshal(data, &resp)
		_ = f.Close()
		if err == nil && resp.Failed {
			err = errors.New(resp.Reason)
		}
		if err != nil {
			logger.Fatalf("info: %s", err)
		}
		fmt.Println(path, ":")
		fmt.Printf("  inode: %d\n", resp.Ino)
		fmt.Printf("  files: %d\n", resp.Summary.Files)
		fmt.Printf("   dirs: %d\n", resp.Summary.Dirs)
		fmt.Printf(" length: %s\n", utils.FormatBytes(resp.Summary.Length))
		fmt.Printf("   size: %s\n", utils.FormatBytes(resp.Summary.Size))
		switch len(resp.Paths) {
		case 0:
			fmt.Printf("   path: %s\n", "unknown")
		case 1:
			fmt.Printf("   path: %s\n", resp.Paths[0])
		default:
			fmt.Printf("  paths:\n")
			for _, p := range resp.Paths {
				fmt.Printf("\t%s\n", p)
			}
		}
		if len(resp.Chunks) > 0 {
			fmt.Println(" chunks:")
			results := make([][]string, 0, 1+len(resp.Chunks))
			results = append(results, []string{"chunkIndex", "sliceId", "size", "offset", "length"})
			for _, c := range resp.Chunks {
				results = append(results, []string{
					strconv.FormatUint(c.ChunkIndex, 10),
					strconv.FormatUint(c.Id, 10),
					strconv.FormatUint(uint64(c.Size), 10),
					strconv.FormatUint(uint64(c.Off), 10),
					strconv.FormatUint(uint64(c.Len), 10),
				})
			}
			printResult(results, -1, false)
		}
		if len(resp.Objects) > 0 {
			fmt.Println(" objects:")
			results := make([][]string, 0, 1+len(resp.Objects))
			results = append(results, []string{"chunkIndex", "objectName", "size", "offset", "length", "pos"})
			var chunkOffset, lastChunk uint64
			for _, o := range resp.Objects {
				if lastChunk != o.ChunkIndex {
					chunkOffset = 0
				}
				lastChunk = o.ChunkIndex
				results = append(results, []string{
					strconv.FormatUint(o.ChunkIndex, 10),
					o.Key,
					strconv.FormatUint(uint64(o.Size), 10),
					strconv.FormatUint(uint64(o.Off), 10),
					strconv.FormatUint(uint64(o.Len), 10),
					strconv.FormatUint(chunkOffset+o.ChunkIndex*meta.ChunkSize, 10),
				})
				chunkOffset += uint64(o.Len)
			}
			printResult(results, 1, false)
		}
		if len(resp.FLocks) > 0 {
			fmt.Println(" flocks:")
			results := make([][]string, 0, 1+len(resp.FLocks))
			results = append(results, []string{"Sid", "Owner", "Type"})
			for _, l := range resp.FLocks {
				results = append(results, []string{
					strconv.FormatUint(l.Sid, 10),
					strconv.FormatUint(l.Owner, 10),
					l.Type,
				})
			}
			printResult(results, 0, false)
		}
		if len(resp.PLocks) > 0 {
			fmt.Println(" plocks:")
			results := make([][]string, 0, 1+len(resp.PLocks))
			results = append(results, []string{"Sid", "Owner", "Type", "Pid", "Start", "End"})
			for _, l := range resp.PLocks {
				results = append(results, []string{
					strconv.FormatUint(l.Sid, 10),
					strconv.FormatUint(l.Owner, 10),
					ltypeToString(l.Type),
					strconv.FormatUint(uint64(l.Pid), 10),
					strconv.FormatUint(l.Start, 10),
					strconv.FormatUint(l.End, 10),
				})
			}
			printResult(results, 0, false)
		}
	}
	return nil
}

func ltypeToString(t uint32) string {
	switch t {
	case meta.F_RDLCK:
		return "R"
	case meta.F_WRLCK:
		return "W"
	default:
		return "UNKNOWN"
	}
}

func legacyInfo(d, path string, inode uint64, recursive, raw uint8) {
	f, err := openController(d)
	if err != nil {
		logger.Errorf("Open control file for %s: %s", d, err)
		return
	}
	defer f.Close()
	wb := utils.NewBuffer(8 + 10)
	wb.Put32(meta.LegacyInfo)
	wb.Put32(10)
	wb.Put64(inode)
	wb.Put8(recursive)
	wb.Put8(raw)
	_, err = f.Write(wb.Bytes())
	if err != nil {
		logger.Fatalf("write message: %s", err)
	}
	data := make([]byte, 4)
	n := readControl(f, data)
	if n == 1 && data[0] == byte(syscall.EINVAL&0xff) {
		logger.Fatalf("info is not supported, please upgrade and mount again")
	}
	r := utils.ReadBuffer(data)
	size := r.Get32()
	data = make([]byte, size)
	n, err = f.Read(data)
	if err != nil {
		logger.Fatalf("read info: %s", err)
	}
	fmt.Println(path, ":")
	resp := string(data[:n])
	var p int
	if p = strings.Index(resp, "chunks:\n"); p > 0 {
		p += 8
		raw = 1 // legacy clients always return chunks
	} else if p = strings.Index(resp, "objects:\n"); p > 0 {
		p += 9
	}
	if p <= 0 {
		fmt.Println(resp)
	} else {
		fmt.Println(resp[:p-1])
		if len(resp[p:]) > 0 {
			legacyPrintChunks(resp[p:], raw == 1)
		}
	}
}

func legacyPrintChunks(resp string, raw bool) {
	cs := strings.Split(resp, "\n")
	result := make([][]string, len(cs))
	result[0] = []string{"chunkIndex", "objectName", "size", "offset", "length"}
	leftAlign := 1
	if raw {
		result[0][1] = "sliceId"
		leftAlign = -1
	}
	for i := 1; i < len(result); i++ {
		result[i] = make([]string, 5) // len(result[0])
	}

	for i, c := range cs[:len(cs)-1] { // remove the last empty string
		ps := strings.Split(c, "\t")[1:] // remove the first empty string
		for j, p := range ps {
			if j == 0 {
				p = p[:len(p)-1] // remove the last ':'
			}
			result[i+1][j] = p
		}
	}
	printResult(result, leftAlign, false)
}


================================================
FILE: cmd/info_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"strings"
	"testing"

	"github.com/agiledragon/gomonkey/v2"
	"github.com/stretchr/testify/require"
)

func TestInfo(t *testing.T) {
	tmpFile, err := os.CreateTemp("/tmp", "")
	if err != nil {
		t.Fatalf("create temporary file: %s", err)
	}
	defer tmpFile.Close()
	defer os.Remove(tmpFile.Name())
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)
	// mock os.Stdout
	patches := gomonkey.ApplyGlobalVar(os.Stdout, *tmpFile)
	defer patches.Reset()

	if err = os.MkdirAll(fmt.Sprintf("%s/dir1", testMountPoint), 0777); err != nil {
		t.Fatalf("mkdirAll failed: %s", err)
	}
	for i := 0; i < 10; i++ {
		filename := fmt.Sprintf("%s/dir1/f%d.txt", testMountPoint, i)
		if err = os.WriteFile(filename, []byte("test"), 0644); err != nil {
			t.Fatalf("write file failed: %s", err)
		}
	}

	if err = Main([]string{"", "info", fmt.Sprintf("%s/dir1", testMountPoint), "--strict"}); err != nil {
		t.Fatalf("info failed: %s", err)
	}
	content, err := os.ReadFile(tmpFile.Name())
	if err != nil {
		t.Fatalf("read file failed: %s", err)
	}
	replacer := strings.NewReplacer("\n", "", " ", "")
	res := replacer.Replace(string(content))
	answer := fmt.Sprintf("%s/dir1: inode: 2 files: 10 dirs: 1 length: 40 Bytes size: 44.00 KiB (45056 Bytes) path: /dir1", testMountPoint)
	answer = replacer.Replace(answer)
	require.Equal(t, answer, res)
}


================================================
FILE: cmd/integration_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"os"
	"os/exec"
	"testing"
	"time"

	"github.com/redis/go-redis/v9"
)

const gatewayMeta = "redis://127.0.0.1:6379/14"
const gatewayVolume = "gateway-volume"
const gatewayAddr = "localhost:9008"
const webdavMeta = "redis://127.0.0.1:6379/15"
const webdavVolume = "webdav-volume"
const webdavAddr = "localhost:9009"

func startGateway(t *testing.T) {
	opt, _ := redis.ParseURL(gatewayMeta)
	rdb := redis.NewClient(opt)
	_ = rdb.FlushDB(context.Background())
	testDir := t.TempDir()
	if err := Main([]string{"", "format", "--bucket", testDir, gatewayMeta, gatewayVolume}); err != nil {
		t.Fatalf("format failed: %s", err)
	}

	// must do reset, otherwise will panic
	ResetHttp()

	go func() {
		if err := Main([]string{"", "gateway", gatewayMeta, gatewayAddr, "--multi-buckets", "--keep-etag", "--object-tag", "--no-usage-report"}); err != nil {
			t.Errorf("gateway failed: %s", err)
		}
	}()
	time.Sleep(2 * time.Second)
}

func startWebdav(t *testing.T) {
	opt, _ := redis.ParseURL(webdavMeta)
	rdb := redis.NewClient(opt)
	_ = rdb.FlushDB(context.Background())
	testDir := t.TempDir()
	if err := Main([]string{"", "format", "--bucket", testDir, webdavMeta, webdavVolume}); err != nil {
		t.Fatalf("format failed: %s", err)
	}

	// must do reset, otherwise will panic
	ResetHttp()

	go func() {
		os.Setenv("WEBDAV_USER", "root")
		os.Setenv("WEBDAV_PASSWORD", "1234")
		if err := Main([]string{"", "webdav", webdavMeta, webdavAddr, "--no-usage-report"}); err != nil {
			t.Errorf("gateway failed: %s", err)
		}
	}()
	time.Sleep(2 * time.Second)
}

func TestIntegration(t *testing.T) {
	mountTemp(t, nil, nil, []string{"--enable-ioctl"})
	defer umountTemp(t)
	startGateway(t)
	startWebdav(t)
	_ = os.Chdir("../integration")
	makeCmd := exec.Command("make")
	out, err := makeCmd.CombinedOutput()
	if err != nil {
		t.Logf("std out:\n%s\n", string(out))
		t.Fatalf("std err failed with %s\n", err)
	} else {
		t.Logf("std out:\n%s\n", string(out))
	}
}


================================================
FILE: cmd/load.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"compress/gzip"
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/DataDog/zstd"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/pkg/errors"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdLoad() *cli.Command {
	return &cli.Command{
		Name:     "load",
		Action:   load,
		Category: "ADMIN",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "encrypt-rsa-key",
				Usage: "a path to RSA private key (PEM)",
			},
			&cli.StringFlag{
				Name:  "encrypt-algo",
				Usage: "encrypt algorithm (aes256gcm-rsa, chacha20-rsa)",
				Value: object.AES256GCM_RSA,
			},
			&cli.BoolFlag{
				Name:  "binary",
				Usage: "load metadata from a binary file (different from original JSON format)",
			},
			&cli.BoolFlag{
				Name:  "stat",
				Usage: "show statistics of the metadata binary file",
			},
			&cli.Int64Flag{
				Name:  "offset",
				Usage: "offset of binary backup's segment (works with --stat and --binary). Use -1 to show all offsets, or specify one for details",
			},
			&cli.IntFlag{
				Name:  "threads",
				Value: 10,
				Usage: "number of threads to load binary metadata, only works with --binary",
			},
		},
		Usage:     "Load metadata from a previously dumped file",
		ArgsUsage: "META-URL [FILE]",
		Description: `
Load metadata into an empty metadata engine or show statistics of the backup file.

WARNING: Do NOT use new engine and the old one at the same time, otherwise it will probably break
consistency of the volume.

Examples:
$ juicefs load redis://localhost/1 meta-dump.json.gz
$ juicefs load redis://localhost/1 meta-dump.bin --binary --threads 10
$ juicefs load meta-dump.bin --binary --stat

Details: https://juicefs.com/docs/community/metadata_dump_load`,
	}
}

type reader struct {
	encryptR  io.ReadCloser
	compressR io.ReadCloser
}

func (r *reader) Read(p []byte) (n int, err error) {
	return r.compressR.Read(p)
}

func (r *reader) Close() error {
	if err := r.compressR.Close(); err != nil {
		return err
	}
	if r.encryptR != r.compressR {
		return r.encryptR.Close()
	}
	return nil
}

func open(src string, key string, algo string) (io.ReadCloser, error) {
	var r io.ReadCloser
	var ioErr error
	var fp io.ReadCloser
	if key != "" {
		privKey, err := object.ParsePrivateKeyFromPem([]byte(loadEncrypt(key)), []byte(os.Getenv("JFS_RSA_PASSPHRASE")))
		if err != nil {
			if errors.Is(err, object.ErrKeyNeedPasswd) {
				return nil, fmt.Errorf("%w: please set the 'JFS_RSA_PASSPHRASE' environment variable", err)
			}
			return nil, fmt.Errorf("parse private key: %s", err)
		}
		encryptor, err := object.NewDataEncryptor(object.NewKeyEncryptor(privKey), algo)
		if err != nil {
			return nil, err
		}
		if _, err := os.Stat(src); err != nil {
			return nil, fmt.Errorf("failed to stat %s: %s", src, err)
		}
		var srcAbsPath string
		srcAbsPath, err = filepath.Abs(src)
		if err != nil {
			return nil, fmt.Errorf("failed to get absolute path of %s: %s", src, err)
		}
		fileBlob, err := object.CreateStorage("file", strings.TrimSuffix(src, filepath.Base(srcAbsPath)), "", "", "")
		if err != nil {
			return nil, err
		}
		blob := object.NewEncrypted(fileBlob, encryptor)
		fp, ioErr = blob.Get(context.Background(), filepath.Base(srcAbsPath), 0, -1)
	} else {
		fp, ioErr = os.Open(src)
	}
	if ioErr != nil {
		return nil, ioErr
	}
	if strings.HasSuffix(src, ".gz") {
		var err error
		r, err = gzip.NewReader(fp)
		if err != nil {
			return nil, err
		}
	} else if strings.HasSuffix(src, ".zstd") {
		r = zstd.NewReader(fp)
	} else {
		r = fp
	}
	return &reader{compressR: r, encryptR: fp}, nil
}

func convert(path string, key, algo string) (string, error) {
	isCompress := false
	if strings.HasSuffix(path, ".gz") || strings.HasSuffix(path, ".zstd") {
		isCompress = true
	}

	if key == "" && !isCompress {
		return path, nil
	}

	nPath := path[:strings.LastIndex(path, ".")]
	if utils.Exists(nPath) {
		logger.Infof("plain backup %s already exists, skip conversion", nPath)
		return nPath, nil
	}

	r, err := open(path, key, algo)
	if err != nil {
		return "", err
	}
	defer r.Close()

	w, err := os.Create(nPath)
	if err != nil {
		return "", fmt.Errorf("failed to create plain backup %s: %w", nPath, err)
	}
	defer w.Close()

	if _, err = io.Copy(w, r); err != nil {
		return "", fmt.Errorf("failed to convert %s to %s: %w", path, nPath, err)
	}
	logger.Infof("converted backup %s to %s", path, nPath)
	return nPath, nil
}

func load(ctx *cli.Context) error {
	setup0(ctx, 1, 2)

	key, algo := ctx.String("encrypt-rsa-key"), ctx.String("encrypt-algo")
	src := ctx.Args().Get(1)
	var err error
	if ctx.Bool("binary") {
		if ctx.Bool("stat") {
			src = ctx.Args().Get(0)
		}
		if src, err = convert(src, key, algo); err != nil {
			return err
		}
		if ctx.Bool("stat") {
			return statBak(ctx, src)
		}
	}

	metaUri := ctx.Args().Get(0)
	removePassword(metaUri)
	var r io.ReadCloser
	if ctx.Args().Len() == 1 {
		r = os.Stdin
		src = "STDIN"
	} else {
		r, err = open(src, key, algo)
		if err != nil {
			return err
		}
		defer r.Close()
	}

	m := meta.NewClient(metaUri, nil)
	if format, err := m.Load(false); err == nil {
		return fmt.Errorf("database %s is used by volume %s", utils.RemovePassword(metaUri), format.Name)
	}

	if ctx.Bool("binary") {
		progress := utils.NewProgress(false)
		bars := make(map[string]*utils.Bar)
		for _, name := range meta.SegType2Name {
			bars[name] = progress.AddCountSpinner(name)
		}

		opt := &meta.LoadOption{
			Threads: ctx.Int("threads"),
			Progress: func(name string, cnt int) {
				bars[name].IncrBy(cnt)
			},
		}
		if err := m.LoadMetaV2(meta.WrapContext(ctx.Context), r, opt); err != nil {
			return err
		}
		progress.Done()
	} else {
		if err := m.LoadMeta(r); err != nil {
			return err
		}
	}
	if format, err := m.Load(true); err == nil {
		if format.SecretKey == "removed" {
			logger.Warnf("secret key was removed; please correct it with `config` command")
		}
	} else {
		return err
	}
	logger.Infof("load metadata from %s succeed", src)
	return nil
}

func statBak(ctx *cli.Context, path string) error {
	logger.Infof("load backup from %s", path)
	fp, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("failed to open file %s: %w", path, err)
	}
	defer fp.Close()

	if !ctx.IsSet("offset") {
		return showBakSummary(ctx, fp, false)
	}

	offset := ctx.Int64("offset")
	if offset == -1 {
		return showBakSummary(ctx, fp, true)
	}

	return showBakDetail(ctx, fp, offset)
}

func showBakSummary(ctx *cli.Context, fp *os.File, withOffset bool) error {
	bak := &meta.BakFormat{}
	footer, err := bak.ReadFooter(fp)
	if err != nil {
		return fmt.Errorf("failed to read footer: %w", err)
	}

	fmt.Printf("Backup Version: %d\n", footer.Msg.Version)
	data := make([][]string, 0, len(footer.Msg.Infos))
	for name, info := range footer.Msg.Infos {
		if withOffset {
			data = append(data, []string{name, fmt.Sprintf("%d", info.Num), fmt.Sprintf("%d", info.Offset)})
		} else {
			data = append(data, []string{name, fmt.Sprintf("%d", info.Num)})
		}
	}
	sort.Slice(data, func(i, j int) bool {
		return data[i][0] < data[j][0]
	})

	if withOffset {
		fmt.Println(strings.Repeat("-", 34))
		fmt.Printf("%-10s| %-10s| %-10s\n", "Name", "Num", "Offset")
		fmt.Println(strings.Repeat("-", 34))
	} else {
		fmt.Println(strings.Repeat("-", 23))
		fmt.Printf("%-10s| %-10s\n", "Name", "Num")
		fmt.Println(strings.Repeat("-", 23))
	}
	for _, v := range data {
		fmt.Printf("%-10s| %-10s|", v[0], v[1])
		if withOffset {
			fmt.Printf(" %-10s", v[2])
		}
		fmt.Println()
	}
	return nil
}

func showBakDetail(ctx *cli.Context, fp *os.File, offset int64) error {
	bak := &meta.BakFormat{}
	if _, err := fp.Seek(offset, io.SeekStart); err != nil {
		return err
	}

	seg, err := bak.ReadSegment(fp)
	if err != nil {
		return fmt.Errorf("failed to read segment: %w", err)
	}

	fmt.Printf("Segment: %s\n", seg.Name())
	fmt.Printf("Value: %s\n", seg)
	return nil
}


================================================
FILE: cmd/main.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"net/http"
	_ "net/http/pprof"
	"os"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"

	"github.com/google/uuid"
	"github.com/grafana/pyroscope-go"
	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/sirupsen/logrus"
	"github.com/urfave/cli/v2"
	"go.uber.org/automaxprocs/maxprocs"
)

var logger = utils.GetLogger("juicefs")
var debugAgent string
var debugAgentOnce sync.Once

func Main(args []string) error {
	// we have to call this because gspt removes all arguments
	utils.SetProcTitle(os.Args)
	cli.VersionFlag = &cli.BoolFlag{
		Name: "version", Aliases: []string{"V"},
		Usage: "print version only",
	}
	app := &cli.App{
		Name:                 "juicefs",
		Usage:                "A POSIX file system built on Redis and object storage.",
		Version:              version.Version(),
		Copyright:            "Apache License 2.0",
		HideHelpCommand:      true,
		EnableBashCompletion: true,
		Flags:                globalFlags(),
		Commands: []*cli.Command{
			cmdFormat(),
			cmdConfig(),
			cmdQuota(),
			cmdDestroy(),
			cmdGC(),
			cmdFsck(),
			cmdRestore(),
			cmdDump(),
			cmdLoad(),
			cmdVersion(),
			cmdStatus(),
			cmdStats(),
			cmdProfile(),
			cmdInfo(),
			cmdMount(),
			cmdUmount(),
			cmdGateway(),
			cmdWebDav(),
			cmdBench(),
			cmdObjbench(),
			cmdMdtest(),
			cmdWarmup(),
			cmdRmr(),
			cmdSync(),
			cmdDebug(),
			cmdClone(),
			cmdSummary(),
			cmdCompact(),
		},
	}

	if runtime.GOOS == "windows" {
		app.Commands = append(app.Commands, cmdPrintSID())
	}

	if calledViaMount(args) {
		var err error
		args, err = handleSysMountArgs(args)
		if err != nil {
			return err
		}
		if len(args) < 1 {
			args = []string{"mount", "--help"}
		}
	}
	err := app.Run(reorderOptions(app, args))
	if errno, ok := err.(syscall.Errno); ok && errno == 0 {
		err = nil
	}
	return err
}

func calledViaMount(args []string) bool {
	if os.Getenv("CALL_VIA_MOUNT") != "" {
		return true
	}
	if strings.HasSuffix(args[0], "/mount.juicefs") {
		os.Setenv("CALL_VIA_MOUNT", "1")
		return true
	}
	return false
}

func handleSysMountArgs(args []string) ([]string, error) {
	optionToCmdFlag := map[string]string{
		"attrcacheto":     "attr-cache",
		"entrycacheto":    "entry-cache",
		"direntrycacheto": "dir-entry-cache",
	}
	newArgs := []string{"juicefs", "mount", "-d"}
	if len(args) < 3 {
		return nil, nil
	}
	mountOptions := args[3:]
	sysOptions := []string{"_netdev", "nofail", "rw", "defaults", "remount"}
	fuseOptions := make([]string, 0, 20)
	cmdFlagsLookup := make(map[string]bool, 20)
	for _, f := range append(cmdMount().Flags, globalFlags()...) {
		for _, name := range f.Names() {
			if len(name) > 1 {
				_, cmdFlagsLookup[name] = f.(*cli.BoolFlag)
			}
		}
	}

	parseFlag := false
	for _, option := range mountOptions {
		if option == "-o" {
			parseFlag = true
			continue
		}
		if !parseFlag {
			continue
		}

		opts := strings.Split(option, ",")
		for _, opt := range opts {
			opt = strings.TrimSpace(opt)
			if opt == "" || opt == "background" || utils.StringContains(sysOptions, opt) {
				continue
			}
			// Lower case option name is preferred, but if it's the same as flag name, we also accept it
			if strings.Contains(opt, "=") {
				fields := strings.SplitN(opt, "=", 2)
				if flagName, ok := optionToCmdFlag[fields[0]]; ok {
					newArgs = append(newArgs, fmt.Sprintf("--%s=%s", flagName, fields[1]))
				} else if _, ok := cmdFlagsLookup[fields[0]]; ok {
					newArgs = append(newArgs, fmt.Sprintf("--%s=%s", fields[0], fields[1]))
				} else {
					fuseOptions = append(fuseOptions, opt)
				}
			} else if flagName, ok := optionToCmdFlag[opt]; ok {
				newArgs = append(newArgs, fmt.Sprintf("--%s", flagName))
			} else if isBool, ok := cmdFlagsLookup[opt]; ok {
				if !isBool {
					return nil, fmt.Errorf("option %s requires a value", opt)
				}
				newArgs = append(newArgs, fmt.Sprintf("--%s", opt))
				if opt == "debug" {
					fuseOptions = append(fuseOptions, opt)
				}
			} else {
				fuseOptions = append(fuseOptions, opt)
			}
		}

		parseFlag = false
	}
	if len(fuseOptions) > 0 {
		newArgs = append(newArgs, "-o", strings.Join(fuseOptions, ","))
	}
	newArgs = append(newArgs, args[1], args[2])
	logger.Debug("Parsed mount args: ", strings.Join(newArgs, " "))
	return newArgs, nil
}

func isFlag(flags []cli.Flag, option string) (bool, bool) {
	if !strings.HasPrefix(option, "-") {
		return false, false
	}
	// --V or -v work the same
	option = strings.TrimLeft(option, "-")
	for _, flag := range flags {
		_, isBool := flag.(*cli.BoolFlag)
		for _, name := range flag.Names() {
			if option == name || strings.HasPrefix(option, name+"=") {
				return true, !isBool && !strings.Contains(option, "=")
			}
		}
	}
	return false, false
}

func reorderOptions(app *cli.App, args []string) []string {
	var newArgs = []string{args[0]}
	var others []string
	globalFlags := append(app.Flags, cli.VersionFlag)
	for i := 1; i < len(args); i++ {
		option := args[i]
		if ok, hasValue := isFlag(globalFlags, option); ok {
			newArgs = append(newArgs, option)
			if hasValue {
				i++
				if i >= len(args) {
					logger.Fatalf("option %s requires value", option)
				}
				newArgs = append(newArgs, args[i])
			}
		} else {
			others = append(others, option)
		}
	}
	// no command
	if len(others) == 0 {
		return newArgs
	}
	cmdName := others[0]
	var cmd *cli.Command
	for _, c := range app.Commands {
		if c.Name == cmdName {
			cmd = c
			break
		}
	}
	if cmd == nil {
		// can't recognize the command, skip it
		return append(newArgs, others...)
	}

	newArgs = append(newArgs, cmdName)
	args, others = others[1:], nil
	// -h is valid for all the commands
	cmdFlags := append(cmd.Flags, cli.HelpFlag)
	for i := 0; i < len(args); i++ {
		option := args[i]
		if ok, hasValue := isFlag(cmdFlags, option); ok {
			newArgs = append(newArgs, option)
			if hasValue && len(args[i+1:]) > 0 {
				i++
				newArgs = append(newArgs, args[i])
			}
		} else {
			if strings.HasPrefix(option, "-") && !utils.StringContains(args, "--generate-bash-completion") {
				logger.Fatalf("unknown option: %s", option)
			}
			others = append(others, option)
		}
	}
	return append(newArgs, others...)
}

// Check number of positional arguments, set logger level and setup agent if needed
func setup(c *cli.Context, n int) {
	setup0(c, n, n)
}

func setup0(c *cli.Context, min, max int) {
	if c.NArg() < min {
		fmt.Printf("ERROR: This command requires at least %d arguments\n", min)
		fmt.Printf("USAGE:\n   juicefs %s [command options] %s\n", c.Command.Name, c.Command.ArgsUsage)
		os.Exit(1)
	} else if max > 0 && c.NArg() > max {
		fmt.Printf("ERROR: This command accept at most %d arguments but got %+v\n", max, c.Args().Slice())
		fmt.Printf("USAGE:\n   juicefs %s [command options] %s\n", c.Command.Name, c.Command.ArgsUsage)
		logger.Exit(1)
	}

	switch c.String("log-level") {
	case "trace":
		utils.SetLogLevel(logrus.TraceLevel)
	case "debug":
		utils.SetLogLevel(logrus.DebugLevel)
	case "info":
		utils.SetLogLevel(logrus.InfoLevel)
	case "warn":
		utils.SetLogLevel(logrus.WarnLevel)
	case "error":
		utils.SetLogLevel(logrus.ErrorLevel)
	case "fatal":
		utils.SetLogLevel(logrus.FatalLevel)
	case "panic":
		utils.SetLogLevel(logrus.PanicLevel)
	default:
		if c.Bool("trace") {
			utils.SetLogLevel(logrus.TraceLevel)
		} else if c.Bool("verbose") {
			utils.SetLogLevel(logrus.DebugLevel)
		} else if c.Bool("quiet") {
			utils.SetLogLevel(logrus.WarnLevel)
		} else {
			utils.SetLogLevel(logrus.InfoLevel)
		}
	}
	if c.Bool("no-color") {
		utils.DisableLogColor()
	}
	// set the correct value when it runs inside container
	if undo, err := maxprocs.Set(maxprocs.Logger(logger.Debugf)); err != nil {
		undo()
	}

	logID := c.String("log-id")
	if logID != "" {
		if logID == "random" {
			logID = uuid.New().String()
		}
		utils.SetLogID("[" + logID + "] ")
	}

	if !c.Bool("no-agent") {
		go debugAgentOnce.Do(func() {
			for port := 6060; port < 6100; port++ {
				debugAgent = fmt.Sprintf("127.0.0.1:%d", port)
				logger.Debugf("Debug agent listening on %s", debugAgent)
				_ = http.ListenAndServe(debugAgent, nil)
			}
		})
	}

	if c.IsSet("pyroscope") {
		tags := make(map[string]string)
		appName := fmt.Sprintf("juicefs.%s", c.Command.Name)
		if c.Command.Name == "mount" {
			tags["mountpoint"] = c.Args().Get(1)
		}
		if hostname, err := os.Hostname(); err == nil {
			tags["hostname"] = hostname
		}
		tags["pid"] = strconv.Itoa(os.Getpid())
		tags["version"] = version.Version()

		types := []pyroscope.ProfileType{pyroscope.ProfileCPU, pyroscope.ProfileInuseObjects, pyroscope.ProfileAllocObjects,
			pyroscope.ProfileInuseSpace, pyroscope.ProfileAllocSpace, pyroscope.ProfileGoroutines, pyroscope.ProfileMutexCount,
			pyroscope.ProfileMutexDuration, pyroscope.ProfileBlockCount, pyroscope.ProfileBlockDuration}
		if _, err := pyroscope.Start(pyroscope.Config{
			ApplicationName: appName,
			ServerAddress:   c.String("pyroscope"),
			Logger:          logger,
			Tags:            tags,
			AuthToken:       os.Getenv("PYROSCOPE_AUTH_TOKEN"),
			ProfileTypes:    types,
		}); err != nil {
			logger.Errorf("start pyroscope agent: %v", err)
		}
	}
}

func removePassword(uris ...string) {
	args := make([]string, len(os.Args))
	copy(args, os.Args)
	var idx int
	for _, uri := range uris {
		uri2 := utils.RemovePassword(uri)
		if uri2 != uri {
			for i := idx; i < len(os.Args); i++ {
				if os.Args[i] == uri {
					args[i] = uri2
					idx = i + 1
					break
				}
			}
		}
	}
	utils.SetProcTitle(args)
}


================================================
FILE: cmd/main_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"reflect"
	"strings"
	"testing"

	"github.com/urfave/cli/v2"
)

func TestArgsOrder(t *testing.T) {
	var app = &cli.App{
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "verbose",
				Aliases: []string{"v"},
			},
			&cli.Int64Flag{
				Name:    "key",
				Aliases: []string{"k"},
			},
		},
		Commands: []*cli.Command{
			{
				Name: "cmd",
				Flags: []cli.Flag{
					&cli.Int64Flag{
						Name: "k2",
					},
				},
			},
		},
	}

	var cases = [][]string{
		{"test", "cmd", "a", "-k2", "v2", "b", "--v"},
		{"test", "--v", "cmd", "-k2", "v2", "a", "b"},
		{"test", "cmd", "a", "-k2=v", "--h"},
		{"test", "cmd", "-k2=v", "--h", "a"},
	}
	for i := 0; i < len(cases); i += 2 {
		oreded := reorderOptions(app, cases[i])
		if !reflect.DeepEqual(cases[i+1], oreded) {
			t.Fatalf("expecte %v, but got %v", cases[i+1], oreded)
		}
	}
}

func TestHandleSysMountArgs(t *testing.T) {
	var cases = []struct {
		args    []string
		newArgs string
		fail    bool
	}{
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "no-usage-report"},
			"juicefs mount -d --no-usage-report memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "no-usage-report=true"},
			"juicefs mount -d --no-usage-report=true memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "cache-size=204800"},
			"juicefs mount -d --cache-size=204800 memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "verbose"},
			"juicefs mount -d --verbose memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "debug"},
			"juicefs mount -d --debug -o debug memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "cache-size=204800,no-usage-report=false,free-space-ratio=0.5,cache-dir=/data/juicfs,metrics=0.0.0.0:9567"},
			"juicefs mount -d --cache-size=204800 --no-usage-report=false --free-space-ratio=0.5 --cache-dir=/data/juicfs --metrics=0.0.0.0:9567 memkv:// /jfs",
			false,
		},
		{
			[]string{"/mount.juicefs", "memkv://", "/jfs", "-o", "cache-size"},
			"",
			true,
		},
	}
	for _, c := range cases {
		rawNewArgs, err := handleSysMountArgs(c.args)
		if c.fail && err == nil {
			t.Fatalf("expect error, but got nil")
		}
		if !c.fail && err != nil {
			t.Fatalf("expect nil, but got %v", err)
		}
		newArgs := strings.Join(rawNewArgs, " ")
		if c.newArgs != newArgs {
			t.Fatalf("expect `%v`, but got `%v`", c.newArgs, newArgs)
		}
	}
}


================================================
FILE: cmd/mdtest.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"math/rand"
	_ "net/http/pprof"
	"os"
	"path"
	"runtime"
	"sync"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/metric"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/mattn/go-isatty"
	"github.com/urfave/cli/v2"
)

var ctx = meta.NewContext(1, uint32(utils.GetCurrentUID()), []uint32{uint32(utils.GetCurrentGID())})
var umask = uint16(utils.GetUmask())

func init() {
	// For all the juicefs command, we treat admin/elevated privilege user as root(0) on Windows
	// just like the mount option '-adminasroot' does for the mounted filesystem.
	if runtime.GOOS == "windows" && utils.IsWinAdminOrElevatedPrivilege() {
		ctx = meta.NewContext(1, 0, []uint32{0})
	}
}

func createDir(jfs *fs.FileSystem, root string, d int, width int) error {
	if err := jfs.Mkdir(ctx, root, 0777, umask); err != 0 {
		return fmt.Errorf("Mkdir %s: %s", root, err)
	}
	if d > 0 {
		for i := 0; i < width; i++ {
			dn := path.Join(root, fmt.Sprintf("mdtest_tree.%d", i))
			if err := createDir(jfs, dn, d-1, width); err != nil {
				return err
			}
		}
	}
	return nil
}

func createFile(jfs *fs.FileSystem, bar *utils.Bar, np int, root string, d int, width, files, bytes int) error {
	m := jfs.Meta()
	for i := 0; i < files; i++ {
		fn := path.Join(root, fmt.Sprintf("file.mdtest.%d.%d", np, i))
		f, err := jfs.Create(ctx, fn, 0666, umask)
		if err != 0 {
			return fmt.Errorf("create %s: %s", fn, err)
		}
		if bytes > 0 {
			for indx := 0; indx*meta.ChunkSize < bytes; indx++ {
				var id uint64
				if st := m.NewSlice(ctx, &id); st != 0 {
					return fmt.Errorf("writechunk %s: %s", fn, st)
				}
				size := meta.ChunkSize
				if bytes < (indx+1)*meta.ChunkSize {
					size = bytes - indx*meta.ChunkSize
				}
				if st := m.Write(ctx, f.Inode(), uint32(indx), 0, meta.Slice{Id: id, Size: uint32(size), Len: uint32(size)}, time.Now()); st != 0 {
					return fmt.Errorf("writeend %s: %s", fn, st)
				}
			}
		}
		f.Close(ctx)
		bar.Increment()
	}
	if d > 0 {
		dirs := make([]int, width)
		for i := 0; i < width; i++ {
			dirs[i] = i
		}
		rand.Shuffle(width, func(i, j int) {
			dirs[i], dirs[j] = dirs[j], dirs[i]
		})
		for i := range dirs {
			dn := path.Join(root, fmt.Sprintf("mdtest_tree.%d", dirs[i]))
			if err := createFile(jfs, bar, np, dn, d-1, width, files, bytes); err != nil {
				return err
			}
		}
	}
	return nil
}

func runTest(jfs *fs.FileSystem, rootDir string, np, width, depth, files, bytes int) {
	dirs := 1
	w := width
	z := depth
	for z > 0 {
		dirs += w
		w = w * width
		z--
	}
	var total = dirs * np * files
	progress := utils.NewProgress(!isatty.IsTerminal(os.Stdout.Fd()))
	bar := progress.AddCountBar("create file", int64(total))
	logger.Infof("Create %d files in %d dirs", total, dirs)

	start := time.Now()
	if err := jfs.Mkdir(ctx, rootDir, 0777, umask); err != 0 {
		logger.Errorf("mkdir %s: %s", rootDir, err)
	}
	root := path.Join(rootDir, "test-dir.0-0")
	if err := jfs.Mkdir(ctx, root, 0777, umask); err != 0 {
		logger.Fatalf("Mkdir %s: %s", root, err)
	}
	root = path.Join(root, "mdtest_tree.0")
	if err := createDir(jfs, root, depth, width); err != nil {
		logger.Fatalf("initialize: %s", err)
	}
	t1 := time.Since(start)
	logger.Infof("Created %d dirs in %s (%d dirs/s)", dirs, t1, int(float64(dirs)/t1.Seconds()))

	var g sync.WaitGroup
	for i := 0; i < np; i++ {
		g.Add(1)
		go func(np int) {
			if err := createFile(jfs, bar, np, root, depth, width, files, bytes); err != nil {
				logger.Errorf("Create: %s", err)
			}
			g.Done()
		}(i)
	}
	g.Wait()
	progress.Done()
	used := time.Since(start) - t1
	logger.Infof("Created %d files in %s (%d files/s)", total, used, int(float64(total)/used.Seconds()))
}

func cmdMdtest() *cli.Command {
	selfFlags := []cli.Flag{
		&cli.IntFlag{
			Name:  "threads",
			Value: 1,
			Usage: "number of threads",
		},
		&cli.IntFlag{
			Name:  "dirs",
			Value: 3,
			Usage: "number of subdir",
		},
		&cli.IntFlag{
			Name:  "depth",
			Value: 2,
			Usage: "levels of tree",
		},
		&cli.IntFlag{
			Name:  "files",
			Value: 10,
			Usage: "number of files",
		},
		&cli.IntFlag{
			Name:  "write",
			Value: 0,
			Usage: "number of bytes",
		},
		&cli.StringFlag{
			Name:  "access-log",
			Usage: "path for JuiceFS access log",
		},
	}
	return &cli.Command{
		Name:      "mdtest",
		Action:    mdtest,
		Category:  "TOOL",
		Hidden:    true,
		Usage:     "run test on meta engines",
		ArgsUsage: "META-URL PATH",
		Description: `
Examples:
$ juicefs mdtest redis://localhost /test1`,
		Flags: expandFlags(selfFlags, clientFlags(0), shareInfoFlags()),
	}
}

func initForMdtest(c *cli.Context, mp string, metaUrl string) *fs.FileSystem {
	metaConf := getMetaConf(c, mp, c.Bool("read-only"))
	m := meta.NewClient(metaUrl, metaConf)
	format, err := m.Load(true)
	if err != nil {
		logger.Fatalf("load setting: %s", err)
	}
	if st := m.Chroot(meta.Background(), metaConf.Subdir); st != 0 {
		logger.Fatalf("Chroot to %s: %s", metaConf.Subdir, st)
	}
	registerer, registry := wrapRegister(c, mp, format.Name)

	blob, err := NewReloadableStorage(format, m, updateFormat(c))
	if err != nil {
		logger.Fatalf("object storage: %s", err)
	}
	logger.Infof("Data use %s", blob)

	chunkConf := getChunkConf(c, format)
	store := chunk.NewCachedStore(blob, *chunkConf, registerer)
	registerMetaMsg(m, store, chunkConf)

	err = m.NewSession(true)
	if err != nil {
		logger.Fatalf("new session: %s", err)
	}

	conf := getVfsConf(c, metaConf, format, chunkConf)
	conf.AccessLog = c.String("access-log")
	conf.AttrTimeout = utils.Duration(c.String("attr-cache"))
	conf.EntryTimeout = utils.Duration(c.String("entry-cache"))
	conf.DirEntryTimeout = utils.Duration(c.String("dir-entry-cache"))

	metricsAddr := exposeMetrics(c, registerer, registry)
	m.InitMetrics(registerer)
	vfs.InitMetrics(registerer)
	if c.IsSet("consul") {
		metadata := make(map[string]string)
		metadata["mountPoint"] = conf.Meta.MountPoint
		metric.RegisterToConsul(c.String("consul"), metricsAddr, metadata)
	}
	jfs, err := fs.NewFileSystem(conf, m, store, registry)
	if err != nil {
		logger.Fatalf("initialize failed: %s", err)
	}
	jfs.InitMetrics(registerer)
	return jfs
}

func mdtest(c *cli.Context) error {
	setup(c, 2)
	metaUrl := c.Args().Get(0)
	rootDir := c.Args().Get(1)
	removePassword(metaUrl)
	jfs := initForMdtest(c, "mdtest", metaUrl)
	runTest(jfs, rootDir, c.Int("threads"), c.Int("dirs"), c.Int("depth"), c.Int("files"), c.Int("write"))
	return jfs.Meta().CloseSession()
}


================================================
FILE: cmd/mount.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"context"
	"fmt"
	"log"
	"net"
	"net/http"
	_ "net/http/pprof"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"github.com/urfave/cli/v2"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/metric"
	"github.com/juicedata/juicefs/pkg/usage"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/juicedata/juicefs/pkg/vfs"
)

func cmdMount() *cli.Command {
	return &cli.Command{
		Name:      "mount",
		Action:    mount,
		Category:  "SERVICE",
		Usage:     "Mount a volume",
		ArgsUsage: "META-URL MOUNTPOINT",
		Description: `
Mount the target volume at the mount point.

Examples:
# Mount in foreground
$ juicefs mount redis://localhost /mnt/jfs

# Mount in background with password protected Redis
$ juicefs mount redis://:mypassword@localhost /mnt/jfs -d
# A safer alternative
$ META_PASSWORD=mypassword juicefs mount redis://localhost /mnt/jfs -d

# Mount with a sub-directory as root
$ juicefs mount redis://localhost /mnt/jfs --subdir /dir/in/jfs

# Enable "writeback" mode, which improves performance at the risk of losing objects
$ juicefs mount redis://localhost /mnt/jfs -d --writeback

# Enable "read-only" mode
$ juicefs mount redis://localhost /mnt/jfs -d --read-only

# Disable metadata backup
$ juicefs mount redis://localhost /mnt/jfs --backup-meta 0`,
		Flags: expandFlags(mountFlags(), clientFlags(1.0), shareInfoFlags()),
	}
}

func exposeMetrics(c *cli.Context, registerer prometheus.Registerer, registry *prometheus.Registry) string {
	var ip, port string
	// default set
	ip, port, err := net.SplitHostPort(c.String("metrics"))
	if err != nil {
		logger.Fatalf("metrics format error: %v", err)
	}
	go metric.UpdateMetrics(registerer)
	http.Handle("/metrics", promhttp.HandlerFor(
		registry,
		promhttp.HandlerOpts{
			// Opt into OpenMetrics to support exemplars.
			EnableOpenMetrics: true,
		},
	))
	registerer.MustRegister(collectors.NewBuildInfoCollector())

	// If not set metrics addr,the port will be auto set
	if !c.IsSet("metrics") {
		// If only set consul, ip will auto set
		if c.IsSet("consul") {
			ip, err = utils.GetLocalIp(c.String("consul"))
			if err != nil {
				logger.Errorf("Get local ip failed: %v", err)
				return ""
			}
		}
	}

	ln, err := net.Listen("tcp", net.JoinHostPort(ip, port))
	if err != nil {
		// Don't try other ports on metrics set but listen failed
		if c.IsSet("metrics") {
			logger.Errorf("listen on %s:%s failed: %v", ip, port, err)
			return ""
		}
		// Listen port on 0 will auto listen on a free port
		ln, err = net.Listen("tcp", net.JoinHostPort(ip, "0"))
		if err != nil {
			logger.Errorf("Listen failed: %v", err)
			return ""
		}
	}

	go func() {
		if err := http.Serve(ln, nil); err != nil {
			logger.Errorf("Serve for metrics: %s", err)
		}
	}()

	metricsAddr := ln.Addr().String()
	logger.Infof("Prometheus metrics listening on %s", metricsAddr)
	return metricsAddr
}

func wrapRegister(c *cli.Context, mp, name string) (prometheus.Registerer, *prometheus.Registry) {
	commonLabels := prometheus.Labels{"mp": mp, "vol_name": name, "juicefs_version": version.Version()}
	if h, err := os.Hostname(); err == nil {
		commonLabels["instance"] = h
	} else {
		logger.Warnf("cannot get hostname: %s", err)
	}
	if c.IsSet("custom-labels") {
		for _, kv := range strings.Split(c.String("custom-labels"), ";") {
			splited := strings.Split(kv, ":")
			if len(splited) != 2 {
				logger.Fatalf("invalid label format: %s", kv)
			}
			if utils.StringContains([]string{"mp", "vol_name", "instance"}, splited[0]) {
				logger.Warnf("overriding reserved label: %s", splited[0])
			}
			commonLabels[splited[0]] = splited[1]
		}
	}
	registry := prometheus.NewRegistry() // replace default so only JuiceFS metrics are exposed
	registerer := prometheus.WrapRegistererWithPrefix("juicefs_",
		prometheus.WrapRegistererWith(commonLabels, registry))

	registerer.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
	registerer.MustRegister(collectors.NewGoCollector())
	return registerer, registry
}

func updateFormat(c *cli.Context) func(*meta.Format) {
	return func(format *meta.Format) {
		if c.IsSet("bucket") {
			format.Bucket = c.String("bucket")
		}
		if c.IsSet("storage") {
			format.Storage = c.String("storage")
		}
		if c.IsSet("storage-class") {
			format.StorageClass = c.String("storage-class")
		}
		if c.IsSet("upload-limit") {
			format.UploadLimit = utils.ParseMbps(c, "upload-limit")
		}
		if c.IsSet("download-limit") {
			format.DownloadLimit = utils.ParseMbps(c, "download-limit")
		}
	}
}

func relPathToAbs(ss []string) []string {
	for i, d := range ss {
		if strings.HasPrefix(d, "/") {
			continue
		} else if strings.HasPrefix(d, "~/") {
			if h, err := os.UserHomeDir(); err == nil {
				ss[i] = filepath.Join(h, d[1:])
			} else {
				logger.Fatalf("Expand user home dir of %s: %s", d, err)
			}
		} else {
			if ad, err := filepath.Abs(d); err == nil {
				ss[i] = ad
			} else {
				logger.Fatalf("Find absolute path of %s: %s", d, err)
			}
		}
	}
	return ss
}

func cacheDirPathToAbs(c *cli.Context) {
	if runtime.GOOS != "windows" {
		if cd := c.String("cache-dir"); cd != "memory" {
			ds := utils.SplitDir(cd)
			ds = relPathToAbs(ds)
			for i, a := range os.Args {
				if a == cd || a == "--cache-dir="+cd {
					os.Args[i] = a[:len(a)-len(cd)] + strings.Join(ds, string(os.PathListSeparator))
				}
			}
		}
	}

	if rpAcLog := c.String("access-log"); rpAcLog != "" {
		ap, err := filepath.Abs(rpAcLog)
		if err == nil && ap != rpAcLog {
			for i, a := range os.Args {
				if a == rpAcLog || a == "--access-log="+rpAcLog {
					os.Args[i] = a[:len(a)-len(rpAcLog)] + ap
					break
				}
			}
		}
	}
}

func daemonRun(c *cli.Context, addr string, vfsConf *vfs.Config) {
	cacheDirPathToAbs(c)
	_ = expandPathForEmbedded(addr)
	// The default log to syslog is only in daemon mode.
	utils.InitLoggers(!c.Bool("no-syslog"))
	err := makeDaemon(c, vfsConf)
	if err != nil {
		logger.Fatalf("Failed to make daemon: %s", err)
	}
	if runtime.GOOS == "linux" {
		log.SetOutput(os.Stderr)
	}
}

func expandPathForEmbedded(addr string) string {
	embeddedSchemes := []string{"sqlite3://", "badger://"}
	for _, es := range embeddedSchemes {
		if strings.HasPrefix(addr, es) {
			path := addr[len(es):]
			absPath, err := filepath.Abs(path)
			if err == nil && absPath != path {
				for i, a := range os.Args {
					if a == addr {
						expanded := es + absPath
						os.Args[i] = expanded
						return expanded
					}
				}
			}
		}
	}
	return addr
}

func getVfsConf(c *cli.Context, metaConf *meta.Config, format *meta.Format, chunkConf *chunk.Config) *vfs.Config {
	cfg := &vfs.Config{
		Meta:   metaConf,
		Format: *format,
		Security: &vfs.SecurityConfig{
			EnableCap:     c.Bool("enable-cap"),
			EnableSELinux: c.Bool("enable-selinux"),
		},
		Version:         version.Version(),
		Chunk:           chunkConf,
		BackupMeta:      utils.Duration(c.String("backup-meta")),
		BackupSkipTrash: c.Bool("backup-skip-trash"),
		Port:            &vfs.Port{DebugAgent: debugAgent, PyroscopeAddr: c.String("pyroscope")},
		PrefixInternal:  c.Bool("prefix-internal"),
		Pid:             os.Getpid(),
		PPid:            os.Getppid(),
		UMask:           0xFFFF,
		HideInternal:    c.Bool("hide-internal"),
	}

	if c.IsSet("umask") {
		umask, err := strconv.ParseUint(c.String("umask"), 8, 16)
		if err != nil {
			logger.Fatalf("invalid umask %s: %s", c.String("umask"), err)
		}
		cfg.UMask = uint16(umask)
	}

	skip_check := os.Getenv("SKIP_BACKUP_META_CHECK") == "true"
	if !skip_check && cfg.BackupMeta > 0 && cfg.BackupMeta < time.Minute*5 {
		logger.Fatalf("backup-meta should not be less than 5 minutes: %s", cfg.BackupMeta)
	}
	return cfg
}

func registerMetaMsg(m meta.Meta, store chunk.ChunkStore, chunkConf *chunk.Config) {
	m.OnMsg(meta.DeleteSlice, func(args ...interface{}) error {
		return store.Remove(args[0].(uint64), int(args[1].(uint32)))
	})
	m.OnMsg(meta.CompactChunk, func(args ...interface{}) error {
		return vfs.Compact(*chunkConf, store, args[0].([]meta.Slice), args[1].(uint64))
	})
}

func readConfig(mp string) ([]byte, error) {
	contents, err := os.ReadFile(filepath.Join(mp, ".jfs.config"))
	if os.IsNotExist(err) {
		contents, err = os.ReadFile(filepath.Join(mp, ".config"))
	}
	return contents, err
}

func getMetaConf(c *cli.Context, mp string, readOnly bool) *meta.Config {
	conf := meta.DefaultConf()
	conf.Retries = c.Int("io-retries")
	conf.MaxDeletes = c.Int("max-deletes")
	conf.SkipDirNlink = c.Int("skip-dir-nlink")
	conf.ReadOnly = readOnly
	conf.NoBGJob = c.Bool("no-bgjob")
	conf.OpenCache = utils.Duration(c.String("open-cache"))
	conf.OpenCacheLimit = c.Uint64("open-cache-limit")
	conf.Heartbeat = utils.Duration(c.String("heartbeat"))
	conf.MountPoint = mp
	conf.Subdir = c.String("subdir")
	conf.SkipDirMtime = utils.Duration(c.String("skip-dir-mtime"))
	conf.Sid, _ = strconv.ParseUint(os.Getenv("_JFS_META_SID"), 10, 64)
	conf.SortDir = c.Bool("sort-dir")
	conf.FastStatfs = c.Bool("fast-statfs")

	atimeMode := c.String("atime-mode")
	if atimeMode != meta.RelAtime && atimeMode != meta.StrictAtime && atimeMode != meta.NoAtime {
		logger.Warnf("unknown atime-mode \"%s\", changed to %s", atimeMode, meta.NoAtime)
		atimeMode = meta.NoAtime
	}
	conf.AtimeMode = atimeMode

	// Parse network interfaces
	if ifaces := c.String("network-interfaces"); ifaces != "" {
		conf.NetworkInterfaces = strings.Split(ifaces, ",")
		// Trim whitespace from each interface name
		for i := range conf.NetworkInterfaces {
			conf.NetworkInterfaces[i] = strings.TrimSpace(conf.NetworkInterfaces[i])
		}
	}

	return conf
}

func getChunkConf(c *cli.Context, format *meta.Format) *chunk.Config {
	cm, err := strconv.ParseUint(c.String("cache-mode"), 8, 32)
	if err != nil {
		logger.Warnf("Invalid cache-mode %s, using default value 0600", c.String("cache-mode"))
		cm = 0600
	}
	chunkConf := &chunk.Config{
		BlockSize:  format.BlockSize * 1024,
		Compress:   format.Compression,
		HashPrefix: format.HashPrefix,

		GetTimeout:             utils.Duration(c.String("get-timeout")),
		PutTimeout:             utils.Duration(c.String("put-timeout")),
		MaxUpload:              c.Int("max-uploads"),
		MaxDownload:            c.Int("max-downloads"),
		MaxStageWrite:          c.Int("max-stage-write"),
		MaxRetries:             c.Int("io-retries"),
		Writeback:              c.Bool("writeback"),
		WritebackThresholdSize: int(utils.ParseBytes(c, "writeback-threshold-size", 'B')),
		Prefetch:               c.Int("prefetch"),
		BufferSize:             utils.ParseBytes(c, "buffer-size", 'M'),
		UploadLimit:            utils.ParseMbps(c, "upload-limit") * 1e6 / 8,
		DownloadLimit:          utils.ParseMbps(c, "download-limit") * 1e6 / 8,
		UploadDelay:            utils.Duration(c.String("upload-delay")),
		UploadHours:            c.String("upload-hours"),

		CacheDir:          c.String("cache-dir"),
		CacheSize:         utils.ParseBytes(c, "cache-size", 'M'),
		CacheItems:        c.Int64("cache-items"),
		FreeSpace:         float32(c.Float64("free-space-ratio")),
		CacheMode:         os.FileMode(cm),
		CacheFullBlock:    !c.Bool("cache-partial-only"),
		CacheLargeWrite:   c.Bool("cache-large-write"),
		CacheChecksum:     c.String("verify-cache-checksum"),
		CacheEviction:     c.String("cache-eviction"),
		CacheScanInterval: utils.Duration(c.String("cache-scan-interval")),
		CacheExpire:       utils.Duration(c.String("cache-expire")),
		OSCache:           os.Getenv("JFS_DROP_OSCACHE") == "",
		AutoCreate:        true,
	}
	if c.IsSet("max-readahead") {
		chunkConf.Readahead = int(utils.ParseBytes(c, "max-readahead", 'M'))
	} else {
		chunkConf.Readahead = 8 * chunkConf.BlockSize
	}

	if chunkConf.UploadLimit == 0 {
		chunkConf.UploadLimit = format.UploadLimit * 1e6 / 8
	}
	if chunkConf.DownloadLimit == 0 {
		chunkConf.DownloadLimit = format.DownloadLimit * 1e6 / 8
	}
	chunkConf.SelfCheck(format.UUID)
	return chunkConf
}

func initBackgroundTasks(c *cli.Context, vfsConf *vfs.Config, metaConf *meta.Config, m meta.Meta, blob object.ObjectStorage, registerer prometheus.Registerer, registry *prometheus.Registry) {
	metricsAddr := exposeMetrics(c, registerer, registry)
	m.InitMetrics(registerer)
	if !metaConf.NoBGJob {
		m.InitSharedMetrics(registerer)
	}
	vfs.InitMetrics(registerer)
	vfsConf.Port.PrometheusAgent = metricsAddr
	if c.IsSet("consul") {
		metadata := make(map[string]string)
		metadata["mountPoint"] = vfsConf.Meta.MountPoint
		metric.RegisterToConsul(c.String("consul"), metricsAddr, metadata)
		vfsConf.Port.ConsulAddr = c.String("consul")
	}
	if !metaConf.ReadOnly && !metaConf.NoBGJob && vfsConf.BackupMeta > 0 {
		registerer.MustRegister(vfs.LastBackupTimeG)
		registerer.MustRegister(vfs.LastBackupDurationG)
		go vfs.Backup(m, blob, vfsConf.BackupMeta, vfsConf.BackupSkipTrash)
	} else {
		logger.Warnf("Metadata backup is disabled")
	}
	if !c.Bool("no-usage-report") {
		go usage.ReportUsage(m, version.Version())
	}
}

type storageHolder struct {
	object.ObjectStorage
	fmt meta.Format
}

func (h *storageHolder) Shutdown() {
	object.Shutdown(h.ObjectStorage)
}

func NewReloadableStorage(format *meta.Format, cli meta.Meta, patch func(*meta.Format)) (object.ObjectStorage, error) {
	if patch != nil {
		patch(format)
	}
	blob, err := createStorage(*format)
	if err != nil {
		return nil, err
	}
	holder := &storageHolder{
		ObjectStorage: blob,
		fmt:           *format, // keep a copy to find the change
	}
	cli.OnReload(func(new *meta.Format) {
		if patch != nil {
			patch(new)
		}
		old := &holder.fmt
		if new.Storage != old.Storage || new.Bucket != old.Bucket || new.AccessKey != old.AccessKey || new.SecretKey != old.SecretKey || new.SessionToken != old.SessionToken || new.StorageClass != old.StorageClass {
			logger.Infof("found new configuration: storage=%s bucket=%s ak=%s storageClass=%s", new.Storage, new.Bucket, new.AccessKey, new.StorageClass)

			newBlob, err := createStorage(*new)
			if err != nil {
				logger.Warnf("object storage: %s", err)
				return
			}
			holder.ObjectStorage = newBlob
			holder.fmt = *new
		}
	})
	return holder, nil
}

func insideContainer() bool {
	if _, err := os.Stat("/.dockerenv"); err == nil {
		return true
	}
	mountinfo, err := os.Open("/proc/1/mountinfo")
	if err != nil {
		if os.IsNotExist(err) {
			return false
		} else {
			logger.Warnf("Open /proc/1/mountinfo: %s", err)
			return false
		}
	}
	defer mountinfo.Close()
	scanner := bufio.NewScanner(mountinfo)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		if len(fields) > 8 && fields[4] == "/" {
			fstype := fields[8]
			return strings.Contains(fstype, "overlay") || strings.Contains(fstype, "aufs")
		}
	}
	if err = scanner.Err(); err != nil {
		logger.Warnf("scan /proc/1/mountinfo: %s", err)
	}
	return false
}

func getDefaultLogDir() string {
	var defaultLogDir = "/var/log"
	switch runtime.GOOS {
	case "linux":
		if os.Getuid() == 0 {
			break
		}
		fallthrough
	case "darwin":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			logger.Warn(err)
			homeDir = defaultLogDir
		}
		defaultLogDir = path.Join(homeDir, ".juicefs")
	case "windows":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			logger.Fatalf("%v", err)
		}
		defaultLogDir = path.Join(homeDir, ".juicefs")
	}
	return defaultLogDir
}

func mount(c *cli.Context) error {
	setup(c, 2)
	addr := c.Args().Get(0)
	removePassword(addr)
	mp := c.Args().Get(1)

	stage := getDaemonStage()
	if stage < 0 || stage > 2 {
		logger.Fatalf("Invalid daemon stage: %d", stage)
	}
	supervisor := os.Getenv("JFS_SUPERVISOR")
	if supervisor != "" || runtime.GOOS == "windows" {
		stage = 3
	}

	var err error
	if stage == 0 || supervisor == "test" {
		err = utils.WithTimeout(context.TODO(), func(context.Context) error {
			mp, err = filepath.Abs(mp)
			return err
		}, time.Second*3)
		if err != nil {
			logger.Fatalf("abs %s: %s", mp, err)
		}
		if mp == "/" {
			logger.Fatalf("should not mount on the root directory")
		}
		prepareMp(mp)
		if runtime.GOOS == "linux" && c.Bool("update-fstab") && !calledViaMount(os.Args) && !insideContainer() {
			if os.Getuid() != 0 {
				logger.Warnf("--update-fstab should be used with root")
			} else {
				var e1, e2 error
				if e1 = tryToInstallMountExec(); e1 != nil {
					logger.Warnf("failed to create /sbin/mount.juicefs: %s", e1)
				}
				if e2 = updateFstab(c); e2 != nil {
					logger.Warnf("failed to update fstab: %s", e2)
				}
				if e1 == nil && e2 == nil {
					logger.Infof("Successfully updated fstab, now you can mount with `mount %s`", mp)
				}
			}
		}
	}

	var format = &meta.Format{}
	var metaCli meta.Meta
	var blob object.ObjectStorage
	metaConf := getMetaConf(c, mp, c.Bool("read-only") || utils.StringContains(strings.Split(c.String("o"), ","), "ro"))
	if runtime.GOOS == "windows" {
		metaConf.CaseInsensi = !c.Bool("case-sensitive")
	}
	// stage 0: check the connection to fail fast
	// stage 2: need the volume name to check if it's already mounted
	// stage 3: the real service process
	if stage != 1 {
		metaCli = meta.NewClient(addr, metaConf)
		format, err = metaCli.Load(true)
		if err != nil {
			return err
		}
	}

	chunkConf := getChunkConf(c, format)
	vfsConf := getVfsConf(c, metaConf, format, chunkConf)
	setFuseOption(c, format, vfsConf)
	if stage == 0 || stage == 3 {
		blob, err = NewReloadableStorage(format, metaCli, updateFormat(c))
		if err != nil {
			return fmt.Errorf("object storage: %s", err)
		}
		logger.Infof("Data use %s", blob)

	}

	if stage < 3 {
		// supervisor serves no user request
		if metaCli != nil {
			if err = metaCli.Shutdown(); err != nil {
				logger.Errorf("[pid=%d] meta shutdown: %s", os.Getpid(), err)
			}
		}
		if blob != nil {
			// test storage at startup to fail fast instead of throwing EIO in the middle of user's workload
			if c.Bool("check-storage") {
				start := time.Now()
				if err = test(blob); err != nil {
					logger.Errorf("Object storage test failed: %s", err)
					return err
				} else {
					logger.Infof("Object storage test passed in %s", time.Since(start))
				}
			}
			object.Shutdown(blob)
		}
		var foreground bool
		if runtime.GOOS == "windows" || !c.Bool("background") || os.Getenv("JFS_FOREGROUND") != "" {
			foreground = true
		} else if c.Bool("background") || os.Getenv("__DAEMON_STAGE") != "" {
			foreground = false
		} else {
			foreground = os.Getppid() == 1 && !insideContainer()
		}
		if foreground {
			go checkMountpoint(format.Name, mp, c.String("log"), false)
		} else {
			daemonRun(c, addr, vfsConf) // only stage 0 needs the vfsConf
		}
		os.Setenv("JFS_SUPERVISOR", strconv.Itoa(os.Getppid()))
		return launchMount(c, mp, vfsConf)
	} else if runtime.GOOS == "windows" && c.Bool("background") {
		daemonRun(c, addr, vfsConf)
		return nil
	}
	logger.Infof("JuiceFS version %s", version.Version())

	if commPath := os.Getenv("_FUSE_FD_COMM"); commPath != "" {
		vfsConf.CommPath = commPath
		vfsConf.StatePath = fmt.Sprintf("/tmp/state%d.json", os.Getppid())
	}

	if st := metaCli.Chroot(meta.Background(), metaConf.Subdir); st != 0 {
		return st
	}
	// Wrap the default registry, all prometheus.MustRegister() calls should be afterwards
	registerer, registry := wrapRegister(c, mp, format.Name)

	store := chunk.NewCachedStore(blob, *chunkConf, registerer)
	registerMetaMsg(metaCli, store, chunkConf)

	err = metaCli.NewSession(true)
	if err != nil {
		logger.Fatalf("new session: %s", err)
	}

	metaCli.OnReload(func(fmt *meta.Format) {
		updateFormat(c)(fmt)
		store.UpdateLimit(fmt.UploadLimit, fmt.DownloadLimit)
	})
	v := vfs.NewVFS(vfsConf, metaCli, store, registerer, registry)
	installHandler(metaCli, mp, v, blob)
	v.UpdateFormat = updateFormat(c)
	initBackgroundTasks(c, vfsConf, metaConf, metaCli, blob, registerer, registry)
	mountMain(v, c)
	if err := v.FlushAll(""); err != nil {
		logger.Errorf("flush all delayed data: %s", err)
	}
	err = metaCli.CloseSession()
	object.Shutdown(blob)
	logger.Infof("The juicefs mount process exit successfully, mountpoint: %s", metaConf.MountPoint)
	return err
}


================================================
FILE: cmd/mount_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"reflect"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/version"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/agiledragon/gomonkey/v2"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/redis/go-redis/v9"
	"github.com/urfave/cli/v2"
)

const testMeta = "redis://127.0.0.1:6379/11"
const testMountPoint = "/tmp/jfs-unit-test"
const testVolume = "test"

// gomonkey may encounter the problem of insufficient permissions under mac, please solve it by viewing this link https://github.com/agiledragon/gomonkey/issues/70
func Test_exposeMetrics(t *testing.T) {
	addr := "redis://127.0.0.1:6379/12"
	client := meta.NewClient(addr, nil)
	format := &meta.Format{
		Name:      "test",
		BlockSize: 4096,
		Capacity:  1 << 30,
		DirStats:  true,
	}
	_ = client.Init(format, true)
	var appCtx *cli.Context
	stringPatches := gomonkey.ApplyMethod(reflect.TypeOf(appCtx), "String", func(_ *cli.Context, arg string) string {
		switch arg {
		case "metrics":
			return "127.0.0.1:9567"
		case "consul":
			return "127.0.0.1:8500"
		case "custom-labels":
			return "key1:value1"
		default:
			return ""
		}
	})
	isSetPatches := gomonkey.ApplyMethod(reflect.TypeOf(appCtx), "IsSet", func(_ *cli.Context, arg string) bool {
		switch arg {
		case "custom-labels":
			return true
		default:
			return false
		}
	})
	defer stringPatches.Reset()
	defer isSetPatches.Reset()
	ResetHttp()
	registerer, registry := wrapRegister(appCtx, "test", "test")
	metricsAddr := exposeMetrics(appCtx, registerer, registry)
	client.InitMetrics(registerer)
	vfs.InitMetrics(registerer)
	u := url.URL{Scheme: "http", Host: metricsAddr, Path: "/metrics"}
	resp, err := http.Get(u.String())
	require.Nil(t, err)
	all, err := io.ReadAll(resp.Body)
	require.Nil(t, err)
	require.NotEmpty(t, all)
	require.Contains(t, string(all), `key1="value1"`)
}

func ResetHttp() {
	http.DefaultServeMux = http.NewServeMux()
}

func resetTestMeta() *redis.Client { // using Redis
	opt, _ := redis.ParseURL(testMeta)
	rdb := redis.NewClient(opt)
	_ = rdb.FlushDB(context.Background())
	return rdb
}

var mountLock sync.Mutex

func mountTemp(t *testing.T, bucket *string, extraFormatOpts []string, extraMountOpts []string) {
	// wait for last mount exit
	for !mountLock.TryLock() {
		time.Sleep(100 * time.Millisecond)
	}

	_ = resetTestMeta()
	testDir := t.TempDir()
	if bucket != nil {
		*bucket = testDir
	}
	formatArgs := []string{"", "format", "--bucket", testDir, testMeta, testVolume}
	if extraFormatOpts != nil {
		formatArgs = append(formatArgs, extraFormatOpts...)
	}
	if err := Main(formatArgs); err != nil {
		t.Fatalf("format failed: %s", err)
	}

	// must do reset, otherwise will panic
	ResetHttp()

	os.Setenv("JFS_SUPERVISOR", "test")
	mountArgs := []string{"", "mount", "--enable-xattr", testMeta, testMountPoint, "--attr-cache", "0", "--entry-cache", "0", "--dir-entry-cache", "0", "--no-usage-report"}
	if extraMountOpts != nil {
		mountArgs = append(mountArgs, extraMountOpts...)
	}
	go func() {
		defer mountLock.Unlock()
		if err := Main(mountArgs); err != nil {
			t.Errorf("mount failed: %s", err)
		}
	}()
	time.Sleep(3 * time.Second)
	inode, err := utils.GetFileInode(testMountPoint)
	if err != nil {
		t.Fatalf("get file inode failed: %s", err)
	}
	if inode != 1 {
		t.Fatalf("mount failed: inode of %s got %d, expect 1", testMountPoint, inode)
	} else {
		t.Logf("mount %s success", testMountPoint)
	}
}

func umountTemp(t *testing.T) {
	if err := Main([]string{"", "umount", testMountPoint}); err != nil {
		t.Fatalf("umount failed: %s", err)
	}
}

func TestMount(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	if err := os.WriteFile(fmt.Sprintf("%s/f1.txt", testMountPoint), []byte("test"), 0644); err != nil {
		t.Fatalf("write file failed: %s", err)
	}
}

func TestFtruncate(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	fpath := fmt.Sprintf("%s/f1.txt", testMountPoint)
	if err := os.WriteFile(fpath, []byte("test"), 0644); err != nil {
		t.Fatalf("write file failed: %s", err)
	}
	file, err := os.OpenFile(fpath, os.O_RDWR, 0644)
	if err != nil {
		t.Fatalf("open file failed: %s", err)
	}
	if err = syscall.Ftruncate(int(file.Fd()), 1024); err != nil {
		t.Fatalf("ftruncate failed: %s", err)
	}
	fileInfo, err := os.Stat(fpath)
	if err != nil {
		t.Fatalf("stat file failed: %s", err)
	}
	if fileInfo.Size() != 1024 {
		t.Fatalf("ftruncate failed: file size is %d, expect 1024", fileInfo.Size())
	}
	if err = os.Remove(fpath); err != nil {
		t.Fatalf("remove file failed: %s", err)
	}
	if _, err = os.Stat(fpath); !errors.Is(err, syscall.ENOENT) {
		t.Fatalf("file still exists after delete: %s", err)
	}
	err = syscall.Ftruncate(int(file.Fd()), 2048)
	if err != nil {
		t.Fatalf("ftruncate failed: %s", err)
	}
	file.Close()
	_, err = os.Stat(fpath)
	if !errors.Is(err, syscall.ENOENT) {
		t.Fatalf("file still exists after close: %s", err)
	}
}
func TestUpdateFstab(t *testing.T) {
	if runtime.GOOS != "linux" {
		t.SkipNow()
	}
	mockFstab, err := os.CreateTemp("/tmp", "fstab")
	if err != nil {
		t.Fatalf("cannot make temp file: %s", err)
	}
	defer os.Remove(mockFstab.Name())

	patches := gomonkey.ApplyFunc(os.Rename, func(src, dest string) error {
		content, err := os.ReadFile(mockFstab.Name())
		if err != nil {
			t.Fatalf("error reading mocked fstab: %s", err)
		}
		rv := "redis://127.0.0.1:6379/11 /tmp/jfs-unit-test juicefs _netdev,enable-xattr,entry-cache=2,max-uploads=3,max_read=99,no-usage-report,writeback 0 0"
		lv := strings.TrimSpace(string(content))
		if lv != rv {
			t.Fatalf("incorrect fstab entry: %s", content)
		}
		return os.Rename(src, dest)
	})
	defer patches.Reset()
	mountArgs := []string{"juicefs", "mount", "--enable-xattr", testMeta, testMountPoint, "--no-usage-report"}
	mountOpts := []string{"--update-fstab", "--writeback", "--entry-cache=2", "--max-uploads", "3", "-o", "max_read=99"}
	patches = gomonkey.ApplyGlobalVar(&os.Args, append(mountArgs, mountOpts...))
	defer patches.Reset()
	mountTemp(t, nil, nil, mountOpts)
	defer umountTemp(t)
}

func TestUmount(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	umountTemp(t)

	inode, err := utils.GetFileInode(testMountPoint)
	if err != nil {
		t.Fatalf("get file inode failed: %s", err)
	}
	if inode == 1 {
		t.Fatalf("umount failed: inode of %s is 1", testMountPoint)
	}
}

func tryMountTemp(t *testing.T, bucket *string, extraFormatOpts []string, extraMountOpts []string) error {
	// wait for last mount exit
	for !mountLock.TryLock() {
		time.Sleep(100 * time.Millisecond)
	}

	_ = resetTestMeta()
	testDir := t.TempDir()
	if bucket != nil {
		*bucket = testDir
	}
	formatArgs := []string{"", "format", "--bucket", testDir, testMeta, testVolume}
	if extraFormatOpts != nil {
		formatArgs = append(formatArgs, extraFormatOpts...)
	}
	if err := Main(formatArgs); err != nil {
		return fmt.Errorf("format failed: %w", err)
	}

	// must do reset, otherwise will panic
	ResetHttp()

	mountArgs := []string{"", "mount", "--enable-xattr", testMeta, testMountPoint, "--attr-cache", "0", "--entry-cache", "0", "--dir-entry-cache", "0", "--no-usage-report"}
	if extraMountOpts != nil {
		mountArgs = append(mountArgs, extraMountOpts...)
	}

	os.Setenv("JFS_SUPERVISOR", "test")
	errChan := make(chan error, 1)
	go func() {
		defer mountLock.Unlock()
		errChan <- Main(mountArgs)
	}()

	select {
	case err := <-errChan:
		if err != nil {
			return fmt.Errorf("mount failed: %w", err)
		}
	case <-time.After(3 * time.Second):
	}

	inode, err := utils.GetFileInode(testMountPoint)
	if err != nil {
		return fmt.Errorf("get file inode failed: %w", err)
	}
	if inode != 1 {
		return fmt.Errorf("mount failed: inode of %s is %d, expect 1", testMountPoint, inode)
	}
	t.Logf("mount %s success", testMountPoint)
	return nil
}

func TestMountVersionMatch(t *testing.T) {
	oriVersion := version.Version()
	version.SetVersion("1.1.0")
	defer version.SetVersion(oriVersion)

	err := tryMountTemp(t, nil, nil, nil)
	assert.Nil(t, err)
	umountTemp(t)

	err = tryMountTemp(t, nil, []string{"--enable-acl=true"}, nil)
	assert.Contains(t, err.Error(), "check version")
}

func TestParseUIDGID(t *testing.T) {
	tests := []struct {
		input       string
		defaultUid  uint32
		defaultGid  uint32
		expectedUid uint32
		expectedGid uint32
	}{
		{"1000:1000", 65534, 65534, 1000, 1000},
		{"1000:", 65534, 65534, 1000, 65534},
		{":1000", 65534, 65534, 65534, 1000},
		{"", 65534, 65534, 65534, 65534},
		{"0:1000", 65534, 65534, 65534, 1000},
		{"1000:0", 65534, 65534, 1000, 65534},
	}

	for _, tt := range tests {
		uid, gid := parseUIDGID(tt.input, tt.defaultUid, tt.defaultGid)
		if uid != tt.expectedUid || gid != tt.expectedGid {
			t.Errorf("parseUIDGID(%q) = (%d, %d), want (%d, %d)", tt.input, uid, gid, tt.expectedUid, tt.expectedGid)
		}
	}
}


================================================
FILE: cmd/mount_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"os/exec"
	"os/signal"
	"os/user"
	"path"
	"path/filepath"
	"reflect"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/juicedata/godaemon"
	"github.com/urfave/cli/v2"

	"github.com/juicedata/juicefs/pkg/fuse"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/juicedata/juicefs/pkg/vfs"
)

var mountPid int

func showThreadStack(agentAddr string) {
	if agentAddr == "" {
		return
	}
	client := http.Client{
		Timeout: 10 * time.Second,
	}
	resp, err := client.Get(fmt.Sprintf("http://%s/debug/pprof/goroutine?debug=2", agentAddr))
	if err != nil {
		logger.Warnf("list goroutine from %s: %s", agentAddr, err)
	} else {
		grs, _ := io.ReadAll(resp.Body)
		logger.Infof("list goroutines from %s:\n%s", agentAddr, string(grs))
		_ = resp.Body.Close()
	}
}

// devMinor returns the minor component of a Linux device number.
func devMinor(dev uint64) uint32 {
	minor := dev & 0xff
	minor |= (dev >> 12) & 0xffffff00
	return uint32(minor)
}

func killMountProcess(pid int, dev uint64, lastActive *int64) {
	if pid > 0 {
		logger.Infof("watchdog: kill %d", pid)
		err := syscall.Kill(pid, syscall.SIGABRT)
		if err != nil {
			logger.Warnf("kill %d: %s", pid, err)
			_ = syscall.Kill(pid, syscall.SIGKILL)
		}
		// double check
		time.Sleep(time.Second * 10)
		if atomic.LoadInt64(lastActive)+30 > time.Now().Unix() {
			return
		}
	}
	if runtime.GOOS == "linux" && dev > 0 {
		tids, _ := os.ReadDir(fmt.Sprintf("/proc/%d/task", pid))
		for _, tid := range tids {
			stack, err := os.ReadFile(fmt.Sprintf("/proc/%d/task/%s/stack", pid, tid))
			if err == nil && bytes.Contains(stack, []byte("fuse_simple_request")) {
				logger.Errorf("find deadlock in mount process, abort it: %s", string(stack))
				if fuseFd > 0 {
					_ = syscall.Close(fuseFd)
					fuseFd = 0
				}
				f, err := os.OpenFile(fmt.Sprintf("/sys/fs/fuse/connections/%d/abort", devMinor(dev)), os.O_WRONLY, 0777)
				if err != nil {
					logger.Warn(err)
				} else {
					_, _ = f.WriteString("1")
					_ = f.Close()
				}
				break
			}
		}
	}
}

func loadConfig(path string) (string, *vfs.Config, error) {
	for d := path; d != "/"; d = filepath.Dir(d) {
		data, err := readConfig(d)
		if err == nil {
			var conf vfs.Config
			err = json.Unmarshal(data, &conf)
			return d, &conf, err
		}
		if !os.IsNotExist(err) {
			return "", nil, fmt.Errorf("read %s: %w", d, err)
		}
	}
	return "", nil, fmt.Errorf("%s is not inside JuiceFS", path)
}

func watchdog(ctx context.Context, mp string) {
	var lastActive int64
	var pid int
	var agentAddr string
	var dev uint64
	go func() {
		time.Sleep(time.Millisecond * 100) // wait for child process
		atomic.StoreInt64(&lastActive, time.Now().Unix())
		for ctx.Err() == nil {
			var confName = ".config"
			if !vfs.IsSpecialName(confName) {
				confName = ".jfs" + confName
			}
			var confStat syscall.Stat_t
			err := syscall.Stat(filepath.Join(mp, confName), &confStat)
			ino, _ := vfs.GetInternalNodeByName(confName)
			if err == nil && confStat.Ino == uint64(ino) {
				if dev == 0 && runtime.GOOS == "linux" {
					var st syscall.Stat_t
					if err := syscall.Stat(mp, &st); err == nil && st.Ino == 1 {
						dev = uint64(st.Dev)
					}
				}
				if pid == 0 {
					_, conf, err := loadConfig(mp)
					if err == nil {
						logger.Infof("watching %s, pid %d", mp, conf.Pid)
						pid = conf.Pid
						agentAddr = conf.Port.DebugAgent
					} else {
						logger.Warnf("load config: %s", err)
						continue
					}
				}
			}
			atomic.StoreInt64(&lastActive, time.Now().Unix())
			time.Sleep(time.Second * 5)
		}
	}()
	for ctx.Err() == nil {
		now := time.Now().Unix()
		if atomic.LoadInt64(&lastActive)+30 < now {
			showThreadStack(agentAddr)
			time.Sleep(time.Second * 30)
			// double check
			if atomic.LoadInt64(&lastActive)+60 < time.Now().Unix() && ctx.Err() == nil {
				logger.Infof("mount point %s is not active for %s", mp, time.Since(time.Unix(atomic.LoadInt64(&lastActive), 0)))
				showThreadStack(agentAddr)
				killMountProcess(pid, dev, &lastActive)
				atomic.StoreInt64(&lastActive, time.Now().Unix())
				pid = 0
				dev = 0
			}
		}
		time.Sleep(time.Second * 10)
	}
}

// parseFuseFd checks if `mountPoint` is the special form /dev/fd/N (with N >= 0),
// and returns N in this case. Returns -1 otherwise.
func parseFuseFd(mountPoint string) (fd int) {
	dir, file := path.Split(mountPoint)
	if dir != "/dev/fd/" {
		return -1
	}
	fd, err := strconv.Atoi(file)
	if err != nil || fd <= 0 {
		return -1
	}
	return fd
}

func checkMountpoint(name, mp, logPath string, background bool) {
	if parseFuseFd(mp) > 0 {
		logger.Infof("\033[92mOK\033[0m, %s with special mount point %s", name, mp)
		return
	}
	_, oldConf, _ := loadConfig(mp)
	mountTimeOut := 10 // default 10 seconds
	interval := 500    // check every 500 Millisecond
	if tStr, ok := os.LookupEnv("JFS_MOUNT_TIMEOUT"); ok {
		if t, err := strconv.ParseInt(tStr, 10, 64); err == nil {
			mountTimeOut = int(t)
		} else {
			logger.Errorf("invalid env JFS_MOUNT_TIMEOUT: %s %s", tStr, err)
		}
	}
	for i := 0; i < mountTimeOut*1000/interval; i++ {
		time.Sleep(time.Duration(interval) * time.Millisecond)
		st, err := os.Stat(mp)
		if err == nil {
			if sys, ok := st.Sys().(*syscall.Stat_t); ok && sys.Ino == uint64(meta.RootInode) {
				// in pod, pid probably the same
				if csiCommPath == "" && oldConf != nil {
					_, newConf, _ := loadConfig(mp)
					if newConf == nil || newConf.Pid == oldConf.Pid {
						continue
					}
				}
				logger.Infof("\033[92mOK\033[0m, %s is ready at %s", name, mp)
				return
			}
		}
		_, _ = os.Stdout.WriteString(".")
		_ = os.Stdout.Sync()
	}
	_, _ = os.Stdout.WriteString("\n")
	mountDesc := "mount process is not started yet"
	if mountPid != 0 {
		mountDesc = fmt.Sprintf("tried to kill mount process %d", mountPid)
		_ = syscall.Kill(mountPid, syscall.SIGABRT) // Kill and show stack trace
	}
	if background {
		logger.Fatalf("The mount point is not ready in %d seconds (%s), please check the log (%s) or re-mount in foreground", mountTimeOut, mountDesc, logPath)
	} else {
		logger.Fatalf("The mount point is not ready in %d seconds (%s), exit it", mountTimeOut, mountDesc)
	}
}

func checkSvcPort(address string) {
	mountTimeOut := 10
	interval := 500
	for i := 0; i < mountTimeOut*1000/interval; i++ {
		time.Sleep(time.Duration(interval) * time.Millisecond)
		conn, err := net.DialTimeout("tcp", address, 500*time.Millisecond)
		if err == nil {
			_ = conn.Close()
			logger.Infof("\033[92mOK\033[0m, service is ready on %s", address)
			return
		}
		_, _ = os.Stdout.WriteString(".")
		_ = os.Stdout.Sync()
	}
	_, _ = os.Stdout.WriteString("\n")
	logger.Fatalf("The service is not ready in %d seconds, please check the log or restart in foreground", mountTimeOut)
}

func makeDaemonForSvc(c *cli.Context, m meta.Meta, metaUrl, listenAddr string) error {
	cacheDirPathToAbs(c)
	_ = expandPathForEmbedded(metaUrl)

	var attrs godaemon.DaemonAttr
	logfile := c.String("log")
	attrs.OnExit = func(stage int) error {
		if stage == 0 {
			checkSvcPort(listenAddr)
		}
		return nil
	}

	if godaemon.Stage() == 0 {
		var err error
		attrs.Stdout, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			logger.Errorf("open log file %s: %s", logfile, err)
		} else {
			logger.Infof("open log file %s", logfile)
		}

		conn, err := net.DialTimeout("tcp", listenAddr, 500*time.Millisecond)
		if err == nil {
			_ = conn.Close()
			logger.Fatalf("unable to start the server: %s is already in use", listenAddr)
		}
	}
	if godaemon.Stage() <= 1 {
		err := m.Shutdown()
		if err != nil {
			logger.Errorf("shutdown: %s", err)
		}
	}
	_, _, err := godaemon.MakeDaemon(&attrs)
	return err
}

func getDaemonStage() int {
	return int(godaemon.Stage())
}

func fuseFlags() []cli.Flag {
	return addCategories("FUSE", []cli.Flag{
		&cli.BoolFlag{
			Name:  "enable-xattr",
			Usage: "enable extended attributes (xattr)",
		},
		&cli.BoolFlag{
			Name:  "enable-cap",
			Usage: "enable security.capability xattr",
		},
		&cli.BoolFlag{
			Name:  "enable-selinux",
			Usage: "enable security.selinux xattr",
		},
		&cli.BoolFlag{
			Name:  "enable-ioctl",
			Usage: "enable ioctl (support GETFLAGS/SETFLAGS only)",
		},
		&cli.StringFlag{
			Name:  "root-squash",
			Usage: "mapping local root user (uid = 0) to another one specified as <uid>:<gid>",
		},
		&cli.StringFlag{
			Name:  "all-squash",
			Usage: "mapping all users to another one specified as <uid>:<gid>",
		},
		&cli.BoolFlag{
			Name:  "prefix-internal",
			Usage: "add '.jfs' prefix to all internal files",
		},
		&cli.BoolFlag{
			Name:   "non-default-permission",
			Usage:  "disable `default_permissions` option, only for testing",
			Hidden: true,
		},
		&cli.StringFlag{
			Name:  "max-fuse-io",
			Usage: "maximum size for fuse request",
			Value: "128K",
		},
		&cli.StringFlag{
			Name:  "umask",
			Usage: "umask for new files and directories in octal (overwrite the one from app)",
		},
		&cli.StringFlag{
			Name:  "o",
			Usage: "other FUSE options",
		},
	})
}

func mountFlags() []cli.Flag {
	selfFlags := []cli.Flag{
		&cli.BoolFlag{
			Name:    "f",
			Aliases: []string{"foreground"},
			Hidden:  true,
			Usage:   "run in foreground",
		},
		&cli.BoolFlag{
			Name:    "d",
			Aliases: []string{"background"},
			Usage:   "run in background",
		},
		&cli.BoolFlag{
			Name:  "no-syslog",
			Usage: "disable syslog",
		},
		&cli.StringFlag{
			Name:  "log",
			Value: path.Join(getDefaultLogDir(), "juicefs.log"),
			Usage: "path of log file when running in background",
		},
		&cli.BoolFlag{
			Name:  "force",
			Usage: "force to mount even if the mount point is already mounted by the same filesystem",
		},
		&cli.BoolFlag{
			Name:  "hide-internal",
			Usage: "hide all internal files (.accesslog, .stats, etc.)",
		},
	}
	if runtime.GOOS == "linux" {
		selfFlags = append(selfFlags, &cli.BoolFlag{
			Name:  "update-fstab",
			Usage: "add / update entry in /etc/fstab, will create a symlink from /sbin/mount.juicefs to JuiceFS executable if not existing",
		})
		selfFlags = append(selfFlags, &cli.BoolFlag{
			Name:  "disable-transparent-hugepage",
			Usage: "disable transparent huge page to avoid latency spikes caused by kernel's memory compaction",
		})
	}
	return append(selfFlags, fuseFlags()...)
}

func disableUpdatedb() {
	path := "/etc/updatedb.conf"
	file, err := os.Open(path)
	if err != nil {
		return
	}
	defer file.Close()

	// obtain exclusive and not block flock
	if err := syscall.Flock(int(file.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
		if err == syscall.EAGAIN {
			return
		}
	} else {
		defer func() {
			// release flock
			_ = syscall.Flock(int(file.Fd()), syscall.LOCK_UN)
		}()
	}

	data, err := io.ReadAll(file)
	if err != nil {
		return
	}
	fstype := "fuse.juicefs"
	if bytes.Contains(data, []byte(fstype)) {
		return
	}
	// assume that fuse.sshfs is already in PRUNEFS
	knownFS := "fuse.sshfs"
	p1 := bytes.Index(data, []byte("PRUNEFS"))
	p2 := bytes.Index(data, []byte(knownFS))
	if p1 > 0 && p2 > p1 {
		var nd []byte
		nd = append(nd, data[:p2]...)
		nd = append(nd, fstype...)
		nd = append(nd, ' ')
		nd = append(nd, data[p2:]...)
		err = os.WriteFile(path, nd, 0644)
		if err != nil {
			logger.Warnf("update %s: %s", path, err)
		} else {
			logger.Infof("Add %s into PRUNEFS of %s", fstype, path)
		}
	}
}

func getFuserMountVersion() string {
	var version = "0.0.0"
	out, _ := exec.Command("fusermount", "-V").CombinedOutput()
	ps := strings.Split(string(out), ":")
	if len(ps) > 1 {
		return strings.TrimSpace(ps[1])
	}
	return version
}

func setFuseOption(c *cli.Context, format *meta.Format, vfsConf *vfs.Config) {
	rawOpts, mt, noxattr, noacl, maxWrite := genFuseOptExt(c, format)
	options := vfs.FuseOptions(fuse.GenFuseOpt(vfsConf, rawOpts, mt, noxattr, noacl, maxWrite))
	vfsConf.FuseOpts = &options
}

func genFuseOpt(c *cli.Context, name string) string {
	fuseOpt := c.String("o")
	// todo: remove ?
	prefix := os.Getenv("FSTAB_NAME_PREFIX")
	if prefix == "" {
		prefix = "JuiceFS:"
	}
	fuseOpt += ",fsname=" + prefix + name
	if c.Bool("allow-other") || os.Getuid() == 0 && !strings.Contains(fuseOpt, "allow_other") {
		fuseOpt += ",allow_other"
	}
	switch runtime.GOOS {
	case "darwin":
		fuseOpt += ",allow_recursion"
	case "linux":
		// nonempty has been removed since 3.0.0
		if getFuserMountVersion() < "3.0.0" {
			fuseOpt += ",nonempty"
		}
	}
	fuseOpt = strings.TrimLeft(fuseOpt, ",")
	return fuseOpt
}

func prepareMp(mp string) {
	if csiCommPath != "" {
		return
	}
	var fi os.FileInfo
	var ino uint64
	err := utils.WithTimeout(context.TODO(), func(context.Context) error {
		var err error
		fi, err = os.Stat(mp)
		return err
	}, time.Second*3)
	if !strings.Contains(mp, ":") && err != nil {
		err2 := utils.WithTimeout(context.TODO(), func(context.Context) error {
			return os.MkdirAll(mp, 0777)
		}, time.Second*3)
		if err2 != nil {
			if os.IsExist(err2) || strings.Contains(err2.Error(), "timeout after 3s") {
				// a broken mount point, umount it
				logger.Infof("mountpoint %s is broken: %s, umount it", mp, err)
				_ = doUmount(mp, true)
			} else {
				logger.Fatalf("create %s: %s", mp, err2)
			}
		}
	} else if err == nil {
		ino, _ = utils.GetFileInode(mp)
		if ino <= uint64(meta.RootInode) && fi.Size() == 0 {
			// a broken mount point, umount it
			logger.Infof("mountpoint %s is broken (ino=%d, size=%d), umount it", mp, ino, fi.Size())
			_ = doUmount(mp, true)
		}
	}

	if os.Getuid() == 0 {
		return
	}
	if ino == uint64(meta.RootInode) {
		return
	}
	switch runtime.GOOS {
	case "darwin":
		if fi, err := os.Stat(mp); err == nil {
			if st, ok := fi.Sys().(*syscall.Stat_t); ok {
				if st.Uid != uint32(os.Getuid()) {
					logger.Fatalf("current user should own %s", mp)
				}
			}
		}
	case "linux":
		f, err := os.CreateTemp(mp, ".test")
		if err != nil && (os.IsPermission(err) || errors.Is(err, syscall.EPERM) || errors.Is(err, syscall.EROFS)) {
			logger.Fatalf("Do not have write permission on %s", mp)
		} else if f != nil {
			_ = f.Close()
			_ = os.Remove(f.Name())
		}
	}
}

func genFuseOptExt(c *cli.Context, format *meta.Format) (fuseOpt string, mt int, noxattr, noacl bool, maxWrite int) {
	enableXattr := c.Bool("enable-xattr")
	if format.EnableACL {
		enableXattr = true
	}
	return genFuseOpt(c, format.Name), 1, !enableXattr, !format.EnableACL, int(utils.ParseBytes(c, "max-fuse-io", 'B'))
}

func shutdownGraceful(mp string) {
	_, conf, err := loadConfig(mp)
	if err != nil {
		logger.Warnf("load config from %s: %s", mp, err)
		return
	}
	fuseFd, fuseSetting = getFuseFd(conf.CommPath)
	for i := 0; i < 100 && fuseFd == 0; i++ {
		time.Sleep(time.Millisecond * 100)
		fuseFd, fuseSetting = getFuseFd(conf.CommPath)
	}
	if fuseFd == 0 {
		logger.Warnf("fail to recv FUSE fd from %s", conf.CommPath)
		return
	}
	for i := 0; i < 600; i++ {
		if err := syscall.Kill(conf.Pid, syscall.SIGHUP); err != nil {
			os.Setenv("_FUSE_STATE_PATH", conf.StatePath)
			os.Setenv("_JFS_META_SID", strconv.Itoa(int(conf.Meta.Sid)))
			return
		}
		time.Sleep(time.Millisecond * 100)
	}
	logger.Infof("mount point %s is busy, stop upgrade, mount on top of it", mp)
	err = sendFuseFd(conf.CommPath, fuseSetting, fuseFd)
	if err != nil {
		logger.Warnf("send FUSE fd: %s", err)
	}
	_ = syscall.Close(fuseFd)
	fuseFd = 0
	fuseSetting = []byte("FUSE")
}

func canShutdownGracefully(mp string, newConf *vfs.Config) bool {
	if csiCommPath != "" {
		return false
	}
	var ino uint64
	var err error
	err = utils.WithTimeout(context.TODO(), func(context.Context) error {
		ino, err = utils.GetFileInode(mp)
		return err
	}, time.Second*3)
	if err != nil {
		logger.Warnf("get inode of %s: %s", mp, err)
		_ = doUmount(mp, true)
		return false
	} else if ino != 1 {
		return false
	}
	_, oldConf, err := loadConfig(mp)
	if err != nil {
		if !os.IsNotExist(err) {
			logger.Warnf("load config: %s", err)
		}
		return false
	}
	if oldConf.Pid == 0 || oldConf.CommPath == "" {
		logger.Infof("mount point %s is not ready for upgrade, mount on top of it", mp)
		return false
	}
	if oldConf.Format.Name != newConf.Format.Name {
		logger.Infof("different volume %s != %s, mount on top of it", oldConf.Format.Name, newConf.Format.Name)
		return false
	}
	oldVersion := version.Parse(oldConf.Version)
	if ret, _ := version.CompareVersions(oldVersion, version.Parse("1.2.0")); ret <= 0 {
		oldConf.FuseOpts.MaxWrite = 128 * 1024
	}
	if oldConf.FuseOpts != nil && !reflect.DeepEqual(oldConf.FuseOpts.StripOptions(), newConf.FuseOpts.StripOptions()) {
		logger.Infof("different options, mount on top of it: %v != %v", oldConf.FuseOpts.StripOptions(), newConf.FuseOpts.StripOptions())
		return false
	}
	if oldConf.FuseOpts.DisableXAttrs && !newConf.FuseOpts.DisableXAttrs {
		logger.Infof("Xattr is enabled, mount on top of it")
		return false
	}
	return true
}

func absPath(d string) string {
	if strings.HasPrefix(d, "/") {
		return d
	}
	if strings.HasPrefix(d, "~/") {
		if h, err := os.UserHomeDir(); err == nil {
			return filepath.Join(h, d[1:])
		} else {
			logger.Fatalf("Expand user home dir of %s: %s", d, err)
		}
	}
	d, err := filepath.Abs(d)
	if err != nil {
		logger.Fatalf("Expand %s: %s", d, err)
	}
	return d
}

func buildBoolFlagsMap(c *cli.Context) map[string]bool {
	boolFlags := make(map[string]bool)
	addBoolFlags := func(flags []cli.Flag) {
		for _, flag := range flags {
			if _, ok := flag.(*cli.BoolFlag); ok {
				for _, name := range flag.Names() {
					boolFlags[name] = true
				}
			}
		}
	}
	if c.App != nil {
		addBoolFlags(c.App.Flags)
	}
	if c.Command != nil {
		addBoolFlags(c.Command.Flags)
	}
	return boolFlags
}

func tellFstabOptions(c *cli.Context) string {
	opts := []string{"_netdev,nofail"}
	boolFlags := buildBoolFlagsMap(c)
	for _, s := range os.Args[2:] {
		if !strings.HasPrefix(s, "-") {
			continue
		}
		s = strings.TrimLeft(s, "-")
		s = strings.Split(s, "=")[0]
		if !c.IsSet(s) || s == "update-fstab" || s == "background" || s == "d" {
			continue
		}
		if s == "o" {
			opts = append(opts, c.String(s))
		} else if boolFlags[s] && c.Bool(s) {
			opts = append(opts, s)
		} else if s == "cache-dir" {
			var dirString string
			if c.String(s) == "memory" {
				dirString = "memory"
			} else {
				dirs := utils.SplitDir(c.String(s))
				dirString = strings.Join(relPathToAbs(dirs), string(os.PathListSeparator))
			}
			opts = append(opts, fmt.Sprintf("%s=%s", s, dirString))
		} else {
			opts = append(opts, fmt.Sprintf("%s=%s", s, c.Generic(s)))
		}
	}
	sort.Strings(opts)
	return strings.Join(opts, ",")
}

func updateFstab(c *cli.Context) error {
	addr := expandPathForEmbedded(c.Args().Get(0))
	mp := absPath(c.Args().Get(1))
	var fstab = "/etc/fstab"

	f, err := os.Open(fstab)
	if err != nil {
		return err
	}
	defer f.Close()
	entryIndex := -1
	var lines []string
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		if len(fields) >= 6 && fields[2] == "juicefs" && fields[0] == addr && fields[1] == mp {
			entryIndex = len(lines)
		}
		lines = append(lines, line)
	}
	if err = scanner.Err(); err != nil {
		return err
	}
	opts := tellFstabOptions(c)
	entry := fmt.Sprintf("%s  %s  juicefs  %s  0 0", addr, mp, opts)
	if entryIndex >= 0 {
		if entry == lines[entryIndex] {
			return nil
		}
		lines[entryIndex] = entry
	} else {
		lines = append(lines, entry)
	}
	tempFstab := fstab + ".tmp"
	tmpf, err := os.OpenFile(tempFstab, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	defer tmpf.Close()
	if _, err := tmpf.WriteString(strings.Join(lines, "\n") + "\n"); err != nil {
		_ = os.Remove(tempFstab)
		return err
	}
	return os.Rename(tempFstab, fstab)
}

func tryToInstallMountExec() error {
	if _, err := os.Stat("/sbin/mount.juicefs"); err == nil {
		return nil
	}
	src, err := os.Executable()
	if err != nil {
		return err
	}
	return os.Symlink(src, "/sbin/mount.juicefs")
}

func fixCacheDirs(c *cli.Context) {
	cd := c.String("cache-dir")
	if cd == "memory" || strings.HasPrefix(cd, "/") {
		return
	}
	ds := utils.SplitDir(cd)
	for i, d := range ds {
		ds[i] = absPath(d)
	}
	for i, a := range os.Args {
		if i > 0 && os.Args[i-1] == "--cache-dir" && a == cd || a == "--cache-dir="+cd {
			os.Args[i] = a[:len(a)-len(cd)] + strings.Join(ds, string(os.PathListSeparator))
		}
	}
}

func makeDaemon(c *cli.Context, conf *vfs.Config) error {
	var attrs godaemon.DaemonAttr
	logfile := c.String("log")
	mp := conf.Meta.MountPoint
	attrs.OnExit = func(stage int) error {
		if stage == 0 {
			checkMountpoint(conf.Format.Name, mp, logfile, true)
		}
		return nil
	}

	// the current dir will be changed to root in daemon,
	// so the mount point has to be an absolute path.
	if godaemon.Stage() == 0 {
		mp := c.Args().Get(1)
		amp, err := filepath.Abs(mp)
		if err == nil && amp != mp {
			for i := len(os.Args) - 1; i > 2; i-- {
				if os.Args[i] == mp {
					// FIXME: it could be other options
					os.Args[i] = amp
					break
				}
			}
		}
		fixCacheDirs(c)

		_ = os.MkdirAll(filepath.Dir(logfile), 0755)
		attrs.Stdout, err = os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			logger.Errorf("open log file %s: %s", logfile, err)
		}
	}
	_, _, err := godaemon.MakeDaemon(&attrs)
	return err
}

func increaseRlimit() {
	var n uint64 = 100000
	err := syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{Max: n, Cur: n})
	for err != nil && n > 1024 {
		n = n * 2 / 3
		err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &syscall.Rlimit{Max: n, Cur: n})
	}
	if err != nil {
		logger.Warnf("setrlimit to %d: %s", n, err)
	}
}

func installHandler(m meta.Meta, mp string, v *vfs.VFS, blob object.ObjectStorage) {
	// Go will catch all the signals
	signal.Ignore(syscall.SIGPIPE)
	signalChan := make(chan os.Signal, 10)
	signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
	go func() {
		for {
			sig := <-signalChan
			logger.Infof("Received signal %s, exiting...", sig.String())
			if sig == syscall.SIGHUP {
				path := fmt.Sprintf("/tmp/state%d.json", os.Getppid())
				if err := v.FlushAll(""); err == nil {
					fuse.Shutdown()
					err = v.FlushAll(path)
					if err != nil {
						logger.Fatalf("flush buffered data failed: %s", err)
					}
					m.FlushSession()
					object.Shutdown(blob)
					logger.Warnf("exit with code 1")
					os.Exit(1)
				} else {
					logger.Warnf("flush buffered data failed: %s, don't restart", err)
					continue
				}
			}
			go func() {
				time.Sleep(time.Second * 30)
				if err := v.FlushAll(""); err != nil {
					logger.Errorf("flush all: %s", err)
				}
				logger.Errorf("exit after receiving signal %s, but umount does not finish in 30 seconds, force exit", sig)
				os.Exit(meta.UmountCode)
			}()
			go func() { _ = doUmount(mp, true) }()
		}
	}()
}
func launchMount(c *cli.Context, mp string, conf *vfs.Config) error {
	increaseRlimit()
	utils.AdjustOOMKiller(-1000)
	utils.SetIOFlusher()

	if c.Bool("disable-transparent-hugepage") {
		utils.DisableTHP()
	}

	if canShutdownGracefully(mp, conf) {
		shutdownGraceful(mp)
	}
	os.Setenv("_FUSE_FD_COMM", serverAddress)
	serveFuseFD(serverAddress)
	defer os.Remove(serverAddress)

	path, err := os.Executable()
	if err != nil {
		return fmt.Errorf("find executable: %s", err)
	}
	start := time.Now()
	for attempt := 0; ; attempt++ {
		if attempt == 3 && time.Since(start) < time.Second*10 {
			return fmt.Errorf("fail 3 times in %s, give up", time.Since(start))
		}
		// For volcengine VKE serverless container, no umount before mount when
		// `JFS_NO_UMOUNT` environment provided
		noUmount := os.Getenv("JFS_NO_UMOUNT")
		if fuseFd == 0 && (attempt > 0 || noUmount == "0") {
			_ = doUmount(mp, true)
		}
		if runtime.GOOS == "linux" {
			if !utils.Exists(serverAddress) {
				serveFuseFD(serverAddress)
			}
		}

		mountPid = 0
		cmd := exec.Command(path, os.Args[1:]...)
		cmd.Stdin = os.Stdin
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		err = cmd.Start()
		if err != nil {
			logger.Errorf("start process %s: %s", path, err)
			time.Sleep(time.Second)
			continue
		}
		os.Unsetenv("_FUSE_STATE_PATH")
		mountPid = cmd.Process.Pid

		notInCSI := os.Getenv("JFS_SUPER_COMM") == ""
		signalChan := make(chan os.Signal, 10)
		if notInCSI {
			signal.Notify(signalChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
			go func() {
				for {
					sig := <-signalChan
					if sig == nil {
						return
					}
					logger.Infof("received signal %s, propagating to child process %d...", sig.String(), mountPid)
					if err := cmd.Process.Signal(sig); err != nil && !errors.Is(err, os.ErrProcessDone) {
						logger.Errorf("send signal %s to %d: %s", sig.String(), mountPid, err)
					}
				}
			}()
		}

		ctx, cancel := context.WithCancel(context.TODO())
		go watchdog(ctx, mp)
		err = cmd.Wait()
		cancel()
		if notInCSI {
			signal.Stop(signalChan)
		}
		close(signalChan)
		if err == nil {
			return nil
		} else {
			var exitError *exec.ExitError
			if ok := errors.As(err, &exitError); ok {
				if waitStatus, ok := exitError.Sys().(syscall.WaitStatus); ok && waitStatus.ExitStatus() == meta.UmountCode {
					logger.Errorf("received umount exit code")
					_ = doUmount(mp, true)
					return nil
				}
			}
			if fuseFd < 0 {
				logger.Info("transfer FUSE session to others")
				return nil
			}
			logger.Errorf("mount process %d: %s, will restart in 1 second", mountPid, err)
			time.Sleep(time.Second)
		}
	}
}

func getNobodyUIDGID() (uint32, uint32) {
	var uid, gid uint32 = 65534, 65534
	if u, err := user.Lookup("nobody"); err == nil {
		nobody, err := strconv.ParseUint(u.Uid, 10, 32)
		if err != nil {
			logger.Fatalf("invalid uid: %s", u.Uid)
		}
		uid = uint32(nobody)
	}
	if g, err := user.LookupGroup("nogroup"); err == nil {
		nogroup, err := strconv.ParseUint(g.Gid, 10, 32)
		if err != nil {
			logger.Fatalf("invalid gid: %s", g.Gid)
		}
		gid = uint32(nogroup)
	}
	return uid, gid
}

func parseUIDGID(input string, defaultUid uint32, defaultGid uint32) (uint32, uint32) {
	ss := strings.SplitN(strings.TrimSpace(input), ":", 2)
	uid, gid := defaultUid, defaultGid
	if ss[0] != "" {
		u, err := strconv.ParseUint(ss[0], 10, 32)
		if err != nil {
			logger.Fatalf("invalid uid: %s", ss[0])
		}
		uid = uint32(u)
		if uid == 0 {
			logger.Warnf("Can't map uid as 0, use %d instead", defaultUid)
			uid = defaultUid
		}
	}
	if len(ss) == 2 && ss[1] != "" {
		g, err := strconv.ParseUint(ss[1], 10, 32)
		if err != nil {
			logger.Fatalf("invalid gid: %s", ss[1])
		}
		gid = uint32(g)
		if gid == 0 {
			logger.Warnf("Can't map gid as 0, use %d instead", defaultGid)
			gid = defaultGid
		}
	}
	return uid, gid
}

func mountMain(v *vfs.VFS, c *cli.Context) {
	if os.Getuid() == 0 {
		disableUpdatedb()
	}
	conf := v.Conf
	conf.AttrTimeout = utils.Duration(c.String("attr-cache"))
	conf.EntryTimeout = utils.Duration(c.String("entry-cache"))
	conf.DirEntryTimeout = utils.Duration(c.String("dir-entry-cache"))
	conf.NegEntryTimeout = utils.Duration(c.String("negative-entry-cache"))
	conf.ReaddirCache = c.Bool("readdir-cache")
	major, minor := utils.GetKernelVersion()
	if conf.ReaddirCache {
		if conf.AttrTimeout == 0 {
			logger.Warnf("readdir-cache is enabled without attr-cache, it's performance may be affected")
		}
		if major < 4 || (major == 4 && minor < 20) {
			logger.Warnf("readdir-cache requires kernel version 4.20 or higher, current version: %d.%d", major, minor)
		}
		if conf.Meta.SkipDirMtime > 0 {
			logger.Warnf("When both readdir-cache and skip-dir-mtime are enabled, ignoring mtime may disable readdir refreshes on other nodes")
		}
	}
	if conf.NegEntryTimeout > 0 && (major < 5 || (major == 5 && minor < 11)) {
		logger.Warnf("On kernel versions below 5.11 (current: %d.%d), negative-entry-cache may cause concurrent check-then-create operations (e.g. mkdir -p) to fail in a distributed environment", major, minor)
	}
	conf.NonDefaultPermission = c.Bool("non-default-permission")
	rootSquash := c.String("root-squash")
	allSquash := c.String("all-squash")
	if allSquash != "" || rootSquash != "" {
		nobodyUid, nobodyGid := getNobodyUIDGID()
		// all-squash takes precedence over root-squash
		if allSquash != "" {
			conf.NonDefaultPermission = true // disable kernel permission check
			uid, gid := parseUIDGID(allSquash, nobodyUid, nobodyGid)
			conf.AllSquash = &vfs.AnonymousAccount{Uid: uid, Gid: gid}
			logger.Infof("Map all uid/gid to %d/%d by setting all-squash", uid, gid)
		} else { // rootSquash != ""
			uid, gid := parseUIDGID(rootSquash, nobodyUid, nobodyGid)
			conf.RootSquash = &vfs.AnonymousAccount{Uid: uid, Gid: gid}
			logger.Infof("Map root uid/gid 0 to %d/%d by setting root-squash", uid, gid)
		}
	}
	logger.Infof("Mounting volume %s at %s ...", conf.Format.Name, conf.Meta.MountPoint)
	err := fuse.Serve(v, c.String("o"), c.Bool("enable-xattr"), c.Bool("enable-ioctl"))
	if err != nil {
		logger.Fatalf("fuse: %s", err)
	}
}


================================================
FILE: cmd/mount_windows.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"os"
	"path/filepath"
	"runtime"
	"strconv"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/juicedata/juicefs/pkg/winfsp"
	"github.com/urfave/cli/v2"
)

func mountFlags() []cli.Flag {
	return []cli.Flag{
		&cli.StringFlag{
			Name:  "o",
			Usage: "other FUSE options",
		},
		&cli.StringFlag{
			Name:  "log",
			Value: filepath.Join(getDefaultLogDir(), "juicefs.log"),
			Usage: "path of log file when running in background",
		},
		&cli.StringFlag{
			Name:    "fuse-access-log",
			Aliases: []string{"fuse-trace-log"},
			Usage:   "Fuse Layer access log file",
			Hidden:  true,
		},
		&cli.IntFlag{
			Name:   "fuse-access-log-rotate-count",
			Usage:  "Fuse Layer access log file rotate count",
			Value:  7,
			Hidden: true,
		},
		&cli.IntFlag{
			Name:   "readdir-batch-size",
			Usage:  "readdir batch size",
			Value:  1000,
			Hidden: true,
		},
		&cli.StringFlag{
			Name:  "alias",
			Usage: "volume alias, useful for mounting a volume multiple times on the same machine",
		},
		&cli.StringFlag{
			Name:   "winfsp-dbg-log",
			Hidden: true,
		},
		&cli.BoolFlag{
			Name:   "as-local-volume",
			Usage:  "If mount as a local volume, supports mounting to a path.",
			Hidden: true,
		},
		&cli.BoolFlag{
			Name:  "flush-on-cleanup",
			Usage: "When enabled, Will instruct the WinFsp to call Flush() when a file handle is closing (MJ_IRP_CLEANUP). Requires the dev branch of WinFsp or version that GREATER than 2.1.25156.",
			Value: true,
		},
		&cli.BoolFlag{
			Name:  "as-root",
			Usage: "Access files as administrator",
		},
		&cli.StringFlag{
			Name:  "delay-close",
			Usage: "delay file closing duration",
			Value: "0s",
		},
		&cli.BoolFlag{
			Name:    "d",
			Aliases: []string{"background"},
			Usage:   "run in background(Windows: as a system service. support ONLY 1 volume mounting at the same time)",
		},
		&cli.BoolFlag{
			Name:  "show-dot-files",
			Usage: "If set, dot files will not be treated as hidden files",
		},
		&cli.IntFlag{
			Name:  "winfsp-threads",
			Usage: "WinFsp threads count option, Default is min(cpu core * 2, 16)",
			Value: min(runtime.NumCPU()*2, 16),
		},
		&cli.BoolFlag{
			Name:   "case-sensitive",
			Usage:  "If set, the file system will be case sensitive",
			Hidden: true,
		},
		&cli.BoolFlag{
			Name:  "report-case",
			Usage: "If set, juicefs will report the correct case of a file path for a case-insensitive filesystem. (May incur a performance lost)",
		},
		&cli.BoolFlag{
			Name:  "admin-as-root",
			Usage: "If we treat the Windows build-in user 'Administrator' as the root user on Linux. Default true.",
			Value: true,
		},
		&cli.StringFlag{
			Name:  "create-perm",
			Usage: "When creating files or directories, this will overwrite the permission parameters if set. example: 0755. Default is empty.",
			Value: "",
			Action: func(c *cli.Context, v string) error {
				if v != "" {
					if p, err := strconv.ParseUint(v, 8, 32); err != nil || p > 0o777 {
						return cli.Exit("create-perm must be a valid octal number between 0000 and 0777", 1)
					}
				}
				return nil
			},
		},
	}
}

func makeDaemon(c *cli.Context, conf *vfs.Config) error {
	logPath := c.String("log")
	if logPath != "" {
		if !filepath.IsAbs(logPath) {
			return cli.Exit("log path must be an absolute path", 1)
		}
		if err := os.MkdirAll(filepath.Dir(logPath), 0755); err != nil {
			return cli.Exit(err, 1)
		}
	}

	defaultCacheDir := getDefaultCacheDir()

	return winfsp.RunAsSystemService(conf.Format.Name, c.Args().Get(1), logPath, defaultCacheDir, c)
}

func makeDaemonForSvc(c *cli.Context, m meta.Meta, metaUrl, listenAddr string) error {
	logger.Warnf("Cannot run in background in Windows.")
	return nil
}

func getDaemonStage() int {
	return 0
}

func mountMain(v *vfs.VFS, c *cli.Context) {
	v.Conf.AccessLog = c.String("access-log")
	v.Conf.AttrTimeout = utils.Duration(c.String("attr-cache"))
	v.Conf.EntryTimeout = utils.Duration(c.String("entry-cache"))
	v.Conf.DirEntryTimeout = utils.Duration(c.String("dir-entry-cache"))
	v.Conf.Mountpoint = c.Args().Get(1)

	delayCloseTime := utils.Duration(c.String("delay-close"))

	err := winfsp.Serve(v, c.String("o"),
		c.Bool("as-root"), int(delayCloseTime.Seconds()), c.Bool("show-dot-files"),
		c.Int("winfsp-threads"), c.Bool("case-sensitive"), c.Bool("report-case"), c)

	if err != nil {
		logger.Errorf("Failed to mount volume %s: %s", v.Conf.Format.Name, err)
	}
}

func checkMountpoint(name, mp, logPath string, background bool) {}

func prepareMp(mp string) {}

func setFuseOption(c *cli.Context, format *meta.Format, vfsConf *vfs.Config) {}

func launchMount(c *cli.Context, mp string, conf *vfs.Config) error { return nil }

func installHandler(m meta.Meta, mp string, v *vfs.VFS, blob object.ObjectStorage) {}

func tryToInstallMountExec() error { return nil }

func updateFstab(c *cli.Context) error { return nil }


================================================
FILE: cmd/objbench.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"os/user"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/object"
	osync "github.com/juicedata/juicefs/pkg/sync"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
	"golang.org/x/sync/errgroup"
)

func cmdObjbench() *cli.Command {
	return &cli.Command{
		Name:      "objbench",
		Action:    objbench,
		Category:  "TOOL",
		Usage:     "Run benchmarks on an object storage",
		ArgsUsage: "ENDPOINT",
		Description: `
Run basic benchmarks on the target object storage to test if it works as expected.

Examples:
# Run benchmarks on S3
$ ACCESS_KEY=myAccessKey SECRET_KEY=mySecretKey juicefs objbench --storage s3  https://mybucket.s3.us-east-2.amazonaws.com -p 6
# Run benchmakks on JuiceFS
$ juicefs objbench --storage jfs redis://localhost/1

Details: https://juicefs.com/docs/community/performance_evaluation_guide#juicefs-objbench`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "storage",
				Value: "file",
				Usage: "object storage type (e.g. s3, gs, oss, cos)",
			},
			&cli.StringFlag{
				Name:  "access-key",
				Usage: "access key for object storage (env ACCESS_KEY)",
			},
			&cli.StringFlag{
				Name:  "secret-key",
				Usage: "secret key for object storage (env SECRET_KEY)",
			},
			&cli.StringFlag{
				Name:  "session-token",
				Usage: "session token for object storage",
			},
			&cli.IntFlag{
				Name:  "shards",
				Usage: "store the blocks into N buckets by hash of key",
			},
			&cli.StringFlag{
				Name:  "block-size",
				Value: "4M",
				Usage: "size of each IO block in KiB",
			},
			&cli.StringFlag{
				Name:  "big-object-size",
				Value: "1G",
				Usage: "size of each big object in MiB",
			},
			&cli.StringFlag{
				Name:  "small-object-size",
				Value: "128K",
				Usage: "size of each small object in KiB",
			},
			&cli.UintFlag{
				Name:  "small-objects",
				Value: 100,
				Usage: "number of small object",
			},
			&cli.BoolFlag{
				Name:  "skip-functional-tests",
				Usage: "skip functional tests",
			},
			&cli.UintFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   4,
				Usage:   "number of concurrent threads",
			},
			&cli.StringFlag{
				Name:    "storage-class",
				Aliases: []string{"sc"},
				Usage:   "storage class for object storage, e.g. Standard, IA",
			},
		},
	}
}

var (
	nspt    = "not support"
	pass    = "pass"
	skipped = "skipped"
	failed  = "failed"
)

type warning error

var groupName string
var listCount, bCount, sCount int

func objbench(ctx *cli.Context) error {
	setup(ctx, 1)
	for _, name := range []string{"small-objects", "threads"} {
		if ctx.Uint(name) == 0 {
			logger.Fatalf("%s should not be set to zero", name)
		}
	}
	bSize := int(utils.ParseBytes(ctx, "block-size", 'K'))
	fsize := int(utils.ParseBytes(ctx, "big-object-size", 'M'))
	smallBSize := int(utils.ParseBytes(ctx, "small-object-size", 'K'))
	if bSize == 0 || fsize == 0 || smallBSize == 0 {
		logger.Fatalf("block-size, big-object-size and small-object-size should not be zero")
	}
	ak, sk, token := ctx.String("access-key"), ctx.String("secret-key"), ctx.String("session-token")
	if ak == "" {
		ak = os.Getenv("ACCESS_KEY")
	}
	if sk == "" {
		sk = os.Getenv("SECRET_KEY")
	}
	if token == "" {
		token = os.Getenv("SESSION_TOKEN")
	}
	endpoint := ctx.Args().First()
	storageType := strings.ToLower(ctx.String("storage"))
	if storageType == "file" {
		if strings.Contains(endpoint, "://") {
			warn("The bucket \"%s\" doesn't look like a file path.", endpoint)
			warn("Did you forget to specify the `--storage <type>`?")
			if !userConfirmed() {
				return errors.New("Aborted")
			}
		}
		var err error
		if endpoint, err = filepath.Abs(endpoint); err != nil {
			logger.Fatalf("invalid path: %s", err)
		}
	}
	var blobOrigin object.ObjectStorage
	var err error
	shards := ctx.Int("shards")
	if shards > 1 {
		blobOrigin, err = object.NewSharded(storageType, endpoint, ak, sk, token, shards)
	} else {
		blobOrigin, err = object.CreateStorage(storageType, endpoint, ak, sk, token)
	}
	if err != nil {
		logger.Fatalf("create storage failed: %v", err)
	}

	prefix := fmt.Sprintf("__juicefs_benchmark_%d__/", time.Now().UnixNano())
	blob := object.WithPrefix(blobOrigin, prefix)
	storageClass := ctx.String("storage-class")
	if os, ok := blob.(object.SupportStorageClass); ok && storageClass != "" {
		if err := os.SetStorageClass(storageClass); err != nil {
			logger.Fatalf("set storageClass %s failed: %v", storageClass, err)
		}
	}
	defer func() {
		_ = blobOrigin.Delete(ctx.Context, prefix)
	}()
	bCount = int(math.Ceil(float64(fsize) / float64(bSize)))
	sCount = int(ctx.Uint("small-objects"))
	listCount = sCount + bCount
	if listCount > 1000 {
		listCount = 1000
	}
	threads := int(ctx.Uint("threads"))
	if threads > bCount || threads > sCount {
		threads = bCount
		if threads > sCount {
			threads = sCount
		}
		logger.Warnf("The number of threads was set too large and has been reduced to %d", threads)
	}
	colorful := utils.SupportANSIColor(os.Stdout.Fd())
	progress := utils.NewProgress(false)
	if colorful {
		nspt = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, YELLOW, nspt, RESET_SEQ)
		skipped = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, YELLOW, skipped, RESET_SEQ)
		pass = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, GREEN, pass, RESET_SEQ)
		failed = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, RED, failed, RESET_SEQ)
	}
	if runtime.GOOS != "windows" {
		nobody, err := user.Lookup("nobody")
		if err != nil {
			logger.Fatalf("lookup nobody user failed: %v", err)
		} else {
			group, err := user.LookupGroupId(nobody.Gid)
			if err != nil {
				logger.Fatalf("lookup nobody's group failed: %v", err)
			}
			groupName = group.Name
		}
	}
	if ctx.Bool("skip-functional-tests") {
		if err := blob.Create(ctx.Context); err != nil {
			return fmt.Errorf("can't create bucket: %s", err)
		}
	} else {
		var result [][]string
		result = append(result, []string{"CATEGORY", "TEST", "RESULT"})
		fmt.Println("Start Functional Testing ...")
		functionalTesting(ctx.Context, blob, &result, colorful)
		printResult(result, -1, colorful)
		fmt.Println()
	}
	fmt.Println("Start Performance Testing ...")
	var pResult [][]string
	pResult = append(pResult, []string{"ITEM", "VALUE", "COST"})

	apis := []apiInfo{
		{
			name:  "smallput",
			count: sCount,
			title: "put small objects",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("smallput", float64(sCount)/cost, float64(threads)*cost*1000/float64(sCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "smallget",
			count: sCount,
			title: "get small objects",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("smallget", float64(sCount)/cost, float64(threads)*cost*1000/float64(sCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:     "put",
			count:    bCount,
			title:    "upload objects",
			startKey: sCount,
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("put", float64(bSize)/1024/1024*float64(bCount)/cost, float64(threads)*cost*1000/float64(bCount), 2, colorful)
					line[1] += " MiB/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:     "get",
			count:    bCount,
			title:    "download objects",
			startKey: sCount,
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("get", float64(bSize)/1024/1024*float64(bCount)/cost, float64(threads)*cost*1000/float64(bCount), 2, colorful)
					line[1] += " MiB/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "list",
			title: "list objects",
			count: threads,
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("list", float64(listCount)*float64(threads)/cost, cost*1000, 2, colorful)
					line[1] += " objects/s"
					line[2] += fmt.Sprintf(" ms/ %d objects", listCount)
				}
				return line
			},
		}, {
			name:  "head",
			count: sCount + bCount,
			title: "head objects",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("head", float64(sCount+bCount)/cost, float64(threads)*cost*1000/float64(sCount+bCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "chtimes",
			count: sCount + bCount,
			title: "update mtime",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("chtimes", float64(sCount+bCount)/cost, float64(threads)*cost*1000/float64(sCount+bCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "chmod",
			count: sCount + bCount,
			title: "change permissions",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("chmod", float64(sCount+bCount)/cost, float64(threads)*cost*1000/float64(sCount+bCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "chown",
			count: sCount + bCount,
			title: "change owner/group",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("chown", float64(sCount+bCount)/cost, float64(threads)*cost*1000/float64(sCount+bCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		}, {
			name:  "delete",
			count: sCount + bCount,
			title: "delete objects",
			getResult: func(cost float64) []string {
				line := []string{"", nspt, nspt}
				if cost > 0 {
					line[1], line[2] = colorize("delete", float64(sCount+bCount)/cost, float64(threads)*cost*1000/float64(sCount+bCount), 2, colorful)
					line[1] += " objects/s"
					line[2] += " ms/object"
				}
				return line
			},
		},
	}

	bm := &benchMarkObj{
		blob:        blob,
		progressBar: progress,
		threads:     threads,
		seed:        make([]byte, bSize),
		smallSeed:   make([]byte, smallBSize),
		buffPool: &sync.Pool{New: func() interface{} {
			buff := make([]byte, bSize)
			return &buff
		}},
		smallBuffPool: &sync.Pool{New: func() interface{} {
			buff := make([]byte, smallBSize)
			return &buff
		}},
	}
	utils.RandRead(bm.seed)
	utils.RandRead(bm.smallSeed)

	for _, api := range apis {
		pResult = append(pResult, bm.run(ctx.Context, api))
	}
	progress.Done()

	fmt.Printf("Benchmark finished! block-size: %s, big-object-size: %s, small-object-size: %s, small-objects: %d, NumThreads: %d\n",
		humanize.IBytes(uint64(bSize)), humanize.IBytes(uint64(fsize)), humanize.IBytes(uint64(smallBSize)), sCount, threads)

	// adjust the print order
	pResult[1], pResult[3] = pResult[3], pResult[1]
	pResult[2], pResult[4] = pResult[4], pResult[2]
	pResult[7], pResult[10] = pResult[10], pResult[7]
	printResult(pResult, -1, colorful)
	return nil
}

var resultRangeForObj = map[string][4]float64{
	"put":          {100, 150, 50, 150},
	"get":          {100, 150, 50, 150},
	"smallput":     {10, 30, 30, 100},
	"smallget":     {10, 30, 30, 100},
	"multi-upload": {100, 150, 20, 50},
	"list":         {1000, 10000, 100, 200},
	"head":         {10, 30, 30, 100},
	"delete":       {10, 30, 30, 100},
	"chmod":        {10, 30, 30, 100},
	"chown":        {10, 30, 30, 100},
	"chtimes":      {10, 30, 30, 100},
}

func colorize(item string, value, cost float64, prec int, colorful bool) (string, string) {
	svalue := strconv.FormatFloat(value, 'f', prec, 64)
	var fmtMode byte = 'f'
	if cost < 0.01 {
		// For 'g' and 'G' it is the maximum number of significant digits
		fmtMode = 'g'
	}
	scost := strconv.FormatFloat(cost, byte(fmtMode), 2, 64)
	if colorful {
		r, ok := resultRangeForObj[item]
		if !ok {
			logger.Fatalf("Invalid item: %s", item)
		}
		var color int
		if value > r[1] { // max
			color = GREEN
		} else if value > r[0] { // min
			color = YELLOW
		} else {
			color = RED
		}
		svalue = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, svalue, RESET_SEQ)
		if cost < r[2] { // min
			color = GREEN
		} else if cost < r[3] { // max
			color = YELLOW
		} else {
			color = RED
		}
		scost = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, scost, RESET_SEQ)
	}
	return svalue, scost
}

type apiInfo struct {
	name      string
	title     string
	count     int
	startKey  int
	getResult func(cost float64) []string
}

type benchMarkObj struct {
	progressBar             *utils.Progress
	blob                    object.ObjectStorage
	threads                 int
	seed, smallSeed         []byte
	buffPool, smallBuffPool *sync.Pool
}

func (bm *benchMarkObj) run(ctx context.Context, api apiInfo) []string {
	if api.name == "chown" || api.name == "chmod" || api.name == "chtimes" {
		if err := bm.chmod(ctx, "not_exists", 0); err == utils.ENOTSUP {
			line := api.getResult(-1)
			line[0] = api.title
			return line
		}
		if api.name == "chown" && (strings.HasPrefix(bm.blob.String(), "file://") || strings.HasPrefix(bm.blob.String(), "jfs://")) && os.Getuid() != 0 {
			logger.Warnf("chown test should be run by root")
			return []string{api.title, skipped, skipped}
		}
	}
	var fn func(ctx context.Context, key string, startKey int) error
	switch api.name {
	case "put":
		fn = bm.put
	case "get":
		fn = bm.get
	case "smallput":
		fn = bm.smallPut
	case "smallget":
		fn = bm.smallGet
	case "delete":
		fn = bm.delete
	case "head":
		fn = bm.head
	case "list":
		fn = bm.list
	case "chown":
		fn = bm.chown
	case "chmod":
		fn = bm.chmod
	case "chtimes":
		fn = bm.chtimes
	}

	var wg sync.WaitGroup
	pool := make(chan struct{}, bm.threads)
	count := api.count
	var bar *utils.Bar
	if api.name == "list" {
		bar = bm.progressBar.AddCountBar(api.title, int64(listCount)*int64(count))
	} else {
		bar = bm.progressBar.AddCountBar(api.title, int64(count))
	}
	var err error
	start := time.Now()
	for i := api.startKey; i < api.startKey+count; i++ {
		pool <- struct{}{}
		wg.Add(1)
		go func(key int) {
			defer func() {
				<-pool
				wg.Done()
			}()
			if e := fn(ctx, strconv.Itoa(key), api.startKey); e != nil {
				err = e
			}
			if api.name == "list" {
				bar.IncrInt64(int64(listCount))
			} else {
				bar.Increment()
			}
		}(i)
	}
	wg.Wait()
	bar.Done()
	line := api.getResult(time.Since(start).Seconds())
	if err != nil {
		logger.Errorf("%s test failed: %s", api.name, err)
		return []string{api.title, failed, failed}
	}
	line[0] = api.title
	return line
}

func getMockData(seed []byte, idx int, result *[]byte) {
	size := len(seed)
	rSize := len(*result)
	if size == 0 || rSize == 0 {
		return
	}
	i := idx % size
	if size-i > rSize {
		copy(*result, seed[i:i+rSize])
	} else {
		copy((*result)[:size-i], seed[i:size])
		copy((*result)[size-i:rSize], seed[:rSize-(size-i)])
	}

}

func (bm *benchMarkObj) put(ctx context.Context, key string, startKey int) error {
	idx, _ := strconv.Atoi(key)
	if idx-startKey == 0 {
		return bm.blob.Put(ctx, key, bytes.NewReader(bm.seed))
	}
	buff := bm.buffPool.Get().(*[]byte)
	defer bm.buffPool.Put(buff)
	getMockData(bm.seed, idx-startKey, buff)
	return bm.blob.Put(ctx, key, bytes.NewReader(*buff))
}

func (bm *benchMarkObj) smallPut(ctx context.Context, key string, startKey int) error {
	idx, _ := strconv.Atoi(key)
	if idx == 0 {
		return bm.blob.Put(ctx, key, bytes.NewReader(bm.smallSeed))
	}

	buff := bm.smallBuffPool.Get().(*[]byte)
	defer bm.smallBuffPool.Put(buff)
	getMockData(bm.smallSeed, idx-startKey, buff)
	return bm.blob.Put(ctx, key, bytes.NewReader(*buff))
}

func getAndCheckN(ctx context.Context, blob object.ObjectStorage, key string, seed []byte, pool *sync.Pool, getOrgIdx func(idx int) int) error {
	idx, _ := strconv.Atoi(key)
	r, err := blob.Get(ctx, key, 0, -1)
	if err != nil {
		return err
	}
	defer r.Close()
	content := pool.Get().(*[]byte)
	defer pool.Put(content)

	var n int
	n, err = io.ReadFull(r, *content)
	if err != nil {
		return err
	}
	orgIdx := getOrgIdx(idx)
	checkN := 10
	l := len(seed)
	if l < checkN {
		checkN = l
	}

	// if orgIdx is 0, mockdata is the same as the seed
	var preNMockData []byte
	if orgIdx == 0 {
		preNMockData = seed[:checkN]
	} else {
		mockResult := pool.Get().(*[]byte)
		defer pool.Put(mockResult)
		preNMockData = (*mockResult)[:checkN]
		getMockData(seed, orgIdx, &preNMockData)
	}

	if n != len(seed) || !bytes.Equal((*content)[:checkN], preNMockData) {
		return fmt.Errorf("the downloaded content is incorrect")
	}
	return nil
}

func (bm *benchMarkObj) get(ctx context.Context, key string, startKey int) error {
	return getAndCheckN(ctx, bm.blob, key, bm.seed, bm.buffPool, func(idx int) int {
		return idx - startKey
	})
}

func (bm *benchMarkObj) smallGet(ctx context.Context, key string, startKey int) error {
	return getAndCheckN(ctx, bm.blob, key, bm.smallSeed, bm.smallBuffPool, func(idx int) int {
		return idx
	})
}

func (bm *benchMarkObj) delete(ctx context.Context, key string, startKey int) error {
	return bm.blob.Delete(ctx, key)
}

func (bm *benchMarkObj) head(ctx context.Context, key string, startKey int) error {
	_, err := bm.blob.Head(ctx, key)
	return err
}

func (bm *benchMarkObj) list(ctx context.Context, key string, startKey int) error {
	result, err := osync.ListAll(bm.blob, "", "0", "999", true)
	for range result {
	}
	return err
}

func (bm *benchMarkObj) chown(ctx context.Context, key string, startKey int) error {
	return bm.blob.(object.FileSystem).Chown(key, "nobody", groupName)
}

func (bm *benchMarkObj) chmod(ctx context.Context, key string, startKey int) error {
	return bm.blob.(object.FileSystem).Chmod(key, 0755)
}

func (bm *benchMarkObj) chtimes(ctx context.Context, key string, startKey int) error {
	return bm.blob.(object.FileSystem).Chtimes(key, time.Now())
}

func listAll(ctx context.Context, s object.ObjectStorage, prefix, marker string, limit int64) ([]object.Object, error) {
	ch, err := object.ListAll(ctx, s, prefix, marker, true, true)
	if err == nil {
		objs := make([]object.Object, 0)
		for obj := range ch {
			if len(objs) < int(limit) {
				objs = append(objs, obj)
			}
		}
		return objs, nil
	}
	return nil, err
}

var syncTests = map[string]bool{
	"special key":         true,
	"put a big object":    true,
	"put an empty object": true,
	"multipart upload":    true,
}

func functionalTesting(ctx context.Context, blob object.ObjectStorage, result *[][]string, colorful bool) {
	runCase := func(title string, fn func(blob object.ObjectStorage) error) {
		r := pass
		if err := fn(blob); err == utils.ENOTSUP {
			r = nspt
		} else if err != nil {
			color := RED
			if _, ok := err.(warning); ok {
				color = YELLOW
			}
			r = err.Error()
			if len(r) > 45 {
				r = r[:45] + "..."
			}
			if colorful {
				r = fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, r, RESET_SEQ)
			}
			logger.Debug(err.Error())
		}

		category := "basic"
		if syncTests[title] || strings.HasPrefix(title, "change") {
			category = "sync"
		}

		if colorful {
			title = fmt.Sprintf("%s%sm%s%s", COLOR_SEQ, DEFAULT, title, RESET_SEQ)
		}

		*result = append(*result, []string{category, title, r})
	}
	isFileSystem := true
	fi, ok := blob.(object.FileSystem)
	if ok {
		if err := fi.Chmod("not_exists_file", 0755); err == utils.ENOTSUP {
			isFileSystem = false
		}
	}

	get := func(s object.ObjectStorage, k string, off, limit int64) (string, error) {
		r, err := s.Get(ctx, k, off, limit)
		if err != nil {
			return "", err
		}
		defer r.Close()
		data, err := io.ReadAll(r)
		if err != nil {
			return "", err
		}
		return string(data), nil
	}
	key := "put_test_file"

	funFSCase := func(name string, fn func() error) {
		runCase(name, func(blob object.ObjectStorage) error {
			if !isFileSystem {
				return utils.ENOTSUP
			}
			br := []byte("hello")
			if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
				return fmt.Errorf("put object failed: %s", err)
			}
			defer blob.Delete(ctx, key) //nolint:errcheck
			return warning(fn())
		})
	}

	runCase("create a bucket", func(blob object.ObjectStorage) error {
		created := true
		if err := blob.Put(ctx, key, bytes.NewReader([]byte("1"))); err != nil {
			created = false
		}
		defer blob.Delete(ctx, key) //nolint:errcheck

		if !created {
			if err := blob.Create(ctx); err != nil {
				return fmt.Errorf("can't create bucket: %s", err)
			}
		}
		if err := blob.Create(ctx); err != nil {
			return fmt.Errorf("creating a bucket that already exists returns an error")
		}
		return nil
	})

	runCase("put an object", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		defer blob.Delete(ctx, key) //nolint:errcheck
		return nil
	})

	runCase("get an object", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		defer blob.Delete(ctx, key) //nolint:errcheck
		if d, e := get(blob, key, 0, -1); e != nil || d != string(br) {
			return fmt.Errorf(`failed to get an object: expect "hello", but got %v, error: %s`, d, e)
		}
		if d, e := get(blob, key, 0, 5); e != nil || d != string(br) {
			return fmt.Errorf(`failed to get an object: expect "hello", but got %v, error: %s`, d, e)
		}
		return nil
	})

	runCase("get non-exist", func(blob object.ObjectStorage) error {
		if _, err := blob.Get(ctx, "not_exists_file", 0, -1); err == nil {
			return fmt.Errorf("get not existed object should failed: %s", err)
		}
		return nil
	})

	runCase("get partial object", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		defer blob.Delete(ctx, key) //nolint:errcheck

		// get first
		if d, e := get(blob, key, 0, 1); e != nil || d != "h" {
			return fmt.Errorf(`failed to get the first byte:, expect "h", but got %q, error: %s`, d, e)
		}
		// get last
		if d, e := get(blob, key, 4, 1); e != nil || d != "o" {
			return fmt.Errorf(`failed to get the last byte: expect "o", but got %q, error: %s`, d, e)
		}
		// get last 3
		if d, e := get(blob, key, 2, 3); e != nil || d != "llo" {
			return fmt.Errorf(`failed to get the last three bytes: expect "llo", but got %q, error: %s`, d, e)
		}
		// get middle
		if d, e := get(blob, key, 2, 2); e != nil || d != "ll" {
			return fmt.Errorf(`failed to get two bytes: expect "ll", but got %q, error: %s`, d, e)
		}
		// get the end out of range
		if d, e := get(blob, key, 4, 2); e != nil || d != "o" {
			return warning(fmt.Errorf(`failed to get object with the end out of range, expect "o", but got %q, error: %s`, d, e))
		}
		// get the off out of range
		if d, e := get(blob, key, 6, 2); e != nil || d != "" {
			return warning(fmt.Errorf(`failed to get object with the offset out of range, expect "", but got %q, error: %s`, d, e))
		}
		return nil
	})

	runCase("head an object", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		defer blob.Delete(ctx, key) //nolint:errcheck
		if h, err := blob.Head(ctx, key); err != nil {
			return fmt.Errorf("failed to head object %s", err)
		} else {
			if h.Key() != key {
				return fmt.Errorf("expected key 'test' but got %s", h.Key())
			}
		}
		return nil
	})

	runCase("delete an object", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		if err := blob.Delete(ctx, key); err != nil {
			return fmt.Errorf("delete failed: %s", err)
		}
		if _, err := blob.Head(ctx, key); err == nil {
			return fmt.Errorf("expect err is not nil")
		}

		if err := blob.Delete(ctx, key); err != nil {
			return fmt.Errorf("delete not existed: %v", err)
		}
		return nil
	})

	runCase("delete non-exist", func(blob object.ObjectStorage) error {
		if err := blob.Delete(ctx, key); err != nil {
			return fmt.Errorf("deleting a non-existent object returns an error %v", err)
		}
		return nil
	})

	runCase("list objects", func(blob object.ObjectStorage) error {
		br := []byte("hello")
		if err := blob.Put(ctx, key, bytes.NewReader(br)); err != nil {
			return fmt.Errorf("put object failed: %s", err)
		}
		defer blob.Delete(ctx, key) //nolint:errcheck
		if isFileSystem {
			objs, err := listAll(ctx, blob, "", "", 2)
			if err == nil {
				if len(objs) != 2 {
					return fmt.Errorf("list should return 2 keys, but got %d", len(objs))
				}
				if objs[0].Key() != "" {
					return fmt.Errorf("first key should be empty string, but got %s", objs[0].Key())
				}
				if objs[0].Size() != 0 {
					return fmt.Errorf("first object size should be 0, but got %d", objs[0].Size())
				}
				if objs[1].Key() != key {
					return fmt.Errorf("first key should be test, but got %s", objs[1].Key())
				}
				if objs[1].Size() != 5 {
					return fmt.Errorf("size of first key shold be 5, but got %v", objs[1].Size())
				}
				now := time.Now()
				if objs[1].Mtime().Before(now.Add(-30*time.Second)) || objs[1].Mtime().After(now.Add(time.Second*30)) {
					return fmt.Errorf("mtime of key should be within 30 seconds, but got %s", objs[1].Mtime().Sub(now))
				}
			} else {
				return fmt.Errorf("list failed: %s", err)
			}

			objs, err = listAll(ctx, blob, "", "test2", 1)
			if err != nil {
				return fmt.Errorf("list failed: %s", err)
			} else if len(objs) != 0 {
				return fmt.Errorf("list should not return anything, but got %d", len(objs))
			}
		} else {
			objs, err2 := listAll(ctx, blob, "", "", 1)
			if err2 == nil {
				if len(objs) != 1 {
					return fmt.Errorf("list should return 1 keys, but got %d", len(objs))
				}
				if objs[0].Key() != key {
					return fmt.Errorf("first key should be test, but got %s", objs[0].Key())
				}
				if objs[0].Size() != 5 {
					return fmt.Errorf("size of first key shold be 5, but got %v", objs[0].Size())
				}
				now := time.Now()
				if objs[0].Mtime().Before(now.Add(-30*time.Second)) || objs[0].Mtime().After(now.Add(time.Second*30)) {
					return fmt.Errorf("mtime of key should be within 30 seconds, but got %s", objs[0].Mtime().Sub(now))
				}
			} else {
				return fmt.Errorf("list failed: %s", err2)
			}

			objs, err2 = listAll(ctx, blob, "", "test2", 1)
			if err2 != nil {
				return fmt.Errorf("list failed: %s", err2)
			} else if len(objs) != 0 {
				return fmt.Errorf("list should not return anything, but got %d", len(objs))
			}
		}
		keyTotal := 100
		var sortedKeys []string
		for i := 0; i < keyTotal; i++ {
			k := fmt.Sprintf("hashKey%d", i)
			sortedKeys = append(sortedKeys, k)
			if err := blob.Put(ctx, fmt.Sprintf("hashKey%d", i), bytes.NewReader(br)); err != nil {
				return fmt.Errorf("put object failed: %s", err.Error())
			}
		}
		sort.Strings(sortedKeys)
		defer func() {
			for i := 0; i < keyTotal; i++ {
				_ = blob.Delete(ctx, fmt.Sprintf("hashKey%d", i))
			}
		}()

		if objs, err := listAll(ctx, blob, "hashKey", "", int64(keyTotal)); err != nil {
			return fmt.Errorf("list failed: %s", err)
		} else {
			for i := 0; i < keyTotal; i++ {
				if objs[i].Key() != sortedKeys[i] {
					return fmt.Errorf("the result for list is incorrect")
				}
			}
		}
		return nil
	})

	runCase("special key", func(blob object.ObjectStorage) error {
		key := "测试编码文件" + `{"name":"juicefs"}` + string('\u001F') + "%uFF081%uFF09.jpg"
		defer blob.Delete(ctx, key) //nolint:errcheck
		if err := blob.Put(ctx, key, bytes.NewReader([]byte("1"))); err != nil {
			return fmt.Errorf("put encode file failed: %s", err)
		} else {
			if resp, _, _, err := blob.List(ctx, "", "测试编码文件", "", "", 1, true); err != nil && err != utils.ENOTSUP {
				return fmt.Errorf("list encode file failed %s", err)
			} else if len(resp) == 1 && resp[0].Key() != key {
				return fmt.Errorf("list encode file failed: expect key %s, but got %s", key, resp[0].Key())
			}
		}
		return nil
	})

	runCase("put a big object", func(blob object.ObjectStorage) error {
		fsize := 256 << 20
		buffL := 4 << 20
		buff := make([]byte, buffL)
		utils.RandRead(buff)
		count := int(math.Floor(float64(fsize) / float64(buffL)))
		content := make([]byte, fsize)
		for i := 0; i < count; i++ {
			copy(content[i*buffL:(i+1)*buffL], buff)
		}
		if err := blob.Put(ctx, key, bytes.NewReader(content)); err != nil {
			return err
		}
		defer blob.Delete(ctx, key) //nolint:errcheck
		return nil
	})

	runCase("put an empty object", func(blob object.ObjectStorage) error {
		// Copy empty objects
		defer blob.Delete(ctx, "empty_test_file") //nolint:errcheck
		if err := blob.Put(ctx, "empty_test_file", bytes.NewReader([]byte{})); err != nil {
			return err
		}

		// Copy `/` suffixed object
		defer blob.Delete(ctx, "slash_test_file/") //nolint:errcheck
		if err := blob.Put(ctx, "slash_test_file/", bytes.NewReader([]byte("1"))); err != nil {
			return fmt.Errorf("put `/` suffixed object failed: %s", err)
		}
		return nil
	})

	runCase("multipart upload", func(blob object.ObjectStorage) (err error) {
		defer func() {
			err = warning(err)
		}()

		key := "multi_test_file"
		if err = blob.CompleteUpload(ctx, key, "notExistsUploadId", []*object.Part{}); err != utils.ENOTSUP {
			defer blob.Delete(ctx, key) //nolint:errcheck
			upload, err := blob.CreateMultipartUpload(ctx, key)
			if err != nil {
				return fmt.Errorf("create multipart upload failed: %s", err)
			}
			total := 3
			seed := make([]byte, upload.MinPartSize)
			utils.RandRead(seed)
			parts := make([]*object.Part, total)
			content := make([][]byte, total)
			for i := 0; i < total; i++ {
				content[i] = make([]byte, upload.MinPartSize)
				getMockData(seed, i, &content[i])
			}
			var eg errgroup.Group
			eg.SetLimit(4)
			for i := 1; i <= total; i++ {
				num := i
				eg.Go(func() error {
					var err error
					parts[num-1], err = blob.UploadPart(ctx, key, upload.UploadID, num, content[num-1])
					if err != nil {
						err = fmt.Errorf("multipart upload error: %s", err)
					}
					return err
				})
			}
			err = eg.Wait()
			if err != nil {
				return err
			}
			// overwrite the first part
			firstPartContent := append(seed, seed...)
			if parts[0], err = blob.UploadPart(ctx, key, upload.UploadID, 1, firstPartContent); err != nil {
				return fmt.Errorf("multipart upload error: %v", err)
			}
			content[0] = firstPartContent

			// overwrite the last part
			lastPartContent := []byte("hello")
			if parts[total-1], err = blob.UploadPart(ctx, key, upload.UploadID, total, lastPartContent); err != nil {
				return fmt.Errorf("multipart upload error: %v", err)
			}
			content[total-1] = lastPartContent

			if err = blob.CompleteUpload(ctx, key, upload.UploadID, parts); err != nil {
				return fmt.Errorf("failed to complete multipart upload: %v", err)
			}
			r, err := blob.Get(ctx, key, 0, -1)
			if err != nil {
				return fmt.Errorf("failed to get multipart upload file: %v", err)
			}
			cnt, err := io.ReadAll(r)
			if err != nil {
				return fmt.Errorf("failed to get multipart upload file: %v", err)
			}
			if !bytes.Equal(cnt, bytes.Join(content, nil)) {
				return fmt.Errorf("the content of the multipart upload file is incorrect")
			}
			return nil
		}
		return utils.ENOTSUP
	})

	funFSCase("change owner/group", func() error {
		if (strings.HasPrefix(blob.String(), "file://") || strings.HasPrefix(blob.String(), "jfs://")) && os.Getuid() != 0 {
			return errors.New("root required")
		}
		if err := fi.Chown(key, "nobody", groupName); err != nil {
			return fmt.Errorf("failed to chown object %s", err)
		}
		if objInfo, err := blob.Head(ctx, key); err != nil {
			return fmt.Errorf("failed to head object %s", err)
		} else if info, ok := objInfo.(object.File); ok {
			if info.Owner() != "nobody" {
				return fmt.Errorf("expect owner nobody but got %s", info.Owner())
			}
			if info.Group() != groupName {
				return fmt.Errorf("expect group %s but got %s", groupName, info.Group())
			}
		}
		return nil
	})

	funFSCase("change permission", func() error {
		if err := fi.Chmod(key, 0777); err != nil {
			return err
		}
		if objInfo, err := blob.Head(ctx, key); err != nil {
			return fmt.Errorf("failed to head object %s", err)
		} else if info, ok := objInfo.(object.File); ok {
			if info.Mode()&0xFFF != 0777 {
				return fmt.Errorf("expect mode %o but got %o", 0777, info.Mode())
			}
		}
		return nil
	})

	funFSCase("change mtime", func() error {
		mtime := time.Now().Add(-10 * time.Minute)
		if err := fi.Chtimes(key, mtime); err != nil {
			return fmt.Errorf("failed to chtimes %s", err)
		}
		if objInfo, err := blob.Head(ctx, key); err != nil {
			return fmt.Errorf("failed to head object %s", err)
		} else {
			if objInfo.Mtime().Before(mtime.Add(-2*time.Second)) || objInfo.Mtime().After(mtime.Add(2*time.Second)) {
				return fmt.Errorf("mtime deviation is too large, the actual mtime is %s but got %s", mtime.Format(time.RFC3339), objInfo.Mtime().Format(time.RFC3339))
			}
		}
		return nil
	})
}


================================================
FILE: cmd/object.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"runtime"
	"sort"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/urfave/cli/v2"
)

var (
	dirSuffix     = "/"
	cliCtx        *cli.Context
	pid, uid, gid uint32
)

func toError(eno syscall.Errno) error {
	if eno == 0 {
		return nil
	}
	return eno
}

type juiceFS struct {
	object.DefaultObjectStorage
	name  string
	umask uint16
	jfs   *fs.FileSystem
}

func (j *juiceFS) String() string {
	return fmt.Sprintf("jfs://%s/", j.name)
}

func (j *juiceFS) path(key string) string {
	return dirSuffix + key
}

type jFile struct {
	f     *fs.File
	limit int64
}

func (f *jFile) Read(buf []byte) (int, error) {
	if len(buf) == 0 {
		return 0, nil
	}
	if f.limit <= 0 {
		return 0, io.EOF
	}
	if len(buf) > int(f.limit) {
		buf = buf[:f.limit]
	}
	n, err := f.f.Read(ctx, buf)
	f.limit -= int64(n)
	return n, err
}

func (f *jFile) Write(buf []byte) (int, error) {
	n, eno := f.f.Write(ctx, buf)
	return n, toError(eno)
}

func (f *jFile) Close() error {
	return toError(f.f.Close(ctx))
}

func (j *juiceFS) Get(rCtx context.Context, key string, off, limit int64, getters ...object.AttrGetter) (io.ReadCloser, error) {
	ctx := meta.WrapWithoutCancel(rCtx, pid, uid, []uint32{gid})
	f, err := j.jfs.Open(ctx, j.path(key), vfs.MODE_MASK_R)
	if err != 0 {
		return nil, err
	}
	if off > 0 {
		_, _ = f.Seek(ctx, off, io.SeekStart)
	}
	if limit <= 0 {
		limit = 1 << 62
	}
	return &jFile{f, limit}, nil
}

var bufPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 128<<10)
		return &buf
	},
}

func (j *juiceFS) Put(rCtx context.Context, key string, in io.Reader, getters ...object.AttrGetter) (err error) {
	ctx := meta.WrapWithoutCancel(rCtx, pid, uid, []uint32{gid})
	if vfs.IsSpecialName(key) {
		return fmt.Errorf("skip special file %s for jfs: %w", key, utils.ErrSkipped)
	}
	p := j.path(key)
	if strings.HasSuffix(p, "/") {
		eno := j.jfs.MkdirAll(ctx, p, 0777, j.umask)
		return toError(eno)
	}
	var tmp string
	if object.PutInplace {
		tmp = p
	} else {
		name := path.Base(p)
		if len(name) > 200 {
			name = name[:200]
		}
		tmp = object.TmpFilePath(p, name)
		defer func() {
			if err != nil {
				if e := j.jfs.Delete(ctx, tmp); e != 0 {
					logger.Warnf("Failed to delete %s: %s", tmp, e)
				}
			}
		}()
	}
	f, eno := j.jfs.Create(ctx, tmp, 0666, j.umask)
	if eno == syscall.ENOENT {
		if eno = j.jfs.MkdirAll(ctx, path.Dir(tmp), 0777, j.umask); eno != 0 {
			return toError(eno)
		}
		f, eno = j.jfs.Create(ctx, tmp, 0666, j.umask)
	}

	if eno == syscall.EEXIST {
		if eno = j.jfs.Delete(ctx, tmp); eno != 0 {
			return toError(eno)
		}
		f, eno = j.jfs.Create(ctx, tmp, 0666, j.umask)
	}

	if eno != 0 {
		return toError(eno)
	}
	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err = io.CopyBuffer(&jFile{f, 0}, in, *buf)
	if err != nil {
		return
	}
	eno = f.Close(ctx)
	if eno != 0 {
		return toError(eno)
	}
	if !object.PutInplace {
		if eno = j.jfs.Rename(ctx, tmp, p, 0); eno != 0 {
			return toError(eno)
		}
	}
	return nil
}

func (j *juiceFS) Delete(rCtx context.Context, key string, getters ...object.AttrGetter) error {
	ctx := meta.WrapWithoutCancel(rCtx, pid, uid, []uint32{gid})
	if key == "" {
		return nil
	}
	p := strings.TrimSuffix(j.path(key), dirSuffix)
	eno := j.jfs.Delete(ctx, p)
	if eno == syscall.ENOENT {
		eno = 0
	}
	return toError(eno)
}

type jObj struct {
	key       string
	fi        *fs.FileStat
	isSymlink bool
}

func (o *jObj) Key() string { return o.key }
func (o *jObj) Size() int64 {
	if o.fi.IsDir() {
		return 0
	}
	return o.fi.Size()
}
func (o *jObj) Mtime() time.Time     { return o.fi.ModTime() }
func (o *jObj) IsDir() bool          { return o.fi.IsDir() }
func (o *jObj) IsSymlink() bool      { return o.isSymlink }
func (o *jObj) Owner() string        { return utils.UserName(o.fi.Uid()) }
func (o *jObj) Group() string        { return utils.GroupName(o.fi.Gid()) }
func (o *jObj) Mode() os.FileMode    { return o.fi.Mode() }
func (o *jObj) StorageClass() string { return "" }

func (j *juiceFS) Head(rCtx context.Context, key string) (object.Object, error) {
	ctx := meta.WrapWithoutCancel(rCtx, pid, uid, []uint32{gid})
	errConv := func(eno syscall.Errno) error {
		if errors.Is(eno, syscall.ENOENT) {
			return os.ErrNotExist
		} else {
			return eno
		}
	}
	fi, eno := j.jfs.Lstat(ctx, j.path(key))
	if eno != 0 {
		return nil, errConv(eno)
	}
	isSymlink := fi.IsSymlink()
	if isSymlink {
		fi, eno = j.jfs.Stat(ctx, j.path(key))
		if eno != 0 {
			return nil, errConv(eno)
		}
	}
	return &jObj{key, fi, isSymlink}, nil
}

func (j *juiceFS) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]object.Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", utils.ENOTSUP
	}
	dir := j.path(prefix)
	var objs []object.Object
	if !strings.HasSuffix(dir, dirSuffix) {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := j.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	entries, err := j.readDirSorted(dir, followLink)
	if err != 0 {
		if err == syscall.ENOENT {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}
	for _, e := range entries {
		key := dir[1:] + e.name
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		f := &jObj{key, e.fi, e.fi.IsSymlink()}
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}
	var nextMarker string
	if len(objs) > 0 {
		nextMarker = objs[len(objs)-1].Key()
	}
	return objs, len(objs) == int(limit), nextMarker, nil
}

type mEntry struct {
	fi        *fs.FileStat
	name      string
	isSymlink bool
}

// readDirSorted reads the directory named by dirname and returns
// a sorted list of directory entries.
func (j *juiceFS) readDirSorted(dirname string, followLink bool) ([]*mEntry, syscall.Errno) {
	f, err := j.jfs.Open(ctx, dirname, 0)
	if err != 0 {
		return nil, err
	}
	defer f.Close(ctx)
	entries, err := f.ReaddirPlus(ctx, 0)
	if err != 0 {
		return nil, err
	}
	mEntries := make([]*mEntry, len(entries))
	for i, e := range entries {
		fi := fs.AttrToFileInfo(e.Inode, e.Attr)
		if fi.IsDir() {
			mEntries[i] = &mEntry{fi, string(e.Name) + dirSuffix, false}
		} else if fi.IsSymlink() && followLink {
			fi2, err := j.jfs.Stat(ctx, path.Join(dirname, string(e.Name)))
			if err != 0 {
				mEntries[i] = &mEntry{fi, string(e.Name), true}
				continue
			}
			name := string(e.Name)
			if fi2.IsDir() {
				name += dirSuffix
			}
			mEntries[i] = &mEntry{fi2, name, false}
		} else {
			mEntries[i] = &mEntry{fi, string(e.Name), fi.IsSymlink()}
		}
	}
	sort.Slice(mEntries, func(i, j int) bool { return mEntries[i].name < mEntries[j].name })
	return mEntries, err
}

func (j *juiceFS) Chtimes(key string, mtime time.Time) error {
	f, err := j.jfs.Lopen(ctx, j.path(key), 0)
	if err != 0 {
		return err
	}
	defer f.Close(ctx)
	return toError(f.Utime(ctx, -1, mtime.UnixNano()/1e6))
}

// syscallMode returns the syscall-specific mode bits from Go's portable mode bits.
func syscallMode(i os.FileMode) (o uint32) {
	o |= uint32(i.Perm())
	if i&os.ModeSetuid != 0 {
		o |= syscall.S_ISUID
	}
	if i&os.ModeSetgid != 0 {
		o |= syscall.S_ISGID
	}
	if i&os.ModeSticky != 0 {
		o |= syscall.S_ISVTX
	}
	// No mapping for Go's ModeTemporary (plan9 only).
	return
}

func (j *juiceFS) Chmod(key string, mode os.FileMode) error {
	f, err := j.jfs.Open(ctx, j.path(key), 0)
	if err != 0 {
		return err
	}
	defer f.Close(ctx)
	return toError(f.Chmod(ctx, uint16(syscallMode(mode))))
}

func (j *juiceFS) Chown(key string, owner, group string) error {
	uid := utils.LookupUser(owner)
	gid := utils.LookupGroup(group)
	if uid == -1 || gid == -1 {
		return fmt.Errorf("user(%s):group(%s) not found", owner, group)
	}
	f, err := j.jfs.Lopen(ctx, j.path(key), 0)
	if err != 0 {
		return err
	}
	defer f.Close(ctx)
	return toError(f.Chown(ctx, uint32(uid), uint32(gid)))
}

func (j *juiceFS) Symlink(oldName, newName string) error {
	p := j.path(newName)
	err := j.jfs.Symlink(ctx, oldName, p)
	if err == syscall.ENOENT {
		if err = j.jfs.MkdirAll(ctx, path.Dir(p), 0777, j.umask); err != 0 {
			return toError(err)
		}
		err = j.jfs.Symlink(ctx, oldName, p)
	}
	return toError(err)
}

func (j *juiceFS) Readlink(name string) (string, error) {
	target, err := j.jfs.Readlink(ctx, j.path(name))
	return string(target), toError(err)
}

func getDefaultChunkConf(format *meta.Format) *chunk.Config {
	chunkConf := &chunk.Config{
		BlockSize:   format.BlockSize * 1024,
		Compress:    format.Compression,
		HashPrefix:  format.HashPrefix,
		GetTimeout:  time.Minute,
		PutTimeout:  time.Minute,
		MaxUpload:   50,
		MaxDownload: 200,
		MaxRetries:  10,
		BufferSize:  300 << 20,
	}
	chunkConf.SelfCheck(format.UUID)
	return chunkConf
}

func (j *juiceFS) Shutdown() {
	_ = j.jfs.Meta().CloseSession()
}

func newJFS(endpoint, accessKey, secretKey, token string) (object.ObjectStorage, error) {
	pid, uid, gid = uint32(os.Getpid()), uint32(utils.GetCurrentUID()), uint32(utils.GetCurrentGID())
	if runtime.GOOS == "windows" && utils.IsWinAdminOrElevatedPrivilege() {
		uid = 0
		gid = 0
	}
	metaUrl := os.Getenv(endpoint)
	if metaUrl == "" {
		metaUrl = endpoint
	}
	metaConf := meta.DefaultConf()
	metaConf.MaxDeletes = 10
	metaConf.NoBGJob = true
	metaCli := meta.NewClient(metaUrl, metaConf)
	format, err := metaCli.Load(true)
	if err != nil {
		return nil, fmt.Errorf("load setting: %s", err)
	}
	blob, err := NewReloadableStorage(format, metaCli, nil)
	if err != nil {
		return nil, fmt.Errorf("object storage: %s", err)
	}
	chunkConf := getDefaultChunkConf(format)
	store := chunk.NewCachedStore(blob, *chunkConf, nil)
	registerMetaMsg(metaCli, store, chunkConf)
	err = metaCli.NewSession(false)
	if err != nil {
		return nil, fmt.Errorf("new session: %s", err)
	}
	metaCli.OnReload(func(fmt *meta.Format) {
		store.UpdateLimit(fmt.UploadLimit, fmt.DownloadLimit)
	})

	vfsConf := &vfs.Config{
		Meta:            metaConf,
		Format:          *format,
		Version:         version.Version(),
		Chunk:           chunkConf,
		AttrTimeout:     time.Second,
		DirEntryTimeout: time.Second,
		Mountpoint:      cliCtx.String("mountpoint"),
	}

	vfsConf.Format.RemoveSecret()
	d, _ := json.MarshalIndent(vfsConf, "  ", "")
	logger.Debugf("Config: %s", string(d))

	jfs, err := fs.NewFileSystem(vfsConf, metaCli, store, nil)
	if err != nil {
		return nil, fmt.Errorf("Initialize: %s", err)
	}
	return &juiceFS{object.DefaultObjectStorage{}, format.Name, uint16(utils.GetUmask()), jfs}, nil
}

func init() {
	object.Register("jfs", newJFS)
}


================================================
FILE: cmd/object_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
)

func testKeysEqual(objs []object.Object, expectedKeys []string) error {
	gottenKeys := make([]string, len(objs))
	for idx, obj := range objs {
		gottenKeys[idx] = obj.Key()
	}
	if len(gottenKeys) != len(expectedKeys) {
		return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
			strings.Join(gottenKeys, ", "))
	}

	for idx, key := range gottenKeys {
		if key != expectedKeys[idx] {
			return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
				strings.Join(gottenKeys, ", "))
		}
	}
	return nil
}

// copied from pkg/object/filesystem_test.go
func testFileSystem(t *testing.T, s object.ObjectStorage) {
	ctx := context.Background()
	keys := []string{
		"x/",
		"x/x.txt",
		"xy.txt",
		"xyz/",
		"xyz/xyz.txt",
	}
	// initialize directory tree
	for _, key := range keys {
		if err := s.Put(ctx, key, bytes.NewReader([]byte{})); err != nil {
			t.Fatalf("PUT object `%s` failed: %q", key, err)
		}
	}
	if o, err := s.Head(ctx, "x/"); err != nil {
		t.Fatalf("Head x/: %s", err)
	} else if f, ok := o.(object.File); !ok {
		t.Fatalf("Head should return File")
	} else if !f.IsDir() {
		t.Fatalf("x/ should be a dir")
	}
	// cleanup
	defer func() {
		// delete reversely, directory only can be deleted when it's empty
		objs, err := listAll(ctx, s, "", "", 100)
		if err != nil {
			t.Fatalf("listall failed: %s", err)
		}
		gottenKeys := make([]string, len(objs))
		for idx, obj := range objs {
			gottenKeys[idx] = obj.Key()
		}
		idx := len(gottenKeys) - 1
		for ; idx >= 0; idx-- {
			if err := s.Delete(ctx, gottenKeys[idx]); err != nil {
				t.Fatalf("DELETE object `%s` failed: %q", gottenKeys[idx], err)
			}
		}
	}()
	objs, err := listAll(ctx, s, "x/", "", 100)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys := []string{"x/", "x/x.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	objs, err = listAll(ctx, s, "x", "", 100)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	objs, err = listAll(ctx, s, "xy", "", 100)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys = []string{"xy.txt", "xyz/", "xyz/xyz.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	if ss, ok := s.(object.SupportSymlink); ok {
		// a< a- < a/ < a0    <    b< b- < b/ < b0
		_ = s.Put(ctx, "a-", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "a0", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "b-", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "b0", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "xyz/ol1/p.txt", bytes.NewReader([]byte{}))
		if err = ss.Symlink("./xyz/ol1/", "a"); err != nil {
			t.Fatalf("symlink a %s", err)
		}
		if target, err := ss.Readlink("a"); err != nil || target != "./xyz/ol1/" {
			t.Fatalf("readlink a %s %s", target, err)
		}
		if err = ss.Symlink("/xyz/notExist/", "b"); err != nil {
			t.Fatalf("symlink b %s", err)
		}
		if target, err := ss.Readlink("b"); err != nil || target != "/xyz/notExist/" {
			t.Fatalf("readlink b %s %s", target, err)
		}
		head, err := s.Head(ctx, "a")
		if err != nil || !head.IsSymlink() {
			t.Fatalf("head a %s %s", head, err)
		}
		ss.Symlink("notExit", "brokenLink")
		_, err = s.Head(ctx, "brokenLink")
		if !errors.Is(err, os.ErrNotExist) {
			t.Fatalf("head b %s %s", head, err)
		}
		s.Delete(ctx, "brokenLink")
		objs, err = listAll(ctx, s, "", "", 100)
		if err != nil {
			t.Fatalf("listall failed: %s", err)
		}
		expectedKeys = []string{"", "a-", "a/", "a/p.txt", "a0", "b", "b-", "b0", "x/", "x/x.txt", "xy.txt", "xyz/", "xyz/ol1/", "xyz/ol1/p.txt", "xyz/xyz.txt"}
		if err = testKeysEqual(objs, expectedKeys); err != nil {
			t.Fatalf("testKeysEqual fail: %s", err)
		}
	}

	// put a file with very long name
	longName := strings.Repeat("a", 255)
	if err := s.Put(ctx, "dir/"+longName, bytes.NewReader([]byte{0})); err != nil {
		t.Fatalf("PUT a file with long name `%s` failed: %q", longName, err)
	}
}

func TestJFS(t *testing.T) {
	m := meta.NewClient("memkv://", nil)
	format := &meta.Format{
		Name:      "test",
		BlockSize: 4096,
		Capacity:  1 << 30,
		DirStats:  true,
	}
	_ = m.Init(format, true)
	var conf = vfs.Config{
		Meta: meta.DefaultConf(),
		Chunk: &chunk.Config{
			BlockSize:   format.BlockSize << 10,
			MaxUpload:   1,
			MaxDownload: 200,
			BufferSize:  100 << 20,
		},
		DirEntryTimeout: time.Millisecond * 100,
		EntryTimeout:    time.Millisecond * 100,
		AttrTimeout:     time.Millisecond * 100,
		AccessLog:       "/tmp/juicefs.access.log",
	}
	objStore, _ := object.CreateStorage("mem", "", "", "", "")
	store := chunk.NewCachedStore(objStore, *conf.Chunk, nil)
	jfs, err := fs.NewFileSystem(&conf, m, store, nil)
	if err != nil {
		t.Fatalf("initialize  failed: %s", err)
	}

	jstore := &juiceFS{object.DefaultObjectStorage{}, "test", uint16(utils.GetUmask()), jfs}
	testFileSystem(t, jstore)
	testFileSystem(t, object.WithPrefix(jstore, "unittest/"))
}


================================================
FILE: cmd/passfd.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"net"
	"os"
	"sync"
	"syscall"

	"github.com/juicedata/juicefs/pkg/utils"
)

// Get receives file descriptors from a Unix domain socket.
//
// Num specifies the expected number of file descriptors in one message.
// Internal files' names to be assigned are specified via optional filenames
// argument.
//
// You need to close all files in the returned slice. The slice can be
// non-empty even if this function returns an error.
func getFd(via *net.UnixConn, num int) ([]byte, []int, error) {
	if num < 1 {
		return nil, nil, nil
	}

	// get the underlying socket
	viaf, err := via.File()
	if err != nil {
		return nil, nil, err
	}
	defer viaf.Close()
	socket := int(viaf.Fd())

	// recvmsg
	msg := make([]byte, syscall.CmsgSpace(100))
	oob := make([]byte, syscall.CmsgSpace(num*4))
	n, oobn, _, _, err := syscall.Recvmsg(socket, msg, oob, 0)
	if err != nil {
		return nil, nil, err
	}

	// parse control msgs
	msgs, err := syscall.ParseSocketControlMessage(oob[:oobn])

	// convert fds to files
	fds := make([]int, 0, len(msgs))
	for _, msg := range msgs {
		var rights []int
		rights, err = syscall.ParseUnixRights(&msg)
		fds = append(fds, rights...)
		if err != nil {
			for i := range fds {
				syscall.Close(fds[i])
			}
			fds = nil
			break
		}
	}
	return msg[:n], fds, err
}

// putFd sends file descriptors to Unix domain socket.
//
// Please note that the number of descriptors in one message is limited
// and is rather small.
func putFd(via *net.UnixConn, msg []byte, fds ...int) error {
	if len(fds) == 0 {
		return nil
	}
	viaf, err := via.File()
	if err != nil {
		return err
	}
	defer viaf.Close()
	socket := int(viaf.Fd())
	rights := syscall.UnixRights(fds...)
	return syscall.Sendmsg(socket, msg, rights, nil, 0)
}

var fuseMu sync.Mutex
var fuseFd int = 0
var fuseSetting = []byte("FUSE")
var serverAddress string = fmt.Sprintf("/tmp/fuse_fd_comm.%d", os.Getpid())
var csiCommPath = os.Getenv("JFS_SUPER_COMM")

func handleFDRequest(conn *net.UnixConn) {
	defer conn.Close()
	var fds = []int{0}
	fuseMu.Lock()
	if fuseFd > 0 {
		fds = append(fds, fuseFd)
		logger.Debugf("send FUSE fd: %d", fuseFd)
	}
	err := putFd(conn, fuseSetting, fds...)
	if err != nil {
		fuseMu.Unlock()
		logger.Errorf("send fuse fds: %s", err)
		return
	}
	if fuseFd > 0 {
		_ = syscall.Close(fuseFd)
		fuseFd = -1
	}
	fuseMu.Unlock()

	var msg []byte
	msg, fds, err = getFd(conn, 1)
	if err != nil {
		logger.Debugf("recv fuse fds: %s", err)
		return
	}
	fuseMu.Lock()
	if string(msg) != "CLOSE" && fuseFd <= 0 && len(fds) == 1 {
		logger.Debugf("recv FUSE fd: %d", fds[0])
		fuseFd = fds[0]
		fuseSetting = msg
		if csiCommPath != "" {
			err = sendFuseFd(csiCommPath, fuseSetting, fuseFd)
			if err != nil {
				logger.Warnf("send fd to %s: %v", csiCommPath, err)
			}
		}
	} else {
		for _, fd := range fds {
			_ = syscall.Close(fd)
		}
		logger.Debugf("msg: %s fds: %+v", string(msg), fds)
	}
	fuseMu.Unlock()
}

func serveFuseFD(path string) {
	if csiCommPath != "" {
		fd, fSetting := getFuseFd(csiCommPath)
		if fd > 0 {
			fuseFd, fuseSetting = fd, fSetting
		}
	}
	_ = os.Remove(path)
	sock, err := net.Listen("unix", path)
	if err != nil {
		logger.Error(err)
		return
	}
	go func() {
		defer sock.Close()
		for {
			conn, err := sock.Accept()
			if err != nil {
				logger.Warnf("accept : %s", err)
				continue
			}
			go handleFDRequest(conn.(*net.UnixConn))
		}
	}()
}

func getFuseFd(path string) (int, []byte) {
	if !utils.Exists(path) {
		return -1, nil
	}
	conn, err := net.Dial("unix", path)
	if err != nil {
		logger.Warnf("dial %s: %s", path, err)
		return -1, nil
	}
	defer conn.Close()
	msg, fds, err := getFd(conn.(*net.UnixConn), 2)
	if err != nil {
		logger.Warnf("recv fds: %s", err)
		return -1, nil
	}
	_ = syscall.Close(fds[0])
	if len(fds) > 1 {
		// for old version
		_ = putFd(conn.(*net.UnixConn), []byte("CLOSE"), 0) // close it
		logger.Debugf("recv FUSE fd: %d", fds[1])
		return fds[1], msg
	}
	return 0, nil
}

func sendFuseFd(path string, msg []byte, fd int) error {
	conn, err := net.Dial("unix", path)
	if err != nil {
		return err
	}
	defer conn.Close()
	_, fds, err := getFd(conn.(*net.UnixConn), 2)
	if err != nil {
		logger.Warnf("recv fds: %s", err)
		return err
	}
	for _, fd := range fds {
		_ = syscall.Close(fd)
	}
	logger.Debugf("send FUSE fd: %d", fd)
	return putFd(conn.(*net.UnixConn), msg, fd)
}


================================================
FILE: cmd/printsid.go
================================================
package cmd

import (
	"fmt"
	"runtime"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdPrintSID() *cli.Command {
	return &cli.Command{
		Name:     "printsid",
		Category: "TOOL",
		Action:   printSID,
		Usage:    "Show SID info and the convected UID/GID for the current user.",
		Hidden:   true,
	}
}

func printSID(ctx *cli.Context) error {
	if runtime.GOOS != "windows" {
		return fmt.Errorf("printsid command is only supported on Windows")
	}

	userSid := utils.GetCurrentUserSIDStr()
	groupSid := utils.GetCurrentUserGroupSIDStr()
	fmt.Printf("Current User SID: %s, UID: %d\n", userSid, utils.GetCurrentUID())
	fmt.Printf("Current Group SID: %s, GID: %d\n", groupSid, utils.GetCurrentGID())

	return nil
}


================================================
FILE: cmd/profile.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdProfile() *cli.Command {
	return &cli.Command{
		Name:      "profile",
		Action:    profile,
		Category:  "INSPECTOR",
		Usage:     "Show profiling of operations completed in JuiceFS",
		ArgsUsage: "MOUNTPOINT/LOGFILE",
		Description: `
This is a tool that analyzes access log of JuiceFS and shows an overview of recently completed operations.

Examples:
# Monitor real time operations
$ juicefs profile /mnt/jfs

# Replay an access log
$ cat /mnt/jfs/.accesslog > /tmp/juicefs.accesslog
# Press Ctrl-C to stop the "cat" command after some time
$ juicefs profile /tmp/juicefs.accesslog

# Analyze an access log and print the total statistics immediately
$ juicefs profile /tmp/juicefs.accesslog --interval 0

Details: https://juicefs.com/docs/community/fault_diagnosis_and_analysis#profile`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "uid",
				Aliases: []string{"u"},
				Usage:   "track only specified UIDs(separated by comma ,)",
			},
			&cli.StringFlag{
				Name:    "gid",
				Aliases: []string{"g"},
				Usage:   "track only specified GIDs(separated by comma ,)",
			},
			&cli.StringFlag{
				Name:    "pid",
				Aliases: []string{"p"},
				Usage:   "track only specified PIDs(separated by comma ,)",
			},
			&cli.StringFlag{
				Name:    "paths",
				Aliases: []string{"filter-by-path"},
				Usage:   "track only specified paths (separated by comma , Only for Windows FUSE log)",
				Hidden:  true,
			},
			&cli.Int64Flag{
				Name:  "interval",
				Value: 2,
				Usage: "flush interval in seconds; set it to 0 when replaying a log file to get an immediate result",
			},
		},
	}
}

var findDigits = regexp.MustCompile(`\d+`)

type profiler struct {
	file      *os.File
	replay    bool
	colorful  bool
	interval  time.Duration
	uids      []string
	gids      []string
	pids      []string
	paths     []string
	entryChan chan *logEntry // one line
	statsChan chan map[string]*stat
	pause     chan bool
	/* --- for replay --- */
	printTime chan time.Time
	done      chan bool
}

type stat struct {
	count int
	total int // total latency in 'us'
}

type keyStat struct {
	key  string
	sPtr *stat
}

type logEntry struct {
	ts            time.Time
	uid, gid, pid string
	op            string
	latency       int    // us
	path          string // only for Windows FUSE log
}

func parseLine(line string, winFuseLog bool) *logEntry {
	if len(line) < 3 { // dummy line: "#"
		return nil
	}
	fields := strings.Fields(line)
	if len(fields) < 5 {
		logger.Warnf("Log line is invalid: %s", line)
		return nil
	}
	ts, err := time.Parse("2006.01.02 15:04:05.000000", strings.Join([]string{fields[0], fields[1]}, " "))
	if err != nil {
		logger.Warnf("Failed to parse log line: %s: %s", line, err)
		return nil
	}
	ids := findDigits.FindAllString(fields[2], 3) // e.g: [uid:0,gid:0,pid:36674]
	if len(ids) != 3 {
		logger.Warnf("Log line is invalid: %s", line)
		return nil
	}
	latStr := fields[len(fields)-1] // e.g: <0.000003>
	latFloat, err := strconv.ParseFloat(latStr[1:len(latStr)-1], 64)
	if err != nil {
		logger.Warnf("Failed to parse log line: %s: %s", line, err)
		return nil
	}

	filePath := ""
	if winFuseLog {
		// Find the path in Windows log, should after the "{op} (/xxxx/bb  bb/cc cc.*)"
		// the windows path may contain space or "(", ")"
		restPart := strings.Join(fields[4:len(fields)-1], " ")
		if strings.HasPrefix(restPart, "(") && strings.Contains(restPart, ")") {
			lastIndex := strings.LastIndex(restPart, ")")
			if lastIndex > 1 {
				paths := strings.SplitN(restPart[1:lastIndex], ",", 2)
				if len(paths) > 0 {
					filePath = paths[0]
				}
			}
		}

		if filePath == "" {
			logger.Warnf("log line is invalid, cannot find path: %s", line)
		}
	}

	return &logEntry{
		ts:      ts,
		uid:     ids[0],
		gid:     ids[1],
		pid:     ids[2],
		op:      fields[3],
		latency: int(latFloat * 1000000.0),
		path:    filePath,
	}
}

func (p *profiler) reader() {
	scanner := bufio.NewScanner(p.file)
	for scanner.Scan() {
		p.entryChan <- parseLine(scanner.Text(), p.isWinFuseLog())
	}
	if err := scanner.Err(); err != nil {
		logger.Fatalf("Reading log file failed with error: %s", err)
	}
	close(p.entryChan)
	if p.replay {
		p.done <- true
	}
}

func (p *profiler) isWinFuseLog() bool {
	return len(p.paths) > 0
}

func (p *profiler) isValid(entry *logEntry) bool {
	valid := func(f []string, e string) bool {
		if len(f) == 1 && f[0] == "" {
			return true
		}
		for _, v := range f {
			if v == e {
				return true
			}
		}
		return false
	}
	return valid(p.uids, entry.uid) && valid(p.gids, entry.gid) && valid(p.pids, entry.pid) && valid(p.paths, entry.path)
}

func (p *profiler) counter() {
	var edge time.Time
	stats := make(map[string]*stat)
	for {
		select {
		case entry := <-p.entryChan:
			if entry == nil {
				break
			}
			if !p.isValid(entry) {
				break
			}
			if p.replay {
				if edge.IsZero() {
					edge = entry.ts.Add(p.interval)
				}
				for ; entry.ts.After(edge); edge = edge.Add(p.interval) {
					p.statsChan <- stats
					p.printTime <- edge
					stats = make(map[string]*stat)
				}
			}
			value, ok := stats[entry.op]
			if !ok {
				value = &stat{}
				stats[entry.op] = value
			}
			value.count++
			value.total += entry.latency
		case p.statsChan <- stats:
			if p.replay {
				p.printTime <- edge
				edge = edge.Add(p.interval)
			}
			stats = make(map[string]*stat)
		}
	}
}

func (p *profiler) fastCounter() {
	var start, last time.Time
	stats := make(map[string]*stat)
	for entry := range p.entryChan {
		if entry == nil {
			continue
		}
		if !p.isValid(entry) {
			continue
		}
		if start.IsZero() {
			start = entry.ts
		}
		last = entry.ts
		value, ok := stats[entry.op]
		if !ok {
			value = &stat{}
			stats[entry.op] = value
		}
		value.count++
		value.total += entry.latency
	}
	p.statsChan <- stats
	p.printTime <- start
	p.printTime <- last
}

func colorize1(msg string, color int) string {
	return fmt.Sprintf("%s%dm%s%s", COLOR_SEQ, color, msg, RESET_SEQ)
}

func printLines(lines []string, colorful bool) {
	if colorful {
		fmt.Print(CLEAR_SCREEM)
		fmt.Println(colorize1(lines[0], GREEN))
		fmt.Println(colorize1(lines[1], YELLOW))
		fmt.Println(colorize1(lines[2], BLUE))
		if len(lines) > 3 {
			for _, l := range lines[3:] {
				fmt.Println(colorize1(l, BLACK))
			}
		}
	} else {
		fmt.Println(lines[0])
		for _, l := range lines[2:] {
			fmt.Println(l)
		}
		fmt.Println()
	}
}

func (p *profiler) flush(timeStamp time.Time, keyStats []keyStat, done bool) {
	var head string
	if p.replay {
		if done {
			head = "(replay done)"
		} else {
			head = "(replaying)"
		}
	}
	output := make([]string, 3)
	output[0] = fmt.Sprintf("> JuiceFS Profiling %13s  Refresh: %.0f seconds %20s",
		head, p.interval.Seconds(), timeStamp.Format("2006-01-02T15:04:05"))
	output[2] = fmt.Sprintf("%-14s %10s %15s %18s %14s", "Operation", "Count", "Average(us)", "Total(us)", "Percent(%)")
	for _, s := range keyStats {
		output = append(output, fmt.Sprintf("%-14s %10d %15.0f %18d %14.1f",
			s.key, s.sPtr.count, float64(s.sPtr.total)/float64(s.sPtr.count), s.sPtr.total, float64(s.sPtr.total)/float64(p.interval.Microseconds())*100.0))
	}
	if p.replay {
		output[1] = fmt.Sprintln("\n[enter]Pause/Continue")
	}
	printLines(output, p.colorful)
}

func (p *profiler) flusher() {
	var paused, done bool
	ticker := time.NewTicker(p.interval)
	ts := time.Now()
	p.flush(ts, nil, false)
	for {
		select {
		case t := <-ticker.C:
			stats := <-p.statsChan
			if paused { // ticker event might be passed long ago
				paused = false
				ticker.Stop()
				ticker = time.NewTicker(p.interval)
				t = time.Now()
			}
			if done {
				ticker.Stop()
			}
			if p.replay {
				ts = <-p.printTime
			} else {
				ts = t
			}
			keyStats := make([]keyStat, 0, len(stats))
			for k, s := range stats {
				keyStats = append(keyStats, keyStat{k, s})
			}
			sort.Slice(keyStats, func(i, j int) bool { // reversed
				return keyStats[i].sPtr.total > keyStats[j].sPtr.total
			})
			p.flush(ts, keyStats, done)
			if done {
				os.Exit(0)
			}
		case paused = <-p.pause:
			fmt.Printf("\n\033[97mPaused. Press [enter] to continue.\n\033[0m")
			<-p.pause
		case done = <-p.done:
		}
	}
}

func profile(ctx *cli.Context) error {
	setup(ctx, 1)
	logPath := ctx.Args().First()
	st, err := os.Stat(logPath)
	if err != nil {
		logger.Fatalf("Failed to stat path %s: %s", logPath, err)
	}
	var replay bool
	if st.IsDir() { // mount point
		inode, err := utils.GetFileInode(logPath)
		if err != nil {
			logger.Fatalf("Failed to lookup inode for %s: %s", logPath, err)
		}
		if inode != uint64(meta.RootInode) {
			logger.Fatalf("Path %s is not a mount point!", logPath)
		}
		if p := filepath.Join(logPath, ".jfs.accesslog"); utils.Exists(p) {
			logPath = p
		} else {
			logPath = filepath.Join(logPath, ".accesslog")
		}
	} else { // log file to be replayed
		replay = true
	}
	nodelay := ctx.Int64("interval") == 0
	if nodelay && !replay {
		logger.Fatalf("Interval must be > 0 for real time mode!")
	}
	file, err := os.Open(logPath)
	if err != nil {
		logger.Fatalf("Failed to open log file %s: %s", logPath, err)
	}
	defer file.Close()

	prof := profiler{
		file:      file,
		replay:    replay,
		colorful:  utils.SupportANSIColor(os.Stdout.Fd()),
		interval:  time.Second * time.Duration(ctx.Int64("interval")),
		uids:      strings.Split(ctx.String("uid"), ","),
		gids:      strings.Split(ctx.String("gid"), ","),
		pids:      strings.Split(ctx.String("pid"), ","),
		paths:     strings.Split(ctx.String("paths"), ","),
		entryChan: make(chan *logEntry, 16),
		statsChan: make(chan map[string]*stat),
		pause:     make(chan bool),
	}
	if prof.replay {
		prof.printTime = make(chan time.Time)
		prof.done = make(chan bool)
	}

	go prof.reader()
	if nodelay {
		go prof.fastCounter()
		stats := <-prof.statsChan
		start := <-prof.printTime
		last := <-prof.printTime
		keyStats := make([]keyStat, 0, len(stats))
		for k, s := range stats {
			keyStats = append(keyStats, keyStat{k, s})
		}
		sort.Slice(keyStats, func(i, j int) bool { // reversed
			return keyStats[i].sPtr.total > keyStats[j].sPtr.total
		})
		prof.replay = false
		prof.interval = last.Sub(start)
		prof.flush(last, keyStats, <-prof.done)
		return nil
	}

	go prof.counter()
	go prof.flusher()
	var input string
	for {
		_, _ = fmt.Scanln(&input)
		if prof.colorful {
			fmt.Print("\033[1A\033[K") // move cursor back
		}
		if prof.replay {
			prof.pause <- true // pause/continue
		}
	}
}


================================================
FILE: cmd/quota.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"math"
	"sort"
	"strings"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"

	"github.com/urfave/cli/v2"
)

func cmdQuota() *cli.Command {
	return &cli.Command{
		Name:            "quota",
		Category:        "ADMIN",
		Usage:           "Manage directory quotas",
		ArgsUsage:       "META-URL",
		HideHelpCommand: true,
		Description: `
Examples:
$ juicefs quota set redis://localhost --path /dir1 --capacity 1 --inodes 100
$ juicefs quota get redis://localhost --path /dir1
$ juicefs quota list redis://localhost
$ juicefs quota delete redis://localhost --path /dir1
$ juicefs quota check redis://localhost --path /dir1 --repair
$ juicefs quota set redis://localhost --uid 1000 --capacity 2 --inodes 200
$ juicefs quota get redis://localhost --uid 1000
$ juicefs quota delete redis://localhost --uid 1000
$ juicefs quota set redis://localhost --gid 100 --capacity 5 --inodes 500
$ juicefs quota get redis://localhost --gid 100
$ juicefs quota delete redis://localhost --gid 100`,
		Subcommands: []*cli.Command{
			{
				Name:      "set",
				Usage:     "Set quota to a directory, user, or group",
				ArgsUsage: "META-URL",
				Action:    quota,
			},
			{
				Name:      "get",
				Usage:     "Get quota of a directory, user, or group",
				ArgsUsage: "META-URL",
				Action:    quota,
			},
			{
				Name:      "delete",
				Aliases:   []string{"del"},
				Usage:     "Delete quota of a directory, user, or group",
				ArgsUsage: "META-URL",
				Action:    quota,
			},
			{
				Name:      "list",
				Aliases:   []string{"ls"},
				Usage:     "List all quotas (directory, user, and group)",
				ArgsUsage: "META-URL",
				Action:    quota,
			},
			{
				Name:      "check",
				Usage:     "Check quota consistency of a directory, user, or group",
				ArgsUsage: "META-URL",
				Action:    quota,
			},
		},
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "path",
				Usage: "full path of the directory within the volume",
			},
			&cli.BoolFlag{
				Name:  "create",
				Usage: "create the directory if not exists",
			},
			&cli.StringFlag{
				Name:  "capacity",
				Usage: "hard quota of the directory limiting its usage of space in GiB",
			},
			&cli.Uint64Flag{
				Name:  "inodes",
				Usage: "hard quota of the directory limiting its number of inodes",
			},
			&cli.BoolFlag{
				Name:  "repair",
				Usage: "repair inconsistent quota",
			},
			&cli.BoolFlag{
				Name:  "strict",
				Usage: "calculate total usage of directory in strict mode (NOTE: may be slow for huge directory)",
			},
			&cli.Uint64Flag{
				Name:  "uid",
				Usage: "user ID for user quota management",
			},
			&cli.Uint64Flag{
				Name:  "gid",
				Usage: "group ID for group quota management",
			},
		},
	}
}

func quota(c *cli.Context) error {
	setup(c, 1)
	var cmd uint8
	switch c.Command.Name {
	case "set":
		cmd = meta.QuotaSet
	case "get":
		cmd = meta.QuotaGet
	case "delete":
		cmd = meta.QuotaDel
	case "list":
		cmd = meta.QuotaList
	case "check":
		cmd = meta.QuotaCheck
	default:
		logger.Fatalf("Invalid quota command: %s", c.Command.Name)
	}

	var uid, gid uint32
	var quotaKey string
	var quotaType string
	validateID := func(name string) uint32 {
		id := c.Uint64(name)
		if id == 0 {
			logger.Fatalf("Invalid --%s: 0 is not allowed", name)
		}
		if id > math.MaxUint32 {
			logger.Fatalf("Invalid --%s: %d exceeds maximum value %d", name, id, math.MaxUint32)
		}
		return uint32(id)
	}
	if c.IsSet("uid") {
		uid = validateID("uid")
		quotaKey = fmt.Sprintf("uid:%d", uid)
		quotaType = "user"
		if c.IsSet("gid") {
			logger.Fatalf("Cannot specify both --uid and --gid at the same time")
		}
		if c.IsSet("path") {
			logger.Fatalf("Cannot specify both --uid and --path at the same time")
		}
	} else if c.IsSet("gid") {
		gid = validateID("gid")
		quotaKey = fmt.Sprintf("gid:%d", gid)
		quotaType = "group"
		if c.IsSet("path") {
			logger.Fatalf("Cannot specify both --gid and --path at the same time")
		}
	} else {
		dpath := c.String("path")
		if dpath == "" && cmd != meta.QuotaList {
			logger.Fatalf("Please specify the directory with `--path <dir>` option")
		}
		quotaKey = dpath
		quotaType = "directory"
	}

	removePassword(c.Args().Get(0))

	m := meta.NewClient(c.Args().Get(0), nil)
	_, err := m.Load(true)
	if err != nil {
		logger.Fatalf("Load setting: %s", err)
	}
	qs := make(map[string]*meta.Quota)
	var strict, repair bool
	if cmd == meta.QuotaSet {
		strict = c.Bool("strict")
		q := &meta.Quota{MaxSpace: -1, MaxInodes: -1} // negative means no change
		if c.IsSet("capacity") {
			q.MaxSpace = int64(utils.ParseBytes(c, "capacity", 'G'))
		}
		if c.IsSet("inodes") {
			q.MaxInodes = int64(c.Uint64("inodes"))
		}
		qs[quotaKey] = q
	} else if cmd == meta.QuotaCheck {
		strict = c.Bool("strict")
		repair = c.Bool("repair")
	}

	if err := m.HandleQuota(meta.Background(), cmd, quotaKey, uid, gid, qs, strict, repair, c.Bool("create")); err != nil {
		return err
	} else if len(qs) == 0 {
		return nil
	}

	result := make([][]string, 1, len(qs)+1)

	if quotaType == "user" {
		result[0] = []string{"User ID", "Size", "Used", "Use%", "Inodes", "IUsed", "IUse%"}
	} else if quotaType == "group" {
		result[0] = []string{"Group ID", "Size", "Used", "Use%", "Inodes", "IUsed", "IUse%"}
	} else {
		result[0] = []string{"Path", "Size", "Used", "Use%", "Inodes", "IUsed", "IUse%"}
	}

	paths := make([]string, 0, len(qs))
	for p := range qs {
		paths = append(paths, p)
	}
	sort.Strings(paths)
	for _, p := range paths {
		q := qs[p]
		if q.UsedSpace < 0 {
			logger.Warnf("Used space of %s is negative (%d), please run `juicefs quota check` to fix it", p, q.UsedSpace)
			q.UsedSpace = 0
		}
		if q.UsedInodes < 0 {
			logger.Warnf("Used inodes of %s is negative (%d), please run `juicefs quota check` to fix it", p, q.UsedInodes)
			q.UsedInodes = 0
		}
		used := humanize.IBytes(uint64(q.UsedSpace))
		var size, usedR string
		if q.MaxSpace > 0 {
			size = humanize.IBytes(uint64(q.MaxSpace))
			usedR = fmt.Sprintf("%d%%", q.UsedSpace*100/q.MaxSpace)
		} else {
			size = "unchanged"
		}
		iused := humanize.Comma(q.UsedInodes)
		var itotal, iusedR string
		if q.MaxInodes > 0 {
			itotal = humanize.Comma(q.MaxInodes)
			iusedR = fmt.Sprintf("%d%%", q.UsedInodes*100/q.MaxInodes)
		} else {
			itotal = "unchanged"
		}

		var identifier string
		if strings.HasPrefix(p, "uid:") {
			identifier = fmt.Sprintf("UID:%s", strings.TrimPrefix(p, "uid:"))
		} else if strings.HasPrefix(p, "gid:") {
			identifier = fmt.Sprintf("GID:%s", strings.TrimPrefix(p, "gid:"))
		} else {
			identifier = p
		}
		result = append(result, []string{identifier, size, used, usedR, itotal, iused, iusedR})
	}
	printResult(result, 0, false)
	return nil
}


================================================
FILE: cmd/restore.go
================================================
package cmd

import (
	"bytes"
	"fmt"
	"math/rand"
	"os"
	"runtime"
	"strconv"
	"strings"
	"sync"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdRestore() *cli.Command {
	return &cli.Command{
		Name:      "restore",
		Action:    restore,
		Category:  "ADMIN",
		Usage:     "restore files from trash",
		ArgsUsage: "META HOUR ...",
		Description: `
Rebuild the tree structure for trash files, and put them back to original directories.

Examples:
$ juicefs restore redis://localhost/1 2023-05-10-01`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:  "put-back",
				Usage: "move the recovered files into original directory",
			},
			&cli.IntFlag{
				Name:  "threads",
				Value: 10,
				Usage: "number of threads",
			},
		},
	}
}

func restore(ctx *cli.Context) error {
	setup0(ctx, 2, 0)
	if runtime.GOOS == "windows" && !utils.IsWinAdminOrElevatedPrivilege() {
		return fmt.Errorf("restore command requires Administrator or elevated privilege on Windows")
	}
	if os.Getuid() != 0 && runtime.GOOS != "windows" {
		return fmt.Errorf("only root can restore files from trash")
	}
	removePassword(ctx.Args().Get(0))
	m := meta.NewClient(ctx.Args().Get(0), nil)
	_, err := m.Load(true)
	if err != nil {
		return err
	}
	for i := 1; i < ctx.NArg(); i++ {
		hour := ctx.Args().Get(i)
		doRestore(m, hour, ctx.Bool("put-back"), ctx.Int("threads"))
	}
	return nil
}

func doRestore(m meta.Meta, hour string, putBack bool, threads int) {
	if err := m.NewSession(false); err != nil {
		logger.Warningf("running without sessions because fail to new session: %s", err)
	} else {
		defer func() {
			_ = m.CloseSession()
		}()
	}
	logger.Infof("restore files in %s ...", hour)
	ctx := meta.Background()
	var parent meta.Ino
	var attr meta.Attr
	err := m.Lookup(ctx, meta.TrashInode, hour, &parent, &attr, false)
	if err != 0 {
		logger.Errorf("lookup %s: %s", hour, err)
		return
	}
	var entries []*meta.Entry
	err = m.Readdir(meta.Background(), parent, 0, &entries)
	if err != 0 {
		logger.Errorf("list %s: %s", hour, err)
		return
	}
	entries = entries[2:]
	// to avoid conflict
	rand.Shuffle(len(entries), func(i, j int) {
		entries[i], entries[j] = entries[j], entries[i]
	})

	var parents = make(map[meta.Ino]bool)
	if !putBack {
		for _, e := range entries {
			if e.Attr.Typ == meta.TypeDirectory {
				parents[e.Inode] = true
			}
		}
	}

	todo := make(chan *meta.Entry, 1000)
	p := utils.NewProgress(false)
	restored := p.AddCountBar("restored", int64(len(entries)))
	skipped := p.AddCountSpinner("skipped")
	failed := p.AddCountSpinner("failed")
	var mu sync.Mutex
	restoredTo := make(map[meta.Ino]int)
	var wg sync.WaitGroup
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for e := range todo {
				ps := bytes.SplitN(e.Name, []byte("-"), 3)
				dst, _ := strconv.Atoi(string(ps[0]))
				if putBack || parents[meta.Ino(dst)] {
					err = m.Rename(ctx, parent, string(e.Name), meta.Ino(dst), string(ps[2]), meta.RenameNoReplace|meta.RenameRestore, nil, nil)
					if err != 0 {
						logger.Warnf("restore %s: %s", string(e.Name), err)
						failed.Increment()
					} else {
						restored.Increment()
						mu.Lock()
						restoredTo[meta.Ino(dst)] += 1
						mu.Unlock()
					}
				} else {
					skipped.Increment()
				}
			}
		}()
	}

	for _, e := range entries {
		todo <- e
	}
	close(todo)
	wg.Wait()
	failed.Done()
	skipped.Done()
	restored.Done()
	p.Done()
	logger.Infof("restored %d files in %s", restored.Current(), hour)
	for dst, count := range restoredTo {
		logger.Infof("restored %d files to %q", count, strings.Join(m.GetPaths(ctx, dst), ", "))
	}
}


================================================
FILE: cmd/restore_test.go
================================================
package cmd

import (
	"fmt"
	"os"
	"strings"
	"testing"
	"time"
)

func TestRestore(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	paths := []string{"/jfs-dir", "/jfs-dir/a"}
	if err := os.Mkdir(fmt.Sprintf("%s%s", testMountPoint, "/jfs-dir"), 0777); err != nil {
		t.Fatalf("mkdirAll err: %s", err)
	}

	filename := fmt.Sprintf("%s%s", testMountPoint, "/jfs-dir/a")
	if err := os.WriteFile(filename, []byte("test"), 0644); err != nil {
		t.Fatalf("write file failed: %s", err)
	}

	for i := len(paths) - 1; i >= 0; i-- {
		path := paths[i]
		if err := os.Remove(fmt.Sprintf("%s%s", testMountPoint, path)); err != nil {
			t.Fatalf("removeAll err: %s", err)
		}
	}

	hour := time.Now().UTC().Format("2006-01-02-15")
	restoreArgs := []string{"", "restore", testMeta, hour}
	if err := Main(restoreArgs); err != nil {
		t.Fatalf("restore failed: %s", err)
	}

	hourDir := fmt.Sprintf("%s/%s/%s", testMountPoint, ".trash", hour)
	child, err := os.ReadDir(hourDir)
	if err != nil {
		t.Fatalf("read dir failed: %s", err)
	}
	for _, entry := range child {
		if strings.Contains(entry.Name(), "jfs-dir") {
			fileInfo, err := os.Stat(fmt.Sprintf("%s/%s/%s", hourDir, entry.Name(), "a"))
			if err != nil {
				t.Fatalf("stat failed: %s", err)
			}
			if fileInfo.IsDir() {
				t.Fatalf("restore failed, file: %v is dir", fileInfo)
			}
			return
		}
	}
	t.Fatalf("restore failed, cannot find file: %s in trash", "jfs-dir")
}

func TestRestorePutBack(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	paths := []string{"/jfs-dir1", "/jfs-dir1/a"}
	if err := os.Mkdir(fmt.Sprintf("%s%s", testMountPoint, "/jfs-dir1"), 0777); err != nil {
		t.Fatalf("mkdirAll err: %s", err)
	}

	filename := fmt.Sprintf("%s%s", testMountPoint, "/jfs-dir1/a")
	if err := os.WriteFile(filename, []byte("test"), 0644); err != nil {
		t.Fatalf("write file failed: %s", err)
	}

	for i := len(paths) - 1; i >= 0; i-- {
		path := paths[i]
		if err := os.Remove(fmt.Sprintf("%s%s", testMountPoint, path)); err != nil {
			t.Fatalf("removeAll err: %s", err)
		}
	}

	hour := time.Now().UTC().Format("2006-01-02-15")
	restoreArgs := []string{"", "restore", testMeta, hour, "--put-back=true"}
	if err := Main(restoreArgs); err != nil {
		t.Fatalf("restore failed: %s", err)
	}

	fileInfo, err := os.Stat(fmt.Sprintf("%s%s", testMountPoint, "/jfs-dir1/a"))
	if err != nil {
		t.Fatalf("stat failed: %s", err)
	}
	if fileInfo.IsDir() {
		t.Fatalf("restore failed, file: %v is dir", fileInfo)
	}
}


================================================
FILE: cmd/rmr.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"path/filepath"
	"runtime"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdRmr() *cli.Command {
	return &cli.Command{
		Name:      "rmr",
		Action:    rmr,
		Category:  "TOOL",
		Usage:     "Remove directories recursively",
		ArgsUsage: "PATH ...",
		Description: `
This command provides a faster way to remove huge directories in JuiceFS.

Examples:
$ juicefs rmr /mnt/jfs/foo`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:  "skip-trash",
				Usage: "skip trash and delete files directly (requires root)",
			},
			&cli.IntFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   50,
				Usage:   "number of threads for delete jobs (max 255)",
			},
		},
	}
}

func openController(dpath string) (*os.File, error) {
	st, err := os.Stat(dpath)
	if err != nil {
		return nil, err
	}
	if !st.IsDir() {
		dpath = filepath.Dir(dpath)
	}
	fp, err := os.OpenFile(filepath.Join(dpath, ".jfs.control"), os.O_RDWR, 0)
	if os.IsNotExist(err) {
		fp, err = os.OpenFile(filepath.Join(dpath, ".control"), os.O_RDWR, 0)
	}
	return fp, err
}

func rmr(ctx *cli.Context) error {
	setup0(ctx, 1, 0)
	var flag uint8
	var numThreads int

	numThreads = ctx.Int("threads")
	if numThreads <= 0 {
		numThreads = meta.RmrDefaultThreads
	}
	if numThreads > 255 {
		numThreads = 255
	}
	if ctx.Bool("skip-trash") {
		if runtime.GOOS != "windows" && os.Getuid() != 0 {
			logger.Fatalf("Only root can remove files directly")
		} else if runtime.GOOS == "windows" && !utils.IsWinAdminOrElevatedPrivilege() {
			logger.Fatalf("Removing files directly requires Administrator or elevated privilege on Windows")
		}
		flag = 1
	}
	progress := utils.NewProgress(false)
	spin := progress.AddCountSpinner("Removing entries")
	for i := 0; i < ctx.Args().Len(); i++ {
		path := ctx.Args().Get(i)
		p, err := filepath.Abs(path)
		if err != nil {
			logger.Errorf("abs of %s: %s", path, err)
			continue
		}
		d := filepath.Dir(p)
		name := filepath.Base(p)
		inode, err := utils.GetFileInode(d)
		if err != nil {
			return fmt.Errorf("lookup inode for %s: %s", d, err)
		}
		f, err := openController(d)
		if err != nil {
			logger.Errorf("Open control file for %s: %s", d, err)
			continue
		}
		wb := utils.NewBuffer(8 + 8 + 1 + uint32(len(name)) + 1 + 1)
		wb.Put32(meta.Rmr)
		wb.Put32(8 + 1 + uint32(len(name)) + 1 + 1)
		wb.Put64(inode)
		wb.Put8(uint8(len(name)))
		wb.Put([]byte(name))
		wb.Put8(flag)
		wb.Put8(uint8(numThreads))
		_, err = f.Write(wb.Bytes())
		if err != nil {
			logger.Fatalf("write message: %s", err)
		}
		if _, errno := readProgress(f, func(count, bytes uint64) {
			spin.SetCurrent(int64(count))
		}); errno != 0 {
			logger.Fatalf("RMR %s: %s", path, errno)
		}
		_ = f.Close()
	}
	progress.Done()
	return nil
}


================================================
FILE: cmd/rmr_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"testing"
)

func TestRmr(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	paths := []string{"/dir1", "/dir2", "/dir3/dir2"}
	for _, path := range paths {
		if err := os.MkdirAll(fmt.Sprintf("%s%s/dir2/dir3/dir4/dir5", testMountPoint, path), 0777); err != nil {
			t.Fatalf("mkdirAll err: %s", err)
		}
	}
	for i := 0; i < 5; i++ {
		filename := fmt.Sprintf("%s/dir1/f%d.txt", testMountPoint, i)
		if err := os.WriteFile(filename, []byte("test"), 0644); err != nil {
			t.Fatalf("write file failed: %s", err)
		}
	}

	rmrArgs := []string{"", "rmr", testMountPoint + paths[0], testMountPoint + paths[1], testMountPoint + paths[2]}
	if err := Main(rmrArgs); err != nil {
		t.Fatalf("rmr failed: %s", err)
	}

	for _, path := range paths {
		if dir, err := os.ReadDir(testMountPoint + path); !os.IsNotExist(err) {
			t.Fatalf("test rmr error: %s len(dir): %d", err, len(dir))
		}
	}
}


================================================
FILE: cmd/stats.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/urfave/cli/v2"
)

func cmdStats() *cli.Command {
	return &cli.Command{
		Name:      "stats",
		Action:    stats,
		Category:  "INSPECTOR",
		Usage:     "Show real time performance statistics of JuiceFS",
		ArgsUsage: "MOUNTPOINT",
		Description: `
This is a tool that reads Prometheus metrics and shows real time statistics of the target mount point.

Examples:
$ juicefs stats /mnt/jfs

# More metrics
$ juicefs stats /mnt/jfs -l 1

Details: https://juicefs.com/docs/community/fault_diagnosis_and_analysis#stats`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "schema",
				Value: "ufmco",
				Usage: "schema string of output sections (t:time, u: usage, f: fuse, m: meta, c: blockcache, o: object, g: go)",
			},
			&cli.UintFlag{
				Name:  "interval",
				Value: 1,
				Usage: "interval in seconds between each update",
			},
			&cli.UintFlag{
				Name:    "verbosity",
				Aliases: []string{"l"},
				Usage:   "verbosity level, 0 or 1 is enough for most cases",
			},
			&cli.UintFlag{
				Name:    "count",
				Aliases: []string{"c"},
				Usage:   "number of updates to display before exiting",
			},
		},
	}
}

const (
	BLACK = 30 + iota
	RED
	GREEN
	YELLOW
	BLUE
	MAGENTA
	CYAN
	WHITE
	DEFAULT = "00"
)

const (
	RESET_SEQ      = "\033[0m"
	COLOR_SEQ      = "\033[1;" // %dm
	COLOR_DARK_SEQ = "\033[0;" // %dm
	UNDERLINE_SEQ  = "\033[4m"
	CLEAR_SCREEM   = "\033[2J\033[1;1H"
	UNIXTIME_FMT   = "01-02 15:04:05"
	// BOLD_SEQ       = "\033[1m"
)

type statsWatcher struct {
	colorful bool
	interval uint
	mp       string
	header   string
	sections []*section
}

func (w *statsWatcher) colorize(msg string, color int, dark bool, underline bool) string {
	if !w.colorful || msg == "" || msg == " " {
		return msg
	}
	var cseq, useq string
	if dark {
		cseq = COLOR_DARK_SEQ
	} else {
		cseq = COLOR_SEQ
	}
	if underline {
		useq = UNDERLINE_SEQ
	}
	return fmt.Sprintf("%s%s%dm%s%s", useq, cseq, color, msg, RESET_SEQ)
}

const (
	metricByte = 1 << iota
	metricCount
	metricTime
	metricCPU
	metricGauge
	metricCounter
	metricHist
	metricUnixtime
)

type item struct {
	nick string // must be size <= 5
	name string
	typ  uint8
}

type section struct {
	name  string
	items []*item
}

func (w *statsWatcher) buildSchema(schema string, verbosity uint) {
	for _, r := range schema {
		var s section
		switch r {
		case 't':
			s.name = "system"
			s.items = append(s.items, &item{"time", "juicefs_timestamp", metricUnixtime})
		case 'u':
			s.name = "usage"
			s.items = append(s.items, &item{"cpu", "juicefs_cpu_usage", metricCPU | metricCounter})
			s.items = append(s.items, &item{"mem", "juicefs_memory", metricGauge})
			s.items = append(s.items, &item{"buf", "juicefs_used_buffer_size_bytes", metricGauge})
			if verbosity > 0 {
				s.items = append(s.items, &item{"cache", "juicefs_store_cache_size_bytes", metricGauge})
			}
		case 'f':
			s.name = "fuse"
			s.items = append(s.items, &item{"ops", "juicefs_fuse_ops_durations_histogram_seconds", metricTime | metricHist})
			s.items = append(s.items, &item{"read", "juicefs_fuse_read_size_bytes_sum", metricByte | metricCounter})
			s.items = append(s.items, &item{"write", "juicefs_fuse_written_size_bytes_sum", metricByte | metricCounter})
		case 'm':
			s.name = "meta"
			s.items = append(s.items, &item{"ops", "juicefs_meta_ops_durations_histogram_seconds", metricTime | metricHist})
			if verbosity > 0 {
				s.items = append(s.items, &item{"txn", "juicefs_transaction_durations_histogram_seconds", metricTime | metricHist})
				s.items = append(s.items, &item{"retry", "juicefs_transaction_restart", metricCount | metricCounter})
			}
		case 'c':
			s.name = "blockcache"
			s.items = append(s.items, &item{"read", "juicefs_blockcache_hit_bytes", metricByte | metricCounter})
			s.items = append(s.items, &item{"write", "juicefs_blockcache_write_bytes", metricByte | metricCounter})
		case 'o':
			s.name = "object"
			s.items = append(s.items, &item{"get", "juicefs_object_request_data_bytes_GET", metricByte | metricCounter})
			if verbosity > 0 {
				s.items = append(s.items, &item{"get_c", "juicefs_object_request_durations_histogram_seconds_GET", metricTime | metricHist})
			}
			s.items = append(s.items, &item{"put", "juicefs_object_request_data_bytes_PUT", metricByte | metricCounter})
			if verbosity > 0 {
				s.items = append(s.items, &item{"put_c", "juicefs_object_request_durations_histogram_seconds_PUT", metricTime | metricHist})
				s.items = append(s.items, &item{"del_c", "juicefs_object_request_durations_histogram_seconds_DELETE", metricTime | metricHist})
			}
		case 'g':
			s.name = "go"
			s.items = append(s.items, &item{"alloc", "juicefs_go_memstats_alloc_bytes", metricGauge})
			s.items = append(s.items, &item{"sys", "juicefs_go_memstats_sys_bytes", metricGauge})
		default:
			fmt.Printf("Warning: no item defined for %c\n", r)
			continue
		}
		w.sections = append(w.sections, &s)
	}
	if len(w.sections) == 0 {
		logger.Fatalln("no section to watch, please check the schema string")
	}
}

func padding(name string, width int, char byte) string {
	pad := width - len(name)
	if pad < 0 {
		pad = 0
		name = name[0:width]
	}
	prefix := (pad + 1) / 2
	buf := make([]byte, width)
	for i := 0; i < prefix; i++ {
		buf[i] = char
	}
	copy(buf[prefix:], name)
	for i := prefix + len(name); i < width; i++ {
		buf[i] = char
	}
	return string(buf)
}

func (w *statsWatcher) formatHeader() {
	headers := make([]string, len(w.sections))
	subHeaders := make([]string, len(w.sections))
	for i, s := range w.sections {
		subs := make([]string, 0, len(s.items))
		for _, it := range s.items {
			if (it.typ & 0xF0) == metricUnixtime {
				subs = append(subs, w.colorize(padding(it.nick, len(UNIXTIME_FMT), ' '), BLUE, false, true))
			} else {
				subs = append(subs, w.colorize(padding(it.nick, 5, ' '), BLUE, false, true))
			}
			if it.typ&metricHist != 0 {
				if it.typ&metricTime != 0 {
					subs = append(subs, w.colorize(" lat ", BLUE, false, true))
				} else {
					subs = append(subs, w.colorize(" avg ", BLUE, false, true))
				}
			}
		}
		width := 6*len(subs) - 1 // nick(5) + space(1)
		if s.name == "system" {
			width = len(UNIXTIME_FMT)
		}
		subHeaders[i] = strings.Join(subs, " ")
		headers[i] = w.colorize(padding(s.name, width, '-'), BLUE, true, false)
	}
	w.header = fmt.Sprintf("%s\n%s", strings.Join(headers, " "),
		strings.Join(subHeaders, w.colorize("|", BLUE, true, false)))
}

func (w *statsWatcher) formatU64(v float64, dark, isByte bool) string {
	if v <= 0.0 {
		return w.colorize("   0 ", BLACK, false, false)
	}
	var vi uint64
	var unit string
	var color int
	switch vi = uint64(v); {
	case vi < 10000:
		if isByte {
			unit = "B"
		} else {
			unit = " "
		}
		color = RED
	case vi>>10 < 10000:
		vi, unit, color = vi>>10, "K", YELLOW
	case vi>>20 < 10000:
		vi, unit, color = vi>>20, "M", GREEN
	case vi>>30 < 10000:
		vi, unit, color = vi>>30, "G", BLUE
	case vi>>40 < 10000:
		vi, unit, color = vi>>40, "T", MAGENTA
	default:
		vi, unit, color = vi>>50, "P", CYAN
	}
	return w.colorize(fmt.Sprintf("%4d", vi), color, dark, false) +
		w.colorize(unit, BLACK, false, false)
}

func (w *statsWatcher) formatTime(v float64, dark bool) string {
	var ret string
	var color int
	switch {
	case v <= 0.0:
		ret, color, dark = "   0 ", BLACK, false
	case v < 10.0:
		ret, color = fmt.Sprintf("%4.2f ", v), GREEN
	case v < 100.0:
		ret, color = fmt.Sprintf("%4.1f ", v), YELLOW
	case v < 10000.0:
		ret, color = fmt.Sprintf("%4.f ", v), RED
	default:
		ret, color = fmt.Sprintf("%1.e", v), MAGENTA
	}
	return w.colorize(ret, color, dark, false)
}

func (w *statsWatcher) formatCPU(v float64, dark bool) string {
	var ret string
	var color int
	switch v = v * 100.0; {
	case v <= 0.0:
		ret, color = " 0.0", WHITE
	case v < 30.0:
		ret, color = fmt.Sprintf("%4.1f", v), GREEN
	case v < 100.0:
		ret, color = fmt.Sprintf("%4.1f", v), YELLOW
	default:
		ret, color = fmt.Sprintf("%4.f", v), RED
	}
	return w.colorize(ret, color, dark, false) +
		w.colorize("%", BLACK, false, false)
}

func (w *statsWatcher) printDiff(left, right map[string]float64, dark bool) {
	if !w.colorful && dark {
		return
	}
	values := make([]string, len(w.sections))
	for i, s := range w.sections {
		vals := make([]string, 0, len(s.items))
		for _, it := range s.items {
			switch it.typ & 0xF0 {
			case metricUnixtime: // current timestamp
				if dark {
					vals = append(vals, w.colorize(time.Now().Format(UNIXTIME_FMT), BLACK, false, false))
				} else {
					vals = append(vals, w.colorize(time.Now().Format(UNIXTIME_FMT), WHITE, true, false))
				}
			case metricGauge: // currently must be metricByte
				vals = append(vals, w.formatU64(right[it.name], dark, true))
			case metricCounter:
				v := (right[it.name] - left[it.name])
				if !dark {
					v /= float64(w.interval)
				}
				if it.typ&metricByte != 0 {
					vals = append(vals, w.formatU64(v, dark, true))
				} else if it.typ&metricCPU != 0 {
					vals = append(vals, w.formatCPU(v, dark))
				} else { // metricCount
					vals = append(vals, w.formatU64(v, dark, false))
				}
			case metricHist: // metricTime
				count := right[it.name+"_total"] - left[it.name+"_total"]
				var avg float64
				if count > 0.0 {
					cost := right[it.name+"_sum"] - left[it.name+"_sum"]
					if it.typ&metricTime != 0 {
						cost *= 1000 // s -> ms
					}
					avg = cost / count
				}
				if !dark {
					count /= float64(w.interval)
				}
				vals = append(vals, w.formatU64(count, dark, false), w.formatTime(avg, dark))
			}
		}
		values[i] = strings.Join(vals, " ")
	}
	if w.colorful && dark {
		fmt.Printf("%s\r", strings.Join(values, w.colorize("|", BLUE, true, false)))
	} else {
		fmt.Printf("%s\n", strings.Join(values, w.colorize("|", BLUE, true, false)))
	}
}

func readStats(mp string) map[string]float64 {
	f, err := os.Open(filepath.Join(mp, ".jfs.stats"))
	if os.IsNotExist(err) {
		f, err = os.Open(filepath.Join(mp, ".stats"))
	}
	if err != nil {
		logger.Warnf("open stats file under mount point %s: %s", mp, err)
		return nil
	}
	defer f.Close()
	d, err := io.ReadAll(f)
	if err != nil {
		logger.Warnf("read stats file under mount point %s: %s", mp, err)
		return nil
	}
	stats := make(map[string]float64)
	lines := strings.Split(string(d), "\n")
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) == 2 {
			v, err := strconv.ParseFloat(fields[1], 64)
			if err != nil {
				logger.Warnf("parse %s: %s", fields[1], err)
			}
			stats[fields[0]] += v
		}
	}
	return stats
}

func stats(ctx *cli.Context) error {
	setup(ctx, 1)
	mp := ctx.Args().First()
	inode, err := utils.GetFileInode(mp)
	if err != nil {
		logger.Fatalf("lookup inode for %s: %s", mp, err)
	}
	if inode != 1 {
		logger.Fatalf("path %s is not a mount point", mp)
	}

	watcher := &statsWatcher{
		colorful: !ctx.Bool("no-color") && utils.SupportANSIColor(os.Stdout.Fd()),
		interval: ctx.Uint("interval"),
		mp:       mp,
	}
	watcher.buildSchema(ctx.String("schema"), ctx.Uint("verbosity"))
	watcher.formatHeader()
	count := ctx.Uint("count")

	var tick uint
	var start, last, current map[string]float64
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	current = readStats(watcher.mp)
	start = current
	last = current
	for {
		if tick%(watcher.interval*30) == 0 {
			fmt.Println(watcher.header)
		}
		if tick%watcher.interval == 0 {
			watcher.printDiff(start, current, false)
			start = current
		} else {
			watcher.printDiff(last, current, true)
		}
		if count > 0 && tick >= watcher.interval*(count-1) {
			break
		}
		last = current
		tick++
		<-ticker.C
		current = readStats(watcher.mp)
	}
	return nil
}


================================================
FILE: cmd/status.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/json"
	"fmt"

	"github.com/juicedata/juicefs/pkg/meta"

	"github.com/urfave/cli/v2"
)

func cmdStatus() *cli.Command {
	return &cli.Command{
		Name:      "status",
		Action:    status,
		Category:  "INSPECTOR",
		Usage:     "Show status of a volume",
		ArgsUsage: "META-URL",
		Description: `
It shows basic setting of the target volume, and a list of active sessions (including mount, SDK,
S3-gateway and WebDAV) that are connected with the metadata engine.

NOTE: Read-only session is not listed since it cannot register itself in the metadata.

Examples:
$ juicefs status redis://localhost`,
		Flags: []cli.Flag{
			&cli.Uint64Flag{
				Name:    "session",
				Aliases: []string{"s"},
				Usage:   "show detailed information (sustained inodes, locks) of the specified session (sid)",
			},
			&cli.BoolFlag{
				Name:    "more",
				Aliases: []string{"m"},
				Usage:   "show more statistic information, may take a long time",
			},
		},
	}
}

func printJson(v interface{}) {
	output, err := json.MarshalIndent(v, "", "  ")
	if err != nil {
		logger.Fatalf("json: %s", err)
	}
	fmt.Println(string(output))
}

func status(ctx *cli.Context) error {
	setup(ctx, 1)
	metaUrl := ctx.Args().Get(0)
	removePassword(metaUrl)
	m := meta.NewClient(metaUrl, nil)

	if sid := ctx.Uint64("session"); sid != 0 {
		s, err := m.GetSession(sid, true)
		if err != nil {
			logger.Fatalf("get session: %s", err)
		}
		printJson(s)
		return nil
	}

	sections := &meta.Sections{}
	err := meta.Status(ctx.Context, m, ctx.Bool("more"), sections)
	if err != nil {
		logger.Fatalf("get status: %s", err)
	}
	printJson(sections)
	return nil
}


================================================
FILE: cmd/status_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/json"
	"os"
	"testing"

	"github.com/agiledragon/gomonkey/v2"
	"github.com/juicedata/juicefs/pkg/meta"
)

func TestStatus(t *testing.T) {
	tmpFile, err := os.CreateTemp("/tmp", "")
	if err != nil {
		t.Fatalf("create temporary file: %s", err)
	}
	defer tmpFile.Close()
	defer os.Remove(tmpFile.Name())

	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	// mock os.Stdout
	patches := gomonkey.ApplyGlobalVar(os.Stdout, *tmpFile)
	defer patches.Reset()

	if err = Main([]string{"", "status", testMeta}); err != nil {
		t.Fatalf("status failed: %s", err)
	}
	content, err := os.ReadFile(tmpFile.Name())
	if err != nil {
		t.Fatalf("read file failed: %s", err)
	}
	s := meta.Sections{}
	if err = json.Unmarshal(content, &s); err != nil {
		t.Fatalf("json unmarshal failed: %s", err)
	}
	if s.Setting.Name != testVolume || s.Setting.Storage != "file" {
		t.Fatalf("setting is not as expected: %+v", s.Setting)
	}
}


================================================
FILE: cmd/summary.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/csv"
	"encoding/json"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/urfave/cli/v2"
)

func cmdSummary() *cli.Command {
	return &cli.Command{
		Name:      "summary",
		Action:    summary,
		Category:  "INSPECTOR",
		Usage:     "Show tree summary of a directory",
		ArgsUsage: "PATH",
		Description: `
 It is used to show tree summary of target directory.
 
 Examples:
 # Show with path
 $ juicefs summary /mnt/jfs/foo
 
 # Show max depth of 5
 $ juicefs summary --depth 5 /mnt/jfs/foo

 # Show top 20 entries
 $ juicefs summary --entries 20 /mnt/jfs/foo

 # Show accurate result
 $ juicefs summary --strict /mnt/jfs/foo
 `,
		Flags: []cli.Flag{
			&cli.UintFlag{
				Name:    "depth",
				Aliases: []string{"d"},
				Value:   2,
				Usage:   "depth of tree to show (zero means only show root)",
			},
			&cli.UintFlag{
				Name:    "entries",
				Aliases: []string{"e"},
				Value:   10,
				Usage:   "show top N entries (sort by size)",
			},
			&cli.BoolFlag{
				Name:  "strict",
				Usage: "show accurate summary, including directories and files (may be slow)",
			},
			&cli.BoolFlag{
				Name:  "csv",
				Usage: "print summary in csv format",
			},
		},
	}
}

func summary(ctx *cli.Context) error {
	setup(ctx, 1)
	var strict uint8
	if ctx.Bool("strict") {
		strict = 1
	}
	depth := ctx.Uint("depth")
	if depth > 10 {
		logger.Warn("depth should be less than 11")
		depth = 10
	}
	topN := ctx.Uint("entries")
	if topN > 100 {
		logger.Warn("entries should be less than 101")
		topN = 100
	}

	csv := ctx.Bool("csv")
	progress := utils.NewProgress(csv)
	path := ctx.Args().Get(0)
	dspin := progress.AddDoubleSpinner(path)
	dpath, err := filepath.Abs(path)
	if err != nil {
		logger.Fatalf("abs of %s: %s", path, err)
	}
	inode, err := utils.GetFileInode(dpath)
	if err != nil {
		logger.Fatalf("lookup inode for %s: %s", path, err)
	}
	if inode < uint64(meta.RootInode) {
		logger.Fatalf("inode number shouldn't be less than %d", meta.RootInode)
	}
	f, err := openController(dpath)
	if err != nil {
		logger.Fatalf("open controller: %s", err)
	}
	defer f.Close()
	headerLen := uint32(8)
	contentLen := uint32(8 + 1 + 1 + 1)
	wb := utils.NewBuffer(headerLen + contentLen)
	wb.Put32(meta.OpSummary)
	wb.Put32(contentLen)
	wb.Put64(inode)
	wb.Put8(uint8(depth))
	wb.Put8(uint8(topN))
	wb.Put8(strict)
	_, err = f.Write(wb.Bytes())
	if err != nil {
		logger.Fatalf("write message: %s", err)
	}
	data, errno := readProgress(f, func(count, size uint64) {
		dspin.SetCurrent(int64(count), int64(size))
	})
	if errno == syscall.EINVAL {
		logger.Fatalf("summary is not supported, please upgrade and mount again")
	}
	if errno != 0 {
		logger.Errorf("failed to get info: %s", syscall.Errno(errno))
	}
	dspin.Done()
	progress.Done()

	var resp vfs.SummaryReponse
	err = json.Unmarshal(data, &resp)
	if err == nil && resp.Errno != 0 {
		err = resp.Errno
	}
	if err != nil {
		logger.Fatalf("summary: %s", err)
	}
	results := [][]string{{"PATH", "SIZE", "DIRS", "FILES"}}
	renderTree(&results, &resp.Tree, csv)
	if csv {
		printCSVResult(results)
	} else {
		printResult(results, 0, false)
	}
	return nil
}

func printCSVResult(results [][]string) {
	w := csv.NewWriter(os.Stdout)
	for _, r := range results {
		if err := w.Write(r); err != nil {
			logger.Fatalln("error writing record to csv:", err)
		}
	}
	w.Flush()
	if err := w.Error(); err != nil {
		logger.Fatal(err)
	}
}

func renderTree(results *[][]string, tree *meta.TreeSummary, csv bool) {
	if tree == nil {
		return
	}
	var size string
	if csv {
		size = strconv.FormatUint(tree.Size, 10)
	} else {
		size = humanize.IBytes(uint64(tree.Size))
	}

	path := tree.Path
	if tree.Type == meta.TypeDirectory && !strings.HasSuffix(path, "/") {
		path += "/"
	}

	result := []string{
		path,
		size,
		strconv.FormatUint(tree.Dirs, 10),
		strconv.FormatUint(tree.Files, 10),
	}
	*results = append(*results, result)
	for _, child := range tree.Children {
		renderTree(results, child, csv)
	}
}


================================================
FILE: cmd/sync.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"net"
	_ "net/http/pprof"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"strconv"
	"strings"

	"github.com/juicedata/juicefs/pkg/metric"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/sync"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/urfave/cli/v2"
)

func cmdSync() *cli.Command {
	return &cli.Command{
		Name:      "sync",
		Action:    doSync,
		Category:  "TOOL",
		Usage:     "Sync between two storages",
		ArgsUsage: "SRC DST",
		Description: `
This tool spawns multiple threads to concurrently syncs objects of two data storages.
SRC and DST should be [NAME://][ACCESS_KEY:SECRET_KEY[:TOKEN]@]BUCKET[.ENDPOINT][/PREFIX].

Include/exclude pattern rules:
The include/exclude rules each specify a pattern that is matched against the names of the files that are going to be transferred.  These patterns can take several forms:

- if the pattern ends with a / then it will only match a directory, not a file, link, or device.
- it chooses between doing a simple string match and wildcard matching by checking if the pattern contains one of these three wildcard characters: '*', '?', and '[' .
- a '*' matches any non-empty path component (it stops at slashes).
- a '?' matches any character except a slash (/).
- a '[' introduces a character class, such as [a-z] or [[:alpha:]].
- in a wildcard pattern, a backslash can be used to escape a wildcard character, but it is matched literally when no wildcards are present.
- it does a prefix match of pattern, i.e. always recursive

Examples:
# Sync object from OSS to S3
$ juicefs sync oss://mybucket.oss-cn-shanghai.aliyuncs.com s3://mybucket.s3.us-east-2.amazonaws.com

# Sync objects from S3 to JuiceFS
$ myfs=redis://localhost juicefs sync s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://myfs/ -p 50

# SRC: a1/b1,a2/b2,aaa/b1   DST: empty   sync result: aaa/b1
$ juicefs sync --exclude='a?/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ /mnt/jfs/

# SRC: a1/b1,a2/b2,aaa/b1   DST: empty   sync result: a1/b1,aaa/b1
$ juicefs sync --include='a1/b1' --exclude='a[1-9]/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ /mnt/jfs/

# SRC: a1/b1,a2/b2,aaa/b1,b1,b2  DST: empty   sync result: b2
$ juicefs sync --include='a1/b1' --exclude='a*' --include='b2' --exclude='b?' s3://mybucket.s3.us-east-2.amazonaws.com/ /mnt/jfs/

Details: https://juicefs.com/docs/community/administration/sync
Supported storage systems: https://juicefs.com/docs/community/how_to_setup_object_storage#supported-object-storage`,

		Flags: expandFlags(
			selectionFlags(),
			syncActionFlags(),
			syncStorageFlags(),
			clusterFlags(),
			addCategories("METRICS", []cli.Flag{
				&cli.StringFlag{
					Name:  "metrics",
					Value: "127.0.0.1:9567",
					Usage: "address to export metrics",
				},
				&cli.StringFlag{
					Name:  "consul",
					Value: "127.0.0.1:8500",
					Usage: "consul address to register",
				},
			}),
		),
	}
}

func selectionFlags() []cli.Flag {
	return addCategories("SELECTION", []cli.Flag{
		&cli.StringFlag{
			Name:    "start",
			Aliases: []string{"s"},
			Usage:   "the first `KEY` to sync",
		},
		&cli.StringFlag{
			Name:    "end",
			Aliases: []string{"e"},
			Usage:   "the last `KEY` to sync",
		},
		&cli.StringSliceFlag{
			Name:  "exclude",
			Usage: "exclude Key matching PATTERN",
		},
		&cli.StringSliceFlag{
			Name:  "include",
			Usage: "don't exclude Key matching PATTERN, need to be used with \"--exclude\" option",
		},
		&cli.BoolFlag{
			Name:  "match-full-path",
			Usage: "match filters again the full path",
		},
		&cli.StringFlag{
			Name:  "max-size",
			Usage: "skip files larger than `SIZE`",
		},
		&cli.StringFlag{
			Name:  "min-size",
			Usage: "skip files smaller than `SIZE`",
		},
		&cli.StringFlag{
			Name:  "max-age",
			Usage: "skip files older than `DURATION`",
		},
		&cli.StringFlag{
			Name:  "min-age",
			Usage: "skip files newer than `DURATION`",
		},
		&cli.StringFlag{
			Name:  "start-time",
			Usage: "skip files modified before start-time. example: 2006-01-02 15:04:05",
		},
		&cli.StringFlag{
			Name:  "end-time",
			Usage: "skip files modified after end-time. example: 2006-01-02 15:04:05",
		},
		&cli.Int64Flag{
			Name:  "limit",
			Usage: "limit the number of objects that will be processed (-1 is unlimited, 0 is to process nothing)",
			Value: -1,
		},
		&cli.BoolFlag{
			Name:    "update",
			Aliases: []string{"u"},
			Usage:   "skip files if the destination is newer",
		},
		&cli.BoolFlag{
			Name:    "force-update",
			Aliases: []string{"f"},
			Usage:   "always update existing files",
		},
		&cli.BoolFlag{
			Name:    "existing",
			Aliases: []string{"ignore-non-existing"},
			Usage:   "skip creating new files on destination",
		},
		&cli.BoolFlag{
			Name:  "ignore-existing",
			Usage: "skip updating files that already exist on destination",
		},
		&cli.StringFlag{
			Name:  "files-from",
			Usage: "read list of files or dirs to sync from FILE",
		},
	})
}

func syncActionFlags() []cli.Flag {
	return addCategories("ACTION", []cli.Flag{
		&cli.BoolFlag{
			Name:  "dirs",
			Usage: "sync directories or holders",
		},
		&cli.BoolFlag{
			Name:  "perms",
			Usage: "preserve permissions",
		},
		&cli.BoolFlag{
			Name:    "links",
			Aliases: []string{"l"},
			Usage:   "copy symlinks as symlinks",
		},
		&cli.BoolFlag{
			Name:  "inplace",
			Usage: "put directly to destination file instead of atomic download to temp/rename",
		},
		&cli.BoolFlag{
			Name:    "delete-src",
			Aliases: []string{"deleteSrc"},
			Usage:   "delete objects from source those already exist in destination",
		},
		&cli.BoolFlag{
			Name:    "delete-dst",
			Aliases: []string{"deleteDst"},
			Usage:   "delete extraneous objects from destination",
		},
		&cli.BoolFlag{
			Name:  "check-all",
			Usage: "verify integrity of all files in source and destination",
		},
		&cli.BoolFlag{
			Name:  "check-new",
			Usage: "verify integrity of newly copied files",
		},
		&cli.BoolFlag{
			Name:  "check-change",
			Usage: "check if source file changes after sync",
		},
		&cli.Int64Flag{
			Name:  "max-failure",
			Value: -1,
			Usage: "max number of allowed failed files (-1 for unlimited)",
		},
		&cli.BoolFlag{
			Name:  "dry",
			Usage: "don't copy file",
		},
		&cli.StringFlag{
			Name:  "mountpoint",
			Usage: "the mount point for current volume (to follow symlink)",
		},
	})
}

func syncStorageFlags() []cli.Flag {
	return addCategories("STORAGE", []cli.Flag{
		&cli.IntFlag{
			Name:    "threads",
			Aliases: []string{"p"},
			Value:   10,
			Usage:   "number of concurrent threads",
		},
		&cli.IntFlag{
			Name:  "list-threads",
			Value: 1,
			Usage: "number of threads to list objects",
		},
		&cli.IntFlag{
			Name:  "list-depth",
			Value: 1,
			Usage: "list the top N level of directories in parallel",
		},
		&cli.BoolFlag{
			Name:  "no-https",
			Usage: "donot use HTTPS",
		},
		&cli.StringFlag{
			Name:  "storage-class",
			Usage: "the storage class for destination",
		},
		&cli.StringFlag{
			Name:  "bwlimit",
			Usage: "limit bandwidth in Mbps (0 means unlimited)",
		},
		&cli.StringFlag{
			Name:  "traffic-control-url",
			Usage: "the url of the traffic control",
		},
	})
}

func clusterFlags() []cli.Flag {
	return addCategories("CLUSTER", []cli.Flag{
		&cli.StringFlag{
			Name:   "manager",
			Usage:  "the manager address used only by the worker node",
			Hidden: true,
		},
		&cli.StringSliceFlag{
			Name:  "worker",
			Usage: "hosts (separated by comma) to launch worker",
		},
		&cli.StringFlag{
			Name:  "manager-addr",
			Usage: "the IP address to communicate with workers",
		},
	})
}

func supportHTTPS(name, endpoint string) bool {
	switch name {
	case "ufile":
		return !(strings.Contains(endpoint, ".internal-") || strings.HasSuffix(endpoint, ".ucloud.cn"))
	case "oss":
		return !(strings.Contains(endpoint, ".vpc100-oss") || strings.Contains(endpoint, "internal.aliyuncs.com"))
	case "s3":
		ps := strings.SplitN(strings.Split(endpoint, ":")[0], ".", 2)
		if len(ps) > 1 && net.ParseIP(ps[1]) != nil {
			return false
		}
	case "minio":
		return false
	}
	return true
}

// Check if uri is local file path
func isFilePath(uri string) bool {
	// check drive pattern when running on Windows
	if runtime.GOOS == "windows" &&
		len(uri) > 1 && (('a' <= uri[0] && uri[0] <= 'z') ||
		('A' <= uri[0] && uri[0] <= 'Z')) && uri[1] == ':' {
		return true
	}
	return !strings.Contains(uri, ":")
}

func extractToken(uri string) (string, string) {
	if submatch := regexp.MustCompile(`^.*:.*:.*(:.*)@.*$`).FindStringSubmatch(uri); len(submatch) == 2 {
		return strings.ReplaceAll(uri, submatch[1], ""), strings.TrimLeft(submatch[1], ":")
	}
	return uri, ""
}

func createSyncStorage(uri string, conf *sync.Config) (object.ObjectStorage, error) {
	// nolint:staticcheck
	uri = strings.TrimPrefix(uri, "sftp://")
	if !strings.Contains(uri, "://") {
		if isFilePath(uri) {
			absPath, err := filepath.Abs(uri)
			if err != nil {
				logger.Fatalf("invalid path: %s", err.Error())
			}
			if !strings.HasPrefix(absPath, "/") { // Windows path
				absPath = "/" + strings.Replace(absPath, "\\", "/", -1)
			}
			if strings.HasSuffix(uri, "/") {
				absPath += "/"
			}

			// Windows: file:///C:/a/b/c, Unix: file:///a/b/c
			uri = "file://" + absPath
		} else { // sftp
			var user string
			if strings.Contains(uri, "@") {
				parts := strings.Split(uri, "@")
				user = parts[0]
				uri = parts[1]
			}
			var pass string
			if strings.Contains(user, ":") {
				parts := strings.Split(user, ":")
				user = parts[0]
				pass = parts[1]
			}
			return object.CreateStorage("sftp", uri, user, pass, "")
		}
	}
	uri, token := extractToken(uri)
	u, err := url.Parse(uri)
	if err != nil {
		logger.Fatalf("Can't parse %s: %s", uri, err.Error())
	}
	user := u.User
	var accessKey, secretKey string
	if user != nil {
		accessKey = user.Username()
		secretKey, _ = user.Password()
	}
	name := strings.ToLower(u.Scheme)

	var endpoint string
	if name == "file" {
		endpoint = u.Path
	} else if name == "hdfs" {
		endpoint = u.Host
	} else if name == "jfs" {
		endpoint, err = url.PathUnescape(u.Host)
		if err != nil {
			return nil, fmt.Errorf("unescape %s: %s", u.Host, err)
		}
		if os.Getenv(endpoint) != "" {
			conf.Env[endpoint] = os.Getenv(endpoint)
		}
	} else if name == "nfs" {
		endpoint = u.Host + u.Path
	} else if !conf.NoHTTPS && supportHTTPS(name, u.Host) {
		endpoint = "https://" + u.Host
	} else {
		endpoint = "http://" + u.Host
	}

	isS3PathTypeUrl := isS3PathType(u.Host)
	if name == "minio" || name == "s3" && isS3PathTypeUrl {
		// bucket name is part of path
		endpoint += u.Path
	}

	store, err := object.CreateStorage(name, endpoint, accessKey, secretKey, token)
	if name == "nfs" && err != nil {
		p := u.Path
		for err != nil && strings.Contains(err.Error(), "MNT3ERR_NOENT") {
			p = filepath.Dir(p)
			store, err = object.CreateStorage(name, u.Host+p, accessKey, secretKey, token)
		}
		if err == nil {
			store = object.WithPrefix(store, u.Path[len(p):])
		}
	}
	if err != nil {
		return nil, fmt.Errorf("create %s %s: %s", name, endpoint, err)
	}

	if conf.Links {
		if _, ok := store.(object.SupportSymlink); !ok {
			logger.Warnf("storage %s does not support symlink, ignore it", uri)
			conf.Links = false
		}
	}

	if conf.Perms {
		if _, ok := store.(object.FileSystem); !ok {
			logger.Warnf("%s is not a file system, can not preserve permissions", store)
			conf.Perms = false
		}
	}
	switch name {
	case "file", "nfs":
	case "minio":
		if strings.Count(u.Path, "/") > 1 {
			// skip bucket name
			store = object.WithPrefix(store, strings.SplitN(u.Path[1:], "/", 2)[1])
		}
	case "s3":
		if isS3PathTypeUrl && strings.Count(u.Path, "/") > 1 {
			store = object.WithPrefix(store, strings.SplitN(u.Path[1:], "/", 2)[1])
		} else if len(u.Path) > 1 {
			store = object.WithPrefix(store, u.Path[1:])
		}
	default:
		if len(u.Path) > 1 {
			store = object.WithPrefix(store, u.Path[1:])
		}
	}

	return store, nil
}

func isS3PathType(endpoint string) bool {
	//localhost[:8080] 127.0.0.1[:8080]  s3.ap-southeast-1.amazonaws.com[:8080] s3-ap-southeast-1.amazonaws.com[:8080]
	pattern := `^((localhost)|(s3[.-].*\.amazonaws\.com)|((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|[1-9])\.((1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){2}(1\d{2}|2[0-4]\d|25[0-5]|[1-9]\d|\d)))?(:\d*)?$`
	return regexp.MustCompile(pattern).MatchString(endpoint)
}

func doSync(c *cli.Context) error {
	setup(c, 2)
	if c.IsSet("include") && !c.IsSet("exclude") {
		logger.Warnf("The include option needs to be used with the exclude option, otherwise the result of the current sync may not match your expectations")
	}
	config := sync.NewConfigFromCli(c)
	cliCtx = c
	if config.Manager != "" {
		logger.Debugf("worker process start")
	}
	// Windows support `\` and `/` as its separator, Unix only use `/`
	srcURL := c.Args().Get(0)
	dstURL := c.Args().Get(1)
	removePassword(srcURL, dstURL)
	if runtime.GOOS == "windows" {
		if !strings.Contains(srcURL, "://") {
			srcURL = strings.Replace(srcURL, "\\", "/", -1)
		}
		if !strings.Contains(dstURL, "://") {
			dstURL = strings.Replace(dstURL, "\\", "/", -1)
		}
	}
	if strings.HasSuffix(srcURL, "/") != strings.HasSuffix(dstURL, "/") {
		logger.Fatalf("SRC and DST should both end with path separator or not!")
	}
	src, err := createSyncStorage(srcURL, config)
	if err != nil {
		return err
	}
	dst, err := createSyncStorage(dstURL, config)
	if err != nil {
		return err
	}
	defer func() {
		object.Shutdown(src)
		object.Shutdown(dst)
	}()
	if config.StorageClass != "" {
		if os, ok := dst.(object.SupportStorageClass); ok {
			err := os.SetStorageClass(config.StorageClass)
			if err != nil {
				logger.Errorf("set storage class %s: %s", config.StorageClass, err)
			}
		}
	}

	if config.Manager == "" && !config.Dry {
		var srcPath, dstPath string
		if strings.HasPrefix(src.String(), "file://") {
			srcPath = src.String()
		}
		if strings.HasPrefix(dst.String(), "file://") {
			dstPath = dst.String()
		}
		srcPath = utils.RemovePassword(srcPath)
		dstPath = utils.RemovePassword(dstPath)
		registry := prometheus.NewRegistry()
		config.Registerer = prometheus.WrapRegistererWithPrefix("juicefs_sync_",
			prometheus.WrapRegistererWith(prometheus.Labels{"cmd": "sync", "pid": strconv.Itoa(os.Getpid())}, registry))
		config.Registerer.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
		config.Registerer.MustRegister(collectors.NewGoCollector())
		metricsAddr := exposeMetrics(c, config.Registerer, registry)
		if c.IsSet("consul") {
			metadata := make(map[string]string)
			metadata["src"] = srcPath
			metadata["dst"] = dstPath
			metadata["pid"] = strconv.Itoa(os.Getpid())
			metric.RegisterToConsul(c.String("consul"), metricsAddr, metadata)
		}
	}
	return sync.Sync(src, dst, config)
}


================================================
FILE: cmd/sync_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"testing"

	"github.com/juicedata/juicefs/pkg/object"
)

func TestSync(t *testing.T) {
	if os.Getenv("MINIO_TEST_BUCKET") == "" {
		t.Skip()
	}
	minioDir := "synctest"
	localDir := "/tmp/synctest"
	defer os.RemoveAll(localDir)
	storage, err := object.CreateStorage("minio", os.Getenv("MINIO_TEST_BUCKET"), os.Getenv("MINIO_ACCESS_KEY"), os.Getenv("MINIO_SECRET_KEY"), "")
	if err != nil {
		t.Fatalf("create storage failed: %v", err)
	}

	testInstances := []struct{ path, content string }{
		{"t1.txt", "content1"},
		{"testDir1/t2.txt", "content2"},
		{"testDir1/testDir3/t3.txt", "content3"},
	}

	for _, instance := range testInstances {
		err = storage.Put(context.Background(), fmt.Sprintf("/%s/%s", minioDir, instance.path), bytes.NewReader([]byte(instance.content)))
		if err != nil {
			t.Fatalf("storage put failed: %v", err)
		}
	}
	syncArgs := []string{"", "sync", fmt.Sprintf("minio://%s/%s", os.Getenv("MINIO_TEST_BUCKET"), minioDir), fmt.Sprintf("file://%s", localDir)}
	err = Main(syncArgs)
	if err != nil {
		t.Fatalf("sync failed: %v", err)
	}

	for _, instance := range testInstances {
		c, err := os.ReadFile(fmt.Sprintf("%s/%s", localDir, instance.path))
		if err != nil || string(c) != instance.content {
			t.Fatalf("sync failed: %v", err)
		}
	}
}

func Test_isS3PathType(t *testing.T) {

	tests := []struct {
		endpoint string
		want     bool
	}{
		{"localhost", true},
		{"localhost:8080", true},
		{"127.0.0.1", true},
		{"127.0.0.1:8080", true},
		{"s3.ap-southeast-1.amazonaws.com", true},
		{"s3.ap-southeast-1.amazonaws.com:8080", true},
		{"s3-ap-southeast-1.amazonaws.com", true},
		{"s3-ap-southeast-1.amazonaws.com:8080", true},
		{"s3-ap-southeast-1.amazonaws..com:8080", false},
		{"ap-southeast-1.amazonaws.com", false},
		{"s3-ap-southeast-1amazonaws.com:8080", false},
		{"s3-ap-southeast-1", false},
		{"s3-ap-southeast-1:8080", false},
	}
	for _, tt := range tests {
		t.Run("Test host", func(t *testing.T) {
			if got := isS3PathType(tt.endpoint); got != tt.want {
				t.Errorf("isS3PathType() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_extractToken(t *testing.T) {
	// [NAME://][ACCESS_KEY:SECRET_KEY[:TOKEN]@]BUCKET[.ENDPOINT][/PREFIX]
	tests := []struct {
		uri, removedTokenUri, token string
	}{
		{"NAME://ACCESS_KEY:SECRET_KEY@BUCKET.ENDPOINT/PREFIX", "NAME://ACCESS_KEY:SECRET_KEY@BUCKET.ENDPOINT/PREFIX", ""},
		{"NAME://:@BUCKET.ENDPOINT/PREFIX", "NAME://:@BUCKET.ENDPOINT/PREFIX", ""},
		{"NAME://ACCESS_KEY:SECRET_KEY:TOKEN@BUCKET.ENDPOINT/PREFIX", "NAME://ACCESS_KEY:SECRET_KEY@BUCKET.ENDPOINT/PREFIX", "TOKEN"},
		{"NAME://:@BUCKET.ENDPOINT/PREFIX", "NAME://:@BUCKET.ENDPOINT/PREFIX", ""},
		{"NAME://::TOKEN@BUCKET.ENDPOINT/PREFIX", "NAME://:@BUCKET.ENDPOINT/PREFIX", "TOKEN"},
		{"NAME://BUCKET.ENDPOINT/PREFIX", "NAME://BUCKET.ENDPOINT/PREFIX", ""},
		{"file:///tmp/testbucket", "file:///tmp/testbucket", ""},
		{"/tmp/testbucket", "/tmp/testbucket", ""},
	}
	for _, tt := range tests {
		t.Run("", func(t *testing.T) {
			removedTokenUri, token := extractToken(tt.uri)
			if removedTokenUri != tt.removedTokenUri {
				t.Errorf("extractToken() removedTokenUri = %v, want %v", removedTokenUri, tt.removedTokenUri)
			}
			if token != tt.token {
				t.Errorf("extractToken() token = %v, want %v", token, tt.token)
			}
		})
	}
}


================================================
FILE: cmd/umount.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"encoding/json"
	"fmt"
	"io/fs"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"runtime"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/pkg/errors"
	"github.com/urfave/cli/v2"
)

func cmdUmount() *cli.Command {
	return &cli.Command{
		Name:      "umount",
		Action:    umount,
		Category:  "SERVICE",
		Usage:     "Unmount a volume",
		ArgsUsage: "MOUNTPOINT",
		Description: `
Examples:
$ juicefs umount /mnt/jfs`,
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Name:    "force",
				Aliases: []string{"f"},
				Usage:   "unmount a busy mount point by force",
			},
			&cli.BoolFlag{
				Name:  "flush",
				Usage: "wait for all staging chunks to be flushed",
			},
		},
	}
}

func doUmount(mp string, force bool) error {
	var cmd *exec.Cmd
	switch runtime.GOOS {
	case "darwin":
		if force {
			cmd = exec.Command("umount", "-f", mp)
		} else {
			cmd = exec.Command("umount", mp)
		}
	case "linux":
		if _, err := exec.LookPath("fusermount"); err == nil {
			if force {
				cmd = exec.Command("fusermount", "-uz", mp)
			} else {
				cmd = exec.Command("fusermount", "-u", mp)
			}
		} else {
			if force {
				cmd = exec.Command("umount", "-l", mp)
			} else {
				cmd = exec.Command("umount", mp)
			}
		}
	case "windows":
		if !force {
			_ = os.Mkdir(filepath.Join(mp, ".UMOUNTIT"), 0777)
			return nil
		} else {
			cmd = exec.Command("taskkill", "/IM", "juicefs.exe", "/F")
		}
	default:
		return fmt.Errorf("OS %s is not supported", runtime.GOOS)
	}
	out, err := cmd.CombinedOutput()
	if err != nil && len(out) != 0 {
		err = errors.New(string(out))
	}
	return err
}

func umount(ctx *cli.Context) error {
	setup(ctx, 1)
	mp := ctx.Args().Get(0)
	if ctx.Bool("flush") {
		raw, err := readConfig(mp)
		if err != nil {
			if os.IsNotExist(err) {
				return fmt.Errorf("not a JuiceFS mount point")
			}
			return errors.Wrap(err, "failed to read config")
		}

		var conf vfs.Config
		if err = json.Unmarshal(raw, &conf); err != nil {
			return errors.Wrap(err, "failed to parse config")
		}
		if conf.Chunk.Writeback {
			stagingDir := path.Join(conf.Chunk.CacheDir, "rawstaging")
			if err := waitWritebackComplete(stagingDir); err != nil {
				return err
			}
			defer func() {
				size, _ := fileSizeInDir(stagingDir)
				clearLastLine()
				if size == 0 {
					fmt.Println("\rAll staging chunks are flushed")
				} else {
					fmt.Printf("\r%s staging chunks are not flushed\n", humanize.IBytes(size))
				}
			}()
		}
	}
	return doUmount(mp, ctx.Bool("force"))
}

func waitWritebackComplete(stagingDir string) error {
	lastLeft := uint64(0)
	for {
		_, err := os.Stat(stagingDir)
		if err != nil {
			if os.IsNotExist(err) {
				return nil
			}
			return errors.Wrap(err, "failed to read staging directory")
		}
		start := time.Now()
		size, err := fileSizeInDir(stagingDir)
		if err != nil {
			if os.IsNotExist(err) {
				continue
			}
			return errors.Wrap(err, "failed to read staging directory")
		}
		if lastLeft == 0 {
			lastLeft = size
		}

		if size == 0 && lastLeft == 0 {
			return nil
		}

		speed := uint64(0)
		if lastLeft > size {
			speed = lastLeft - size
		}

		leftTime := 720 * time.Hour
		if speed != 0 {
			leftTime = time.Duration(size/speed) * time.Second
		}
		clearLastLine()
		fmt.Printf("\r%s staging chunks are being flushed... %s/s, left %s", humanize.IBytes(size), humanize.IBytes(speed), leftTime)
		lastLeft = size
		time.Sleep(time.Second - time.Since(start))
	}
}

func fileSizeInDir(dir string) (uint64, error) {
	var size uint64
	err := filepath.WalkDir(dir, func(name string, d fs.DirEntry, err error) error {
		if d != nil && !d.IsDir() {
			fi, _ := d.Info()
			if fi != nil {
				size += uint64(fi.Size())
			}
		}
		return nil
	})
	return size, err
}

func clearLastLine() {
	fmt.Printf("\r                                                                             ")
}


================================================
FILE: cmd/version.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"

	"github.com/urfave/cli/v2"
)

func cmdVersion() *cli.Command {
	return &cli.Command{
		Name:     "version",
		Category: "ADMIN",
		Action: func(c *cli.Context) error {
			fmt.Printf("%s version %s\n", c.App.Name, c.App.Version)
			return nil
		},
		Usage: "Show version",
	}
}


================================================
FILE: cmd/warmup.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bufio"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/urfave/cli/v2"
)

func cmdWarmup() *cli.Command {
	return &cli.Command{
		Name:      "warmup",
		Action:    warmup,
		Category:  "TOOL",
		Usage:     "Build cache for target directories/files",
		ArgsUsage: "[PATH ...]",
		Description: `
This command provides a faster way to actively build cache for the target files. It reads all objects
of the files and then write them into local cache directory.

Examples:
# Warm all files in datadir
$ juicefs warmup /mnt/jfs/datadir

# Warm only three files in datadir
$ cat /tmp/filelist
/mnt/jfs/datadir/f1
/mnt/jfs/datadir/f2
/mnt/jfs/datadir/f3
$ juicefs warmup -f /tmp/filelist`,
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "file",
				Aliases: []string{"f"},
				Usage:   "file containing a list of paths",
			},
			&cli.UintFlag{
				Name:    "threads",
				Aliases: []string{"p"},
				Value:   50,
				Usage:   "number of concurrent workers",
			},
			&cli.BoolFlag{
				Name:    "background",
				Aliases: []string{"b"},
				Usage:   "run in background",
			},
			&cli.BoolFlag{
				Name:  "evict",
				Usage: "evict cached blocks",
			},
			&cli.BoolFlag{
				Name:  "check",
				Usage: "check whether the data blocks are cached or not",
			},
		},
	}
}

const batchMax = 10240

const maxInterval = 300
const minInterval = 1

var interval int

func readControl(cf *os.File, resp []byte) int {
	if interval <= 0 {
		interval = 10
	}
	for {
		if n, err := cf.Read(resp); err == nil {
			interval = max(interval/2, minInterval)
			return n
		} else if err == io.EOF {
			interval = min(interval*2, maxInterval)
			time.Sleep(time.Millisecond * time.Duration(interval))
		} else if errors.Is(err, syscall.EBADF) {
			logger.Fatalf("JuiceFS client was restarted")
		} else {
			logger.Fatalf("Read message: %d %s", n, err)
		}
	}
}

func readProgress(cf *os.File, showProgress func(uint64, uint64)) (data []byte, errno syscall.Errno) {
	var resp = make([]byte, 2<<16)
END:
	for {
		n := readControl(cf, resp)
		for off := 0; off < n; {
			if off+1 == n {
				errno = syscall.Errno(resp[off])
				break END
			} else if off+17 <= n && resp[off] == meta.CPROGRESS {
				showProgress(binary.BigEndian.Uint64(resp[off+1:off+9]), binary.BigEndian.Uint64(resp[off+9:off+17]))
				off += 17
			} else if off+5 < n && resp[off] == meta.CDATA {
				size := binary.BigEndian.Uint32(resp[off+1 : off+5])
				data = resp[off+5:]
				if size > uint32(len(resp[off+5:])) {
					tailData, err := io.ReadAll(cf)
					if err != nil {
						logger.Errorf("Read data error: %v", err)
						break END
					}
					data = append(data, tailData...)
				} else {
					data = data[:size]
				}
				break END
			} else {
				logger.Errorf("Bad response off %d n %d: %v", off, n, resp)
				break
			}
		}
	}
	if errno != 0 && runtime.GOOS == "windows" {
		errno += 0x20000000
	}
	return
}

// send fill-cache command to controller file
func sendCommand(cf *os.File, action vfs.CacheAction, batch []string, threads uint, background bool, dspin *utils.DoubleSpinner) *vfs.CacheResponse {
	paths := strings.Join(batch, "\n")
	var back uint8
	if background {
		back = 1
	}
	headerLen, bodyLen := uint32(8), uint32(4+len(paths)+2+1+1)
	wb := utils.NewBuffer(headerLen + bodyLen)
	wb.Put32(meta.FillCache)
	wb.Put32(bodyLen)

	wb.Put32(uint32(len(paths)))
	wb.Put([]byte(paths))
	wb.Put16(uint16(threads))
	wb.Put8(back)
	wb.Put8(uint8(action))

	if _, err := cf.Write(wb.Bytes()); err != nil {
		logger.Fatalf("Write message: %s", err)
	}

	resp := &vfs.CacheResponse{}
	if background {
		logger.Infof("%s for %d paths in background", action, len(batch))
		return resp
	}

	lastCnt, lastBytes := dspin.Current()
	data, errno := readProgress(cf, func(fileCount, totalBytes uint64) {
		dspin.SetCurrent(lastCnt+int64(fileCount), lastBytes+int64(totalBytes))
	})

	if errno != 0 {
		logger.Fatalf("%s failed: %s", action, errno)
	}

	err := json.Unmarshal(data, resp)
	if err != nil {
		logger.Fatalf("unmarshal error: %s", err)
	}

	return resp
}

func warmup(ctx *cli.Context) error {
	setup0(ctx, 0, 0)

	evict, check := ctx.Bool("evict"), ctx.Bool("check")
	if evict && check {
		logger.Fatalf("--check and --evict can't be used together")
	}

	var paths []string
	for _, p := range ctx.Args().Slice() {
		if abs, err := filepath.Abs(p); err == nil {
			paths = append(paths, abs)
		} else {
			logger.Fatalf("Failed to get absolute path of %s: %s", p, err)
		}
	}
	if fname := ctx.String("file"); fname != "" {
		fd, err := os.Open(fname)
		if err != nil {
			logger.Fatalf("Failed to open file %s: %s", fname, err)
		}
		defer fd.Close()
		scanner := bufio.NewScanner(fd)
		for scanner.Scan() {
			if p := strings.TrimSpace(scanner.Text()); p != "" {
				if abs, e := filepath.Abs(p); e == nil {
					paths = append(paths, abs)
				} else {
					logger.Warnf("Skipped path %s because it fails to get absolute path: %s", p, e)
				}
			}
		}
		if err = scanner.Err(); err != nil {
			logger.Fatalf("Reading file %s failed with error: %s", fname, err)
		}
	}
	if len(paths) == 0 {
		logger.Infof("no path")
		return nil
	}

	// find mount point
	first := paths[0]
	controller, err := openController(first)
	if err != nil {
		return fmt.Errorf("open control file for %s: %s", first, err)
	}
	defer controller.Close()

	mp := first
	for ; mp != "/"; mp = filepath.Dir(mp) {
		inode, err := utils.GetFileInode(mp)
		if err != nil {
			logger.Fatalf("lookup inode for %s: %s", mp, err)
		}
		if inode == uint64(meta.RootInode) {
			break
		}
	}

	threads := ctx.Uint("threads")
	if threads == 0 {
		logger.Warnf("threads should be larger than 0, reset it to 1")
		threads = 1
	}

	action := vfs.WarmupCache
	if evict {
		action = vfs.EvictCache
	} else if check {
		action = vfs.CheckCache
	}

	background := ctx.Bool("background")
	start := len(mp)
	batch := make([]string, 0, batchMax)
	progress := utils.NewProgress(background)
	dspin := progress.AddDoubleSpinnerTwo(fmt.Sprintf("%s file", action), fmt.Sprintf("%s size", action))
	total := &vfs.CacheResponse{Locations: make(map[string]uint64)}
	for _, path := range paths {
		if mp == "/" {
			inode, err := utils.GetFileInode(path)
			if err != nil {
				logger.Errorf("lookup inode for %s: %s", mp, err)
				continue
			}
			batch = append(batch, fmt.Sprintf("inode:%d", inode))
		} else if strings.HasPrefix(path, mp) {
			batch = append(batch, path[start:])
		} else {
			logger.Errorf("Path %s is not under mount point %s", path, mp)
			continue
		}
		if len(batch) >= batchMax {
			resp := sendCommand(controller, action, batch, threads, background, dspin)
			total.Add(resp)
			batch = batch[:0]
		}
	}
	if len(batch) > 0 {
		resp := sendCommand(controller, action, batch, threads, background, dspin)
		total.Add(resp)
	}
	progress.Done()

	if !background {
		count, bytes := dspin.Current()
		switch action {
		case vfs.WarmupCache:
			logger.Infof("%s: %d files (%s bytes)", action, count, humanize.IBytes(uint64(bytes)))
		case vfs.EvictCache:
			logger.Infof("%s: %d files (%s bytes)", action, count, humanize.IBytes(uint64(bytes)))
		case vfs.CheckCache:
			if len(total.Locations) > 0 {
				var result = [][]string{
					{"Location", "Size", "Percentage"},
				}
				var locs []string
				for loc := range total.Locations {
					locs = append(locs, loc)
				}
				sort.Strings(locs)
				for _, loc := range locs {
					size := total.Locations[loc]
					result = append(result, []string{loc, humanize.IBytes(size), fmt.Sprintf("%.1f%%", float64(size)*100/float64(bytes))})
				}
				printResult(result, 0, false)
			}
			pct := 0.0
			if bytes != 0 {
				pct = float64(uint64(bytes)-total.MissBytes) * 100 / float64(bytes)
			}
			logger.Infof("%s: %d files checked, %s of %s (%2.1f%%) cached", action, count,
				humanize.IBytes(uint64(bytes)-total.MissBytes),
				humanize.IBytes(uint64(bytes)),
				pct)
		}
	}
	return nil
}


================================================
FILE: cmd/warmup_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"fmt"
	"os"
	"runtime"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
)

func TestWarmup(t *testing.T) {
	mountTemp(t, nil, nil, nil)
	defer umountTemp(t)

	if err := os.WriteFile(fmt.Sprintf("%s/f1.txt", testMountPoint), []byte("test"), 0644); err != nil {
		t.Fatalf("write file failed: %s", err)
	}
	m := meta.NewClient(testMeta, nil)
	format, err := m.Load(true)
	if err != nil {
		t.Fatalf("load setting err: %s", err)
	}
	uuid := format.UUID
	var cacheDir = "/var/jfsCache"
	var filePath string
	switch runtime.GOOS {
	case "linux":
		if os.Getuid() == 0 {
			break
		}
		fallthrough
	case "darwin", "windows":
		homeDir, err := os.UserHomeDir()
		if err != nil {
			t.Fatalf("%v", err)
		}
		cacheDir = fmt.Sprintf("%s/.juicefs/cache", homeDir)
	}

	os.RemoveAll(fmt.Sprintf("%s/%s", cacheDir, uuid))
	defer os.RemoveAll(fmt.Sprintf("%s/%s", cacheDir, uuid))

	if err = Main([]string{"", "warmup", testMountPoint}); err != nil {
		t.Fatalf("warmup: %s", err)
	}

	time.Sleep(2 * time.Second)
	filePath = fmt.Sprintf("%s/%s/raw/chunks/0/0/1_0_4", cacheDir, uuid)
	content, err := os.ReadFile(filePath)
	if err != nil || len(content) < 4 || string(content[:4]) != "test" {
		t.Fatalf("warmup: %s; got content %s", err, content)
	}
}


================================================
FILE: cmd/webdav.go
================================================
//go:build !nowebdav
// +build !nowebdav

/*
 *  * JuiceFS, Copyright 2022 Juicedata, Inc.
 *  *
 *  * Licensed under the Apache License, Version 2.0 (the "License");
 *  * you may not use this file except in compliance with the License.
 *  * You may obtain a copy of the License at
 *  *
 *  *     http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS,
 *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  * See the License for the specific language governing permissions and
 *  * limitations under the License.
 *
 */

package cmd

import (
	"os"
	"path"

	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/urfave/cli/v2"
)

func cmdWebDav() *cli.Command {
	selfFlags := []cli.Flag{
		&cli.StringFlag{
			Name:  "cert-file",
			Usage: "certificate file for https",
		},
		&cli.StringFlag{
			Name:  "key-file",
			Usage: "key file for https",
		},
		&cli.BoolFlag{
			Name:  "gzip",
			Usage: "compress served files via gzip",
		},
		&cli.BoolFlag{
			Name:  "disallowList",
			Usage: "disallow list a directory",
		},
		&cli.BoolFlag{
			Name:  "enable-proppatch",
			Usage: "enable proppatch method support",
		},
		&cli.StringFlag{
			Name:  "log",
			Usage: "path for WebDAV log",
			Value: path.Join(getDefaultLogDir(), "juicefs-webdav.log"), //nolint:typecheck
		},
		&cli.StringFlag{
			Name:  "access-log",
			Usage: "path for JuiceFS access log",
		},
		&cli.BoolFlag{
			Name:    "background",
			Aliases: []string{"d"},
			Usage:   "run in background",
		},
		&cli.IntFlag{
			Name:    "threads",
			Aliases: []string{"p"},
			Value:   50,
			Usage:   "number of threads for delete jobs (max 255)",
		},
		&cli.StringFlag{
			Name:  "mountpoint",
			Value: "webdav",
			Usage: "the mount point for current volume (to follow symlink)",
		},
	}

	return &cli.Command{
		Name:      "webdav",
		Action:    webdav,
		Category:  "SERVICE",
		Usage:     "Start a WebDAV server",
		ArgsUsage: "META-URL ADDRESS",
		Description: `
Examples:
$ export WEBDAV_USER=root
$ export WEBDAV_PASSWORD=1234
$ juicefs webdav redis://localhost localhost:9007`,
		Flags: expandFlags(selfFlags, clientFlags(0), shareInfoFlags()),
	}
}

func webdav(c *cli.Context) error {
	setup(c, 2)
	metaUrl := c.Args().Get(0)
	listenAddr := c.Args().Get(1)
	_, jfs := initForSvc(c, c.String("mountpoint"), "webdav", metaUrl, listenAddr)
	fs.StartHTTPServer(jfs, fs.WebdavConfig{
		Addr:            listenAddr,
		DisallowList:    c.Bool("disallowList"),
		EnableGzip:      c.Bool("gzip"),
		Username:        os.Getenv("WEBDAV_USER"),
		Password:        os.Getenv("WEBDAV_PASSWORD"),
		CertFile:        c.String("cert-file"),
		KeyFile:         c.String("key-file"),
		EnableProppatch: c.Bool("enable-proppatch"),
		MaxDeletes:      c.Int("threads"),
	})
	return jfs.Meta().CloseSession()
}


================================================
FILE: cmd/webdav_noop.go
================================================
//go:build nowebdav
// +build nowebdav

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"errors"

	"github.com/urfave/cli/v2"
)

func cmdWebDav() *cli.Command {
	return &cli.Command{
		Name:        "webdav",
		Category:    "SERVICE",
		Usage:       "Start a WebDAV server (not included)",
		Description: `This feature is not included. If you want it, recompile juicefs without "nowebdav" flag`,
		Action: func(*cli.Context) error {
			return errors.New("not supported")
		},
	}
}


================================================
FILE: codecov.yml
================================================
github_checks: false
coverage:
  status:
    project: false
    patch: false


================================================
FILE: deploy/juicefs-s3-gateway.yaml
================================================
apiVersion: apps/v1
kind: Deployment
metadata:
  name: juicefs-s3-gateway
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: juicefs-s3-gateway
  template:
    metadata:
      labels:
        app.kubernetes.io/name: juicefs-s3-gateway
    spec:
      initContainers:
        - name: format
          image: juicedata/mount:latest
          command:
            - sh
            - -c
            - juicefs format --storage=${storage} --bucket=${bucket} --access-key=${accesskey} --secret-key=${secretkey} ${metaurl} ${name}
          envFrom:
            - secretRef:
                name: juicefs-secret
          env:
            - name: accesskey
              valueFrom:
                secretKeyRef:
                  name: juicefs-secret
                  key: access-key
            - name: secretkey
              valueFrom:
                secretKeyRef:
                  name: juicefs-secret
                  key: secret-key
      containers:
        - name: gateway
          image: juicedata/mount:latest
          command:
            - sh
            - -c
            - juicefs gateway ${METAURL} ${NODE_IP}:9000 --metrics=${NODE_IP}:9567
          env:
            - name: NODE_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
            - name: METAURL
              valueFrom:
                secretKeyRef:
                  name: juicefs-secret
                  key: metaurl
            - name: MINIO_ROOT_USER
              valueFrom:
                secretKeyRef:
                  name: juicefs-secret
                  key: access-key
            - name: MINIO_ROOT_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: juicefs-secret
                  key: secret-key
          ports:
            - containerPort: 9000
            - containerPort: 9567
          resources:
            limits:
              cpu: 5000m
              memory: 5Gi
            requests:
              cpu: 1000m
              memory: 1Gi
---
apiVersion: v1
kind: Service
metadata:
  name: juicefs-s3-gateway
  namespace: kube-system
  labels:
    app.kubernetes.io/name: juicefs-s3-gateway
spec:
  selector:
    app.kubernetes.io/name: juicefs-s3-gateway
  ports:
    - name: http
      port: 9000
      targetPort: 9000
    - name: metrics
      port: 9567
      targetPort: 9567


================================================
FILE: docs/README.md
================================================
# JuiceFS User Manual

Please visit JuiceFS Documentation Center for more information:

- [🇬🇧 English](https://juicefs.com/docs/community/introduction)
- [🇨🇳 简体中文](https://juicefs.com/docs/zh/community/introduction)


================================================
FILE: docs/en/administration/destroy.md
================================================
---
title: How to destroy a file system
sidebar_position: 8
---

JuiceFS client provides the `destroy` command to completely destroy a file system, which will result in

- Deletion of all metadata entries of this file system
- Deletion of all data blocks of this file system

Use this command in the following format.

```shell
juicefs destroy <METADATA URL> <UUID>
```

- `<METADATA URL>`: The URL address of the metadata engine
- `<UUID>`: The UUID of the file system

## Find the UUID of the file system

JuiceFS client provides a `status` command to view detailed information about a file system by simply specifying the file system's metadata engine URL, e.g.

```shell {8}
$ juicefs status redis://127.0.0.1:6379

2022/01/26 21:41:37.577645 juicefs[31181] <INFO>: Meta address: redis://127.0.0.1:6379
2022/01/26 21:41:37.578238 juicefs[31181] <INFO>: Ping redis: 55.041µs
{
  "Setting": {
    "Name": "macjfs",
    "UUID": "eabb96d5-7228-461e-9240-fddbf2b576d8",
    "Storage": "file",
    "Bucket": "jfs/",
    "AccessKey": "",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0,
    "TrashDays": 1
  },
  ...
}
```

## Destroy a file system

:::danger
The destroy operation will cause all the data in the database and the object storage associated with the file system to be deleted. Please make sure to back up the important data before operating!
:::

```shell {1}
$ juicefs destroy redis://127.0.0.1:6379 eabb96d5-7228-461e-9240-fddbf2b576d8

2022/01/26 21:52:17.488987 juicefs[31518] <INFO>: Meta address: redis://127.0.0.1:6379
2022/01/26 21:52:17.489668 juicefs[31518] <INFO>: Ping redis: 55.542µs
 volume name: macjfs
 volume UUID: eabb96d5-7228-461e-9240-fddbf2b576d8
data storage: file://jfs/
  used bytes: 18620416
 used inodes: 23
WARNING: The target volume will be destroyed permanently, including:
WARNING: 1. objects in the data storage
WARNING: 2. entries in the metadata engine
Proceed anyway? [y/N]: y
deleting objects: 68
The volume has been destroyed! You may need to delete cache directory manually.
```

When destroying a file system, the client will issue a confirmation prompt. Please make sure to check the file system information carefully and enter `y` after confirming it is correct.

## FAQ

```shell
2022/01/26 21:47:30.949149 juicefs[31483] <FATAL>: 1 sessions are active, please disconnect them first
```

If you receive an error like the one above, which indicates that the file system has not been properly unmounted, please check and confirm that all mount points are unmounted before proceeding.


================================================
FILE: docs/en/administration/fault_diagnosis_and_analysis.md
================================================
---
title: Troubleshooting Methods
sidebar_position: 5
slug: /fault_diagnosis_and_analysis
description: This article introduces troubleshooting methods for JuiceFS mount point, CSI Driver, Hadoop Java SDK, S3 Gateway, and other clients.
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

## Client log {#client-log}

JuiceFS client will output logs for troubleshooting while running. The level of logs in terms of fatality follows DEBUG < INFO < WARNING < ERROR < FATAL. Since DEBUG logs are not printed by default, you need to explicitly enable it if needed, e.g. by adding the `--debug` option when running the JuiceFS client.

Different JuiceFS clients print logs in different ways, which are described as follows.

### Mount point

When a JuiceFS file system is mounted with the [`-d` option](../reference/command_reference.mdx#mount) (indicating running in the background), it will print logs to the system log file and local log file simultaneously. Depending on which user is running when mounting the file system, the paths of the local log files are slightly different. For root, the local log file locates at `/var/log/juicefs.log`, while it locates at `$HOME/.juicefs/juicefs.log` for non-root users. Please refer to [`--log` option](../reference/command_reference.mdx#mount) for details.

Depending on the operating system, there are different commands to retrieve system logs or read local log files directly.

<Tabs>
  <TabItem value="local-log-file" label="Local log file">

```bash
tail -n 100 /var/log/juicefs.log
```

  </TabItem>
  <TabItem value="macos-syslog" label="macOS system log">

```bash
syslog | grep 'juicefs'
```

  </TabItem>
  <TabItem value="debian-syslog" label="Debian system log">

```bash
cat /var/log/syslog | grep 'juicefs'
```

  </TabItem>
  <TabItem value="centos-syslog" label="CentOS system log">

```bash
cat /var/log/messages | grep 'juicefs'
```

  </TabItem>
</Tabs>

You can use the `grep` command to filter different levels of logs for performance analysis or troubleshooting:

```shell
cat /var/log/syslog | grep 'juicefs' | grep '<ERROR>'
```

### Kubernetes CSI Driver

Depending on the version of the JuiceFS CSI Driver, there are different ways to retrieve logs. Please refer to [CSI Driver documentation](https://juicefs.com/docs/csi/troubleshooting) for details.

### S3 Gateway

The S3 gateway can only run in the foreground, so client logs are output directly to the terminal. If you deploys the S3 gateway in Kubernetes, you can get logs from the corresponding pods.

### Hadoop Java SDK

The JuiceFS client logs will be mixed into the logs of processes using JuiceFS Hadoop Java SDK, e.g. Spark executor. Thus, you need to use keywords, e.g. `juicefs` (case-insensitive), to filter out the logs you do not want.

## Access log {#access-log}

Each JuiceFS client has an access log that records all operations on the file system in detail, such as operation type, user ID, group ID, file inodes and time cost. Access logs can be used for various purposes such as performance analysis, auditing, and troubleshooting.

### Access log format

An example format of an access log is as follows:

```
2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010>
```

The meaning of each column is:

- `2021.01.15 08:26:11.003330`: The time of the current operation
- `[uid:0,gid:0,pid:4403]`: User ID, group ID, process ID of the current operation
- `write`: Operation type
- `(17669,8666,4993160)`: The input parameters of the current operation type. For example, the input parameters of the `write` operation in the example are the inode of the written file, the size of the written data, and the offset of the written file. Different operation types have different parameters. For details, please refer to the [`vfs.go`](https://github.com/juicedata/juicefs/blob/main/pkg/vfs/vfs.go) file.
- `OK`: Indicate the current operation is successful or not. If it is unsuccessful, specific failure information will be output.
- `<0.000010>`: The time (in seconds) that the current operation takes.

Access logs tend to get very large and difficult for human to process directly, use [`juicefs profile`](#profile) to quickly visualize performance data based on these logs.

Different JuiceFS clients obtain access log in different ways, which are described below.

### Mount point

There is a virtual file named `.accesslog` in the root directory of the JuiceFS file system mount point, the contents of which can be viewed by the `cat` command (the command will not exit), for example (assuming the root directory of the mount point is `/jfs`):

```bash
cat /jfs/.accesslog
```

```output
2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010>
2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014>
2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006>
```

### Kubernetes CSI Driver

Please refer to [CSI Driver documentation](https://juicefs.com/docs/csi/troubleshooting) to find the mount pod or CSI Driver pod depending on the version of JuiceFS CSI Driver you are using, and the `.accesslog` file can be viewed in the root directory of the JuiceFS file system mount point in the pod. The mount point path in the pod is `/jfs/<pv_volumeHandle>`. Assuming there is a mount pod named as `juicefs-1.2.3.4-pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373`, in which `pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373` is  `<pv_volumeHandle>`, you can then use the following command to view the `.accesslog` file:

```bash
kubectl -n kube-system exec juicefs-chaos-k8s-002-pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373 -- cat /jfs/pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373/.accesslog
```

### S3 Gateway

You need to add the [`--access-log` option](../reference/command_reference.mdx#gateway) when starting the S3 gateway to specify the path to output the access log. By default, the S3 gateway does not output the access log.

### Hadoop Java SDK

You need to add the `juicefs.access-log` configuration item in the [client configurations](../deployment/hadoop_java_sdk.md#other-configurations) of the JuiceFS Hadoop Java SDK to specify the path of the access log output, and the access log is not output by default.

## Collect Various Information Using the `debug` Subcommand {#debug}

The `juicefs debug` subcommand can help you automatically collect various information about a specified mount point, facilitating troubleshooting and diagnosis.

```shell
juicefs debug <mountpoint>
```

This command collects the following information:

1. JuiceFS version
2. Operating system version and kernel version
3. Contents of the JuiceFS `.config` internal file
4. Contents of the `.stat` internal file in JuiceFS and recorded again after 5 seconds
5. Command-line parameters used for mounting
6. Go pprof information
7. JuiceFS logs (defaulting to the last 5000 lines)

By default, a `debug` directory is created in the current directory, and the collected information is saved in that directory. Here's an example:

```shell
$ juicefs debug /tmp/mountpoint

$ tree ./debug
./debug
├── tmp-test1-20230609104324
│   ├── config.txt
│   ├── juicefs.log
│   ├── pprof
│   │   ├── juicefs.allocs.pb.gz
│   │   ├── juicefs.block.pb.gz
│   │   ├── juicefs.cmdline.txt
│   │   ├── juicefs.goroutine.pb.gz
│   │   ├── juicefs.goroutine.stack.txt
│   │   ├── juicefs.heap.pb.gz
│   │   ├── juicefs.mutex.pb.gz
│   │   ├── juicefs.profile.30s.pb.gz
│   │   ├── juicefs.threadcreate.pb.gz
│   │   └── juicefs.trace.5s.pb.gz
│   ├── stats.5s.txt
│   ├── stats.txt
│   └── system-info.log
└── tmp-test1-20230609104324.zip
```

## Real-time performance monitoring {#performance-monitor}

JuiceFS provides the `profile` and `stats` subcommands to visualize real-time performance data, the `profile` command is based on the [file system access log](#access-log), while the `stats` command uses [Real-time statistics](../administration/monitoring.md).

### `juicefs profile` {#profile}

[`juicefs profile`](../reference/command_reference.mdx#profile) will collect data from [file system access log](#access-log), run the `juicefs profile MOUNTPOINT` command, you can see the real-time statistics of each file system operation based on the latest access log:

![JuiceFS-profiling](../images/juicefs-profiling.gif)

Apart from real-time mode, this command also provides a play-back mode, which performs the same visualization on existing access log files:

```shell
# Collect access logs in advance
cat /jfs/.accesslog > /tmp/juicefs.accesslog

# After performance issue is reproduced, re-play this log file to find system bottleneck
juicefs profile -f /tmp/juicefs.accesslog
```

If the replay speed is too fast, pause anytime using <kbd>Enter/Return</kbd>, and continue by pressing it again. If too slow, use `--interval 0` and it will replay the whole log file as fast as possible, and directly show the final result.

If you're only interested in a certain user or process, you can set filters:

```bash
juicefs profile /tmp/juicefs.accesslog --uid 12345
```

### `juicefs stats` {#stats}

The [`juicefs stats`](../reference/command_reference.mdx#stats) command reads JuiceFS Client internal metrics data, and output performance data in a format similar to `dstat`:

![juicefs_stats_watcher](../images/juicefs_stats_watcher.png)

Metrics description:

#### `usage`

- `cpu`: CPU usage of the process.
- `mem`: Physical memory used by the process.
- `buf`: Current [buffer size](../guide/cache.md#buffer-size), if this value is constantly close to (or even exceeds) the configured [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options), you should increase buffer size or decrease application workload.
- `cache`: Internal metric, ignore this.

#### `fuse`

- `ops`/`lat`: Operations processed by FUSE per second, and their average latency (in milliseconds).
- `read`/`write`: Read/write bandwidth usage of FUSE.

#### `meta`

- `ops`/`lat`: Metadata operations processed per second, and their average latency (in milliseconds). Please note that, operations returned directly from cache are not counted in, in order to show a more accurate latency of clients actually interacting with metadata engine.
- `txn`/`lat`: Write transactions per second processed by the metadata engine and their average latency (in milliseconds). Read-only requests such as `getattr` are only counted as `ops` but not `txn`.
- `retry`: Write transactions per second that the metadata engine retries.

#### `blockcache`

The `blockcache` stands for local cache data, if read requests are already handled by kernel page cache, they won't be counted into the `blockcache` read metric. If there's consistent `blockcache` read traffic while you are conducting repeated read on a fixed file, this means read requests never enter page cache, and you should probably troubleshoot in this direction (e.g. not enough memory).

- `read`/`write`: Read/write bandwidth of client local data cache

#### `object`

The `object` stands for object storage related metrics, when cache is enabled, penetration to object storage will significantly hinder read performance, use these metrics to check if data has been fully cached. On the other hand, you can also compare `object.get` and `fuse.read` traffic to get a rough idea of the current [read amplification](./troubleshooting.md#read-amplification) status.

- `get`/`get_c`/`lat`: Bandwidth, requests per second, and their average latency (in milliseconds) for object storage processing read requests.
- `put`/`put_c`/`lat`: Bandwidth, requests per second, and their average latency (in milliseconds) for object storage processing write requests.
- `del_c`/`lat`: Delete requests per second the object storage can process, and the average latency (in milliseconds).

## Get runtime information using pprof {#runtime-information}

By default, JuiceFS clients will listen to a TCP port locally via [pprof](https://pkg.go.dev/net/http/pprof) to get runtime information such as Goroutine stack information, CPU performance statistics, memory allocation statistics. You can view the specific port number that the current JuiceFS client is listening to through the `.config` file under the mount point:

```bash
# Assume the mount point is /jfs
$ cat /jfs/.config | grep 'DebugAgent'
  "DebugAgent": "127.0.0.1:6064",
```

The default port number range that pprof listens to starts from 6060 and ends at 6099. From the above example, you can see that the actual port number is 6064. Once you get the listening port number, you can view all the available runtime information by accessing `http://localhost:<port>/debug/pprof`, and some important runtime information will be shown as follows:

- Goroutine stack information: `http://localhost:<port>/debug/pprof/goroutine?debug=1`
- CPU performance statistics: `http://localhost:<port>/debug/pprof/profile?seconds=30`
- Memory allocation statistics: `http://localhost:<port>/debug/pprof/heap`

To make it easier to analyze this runtime information, you can save it locally, e.g.:

```bash
curl 'http://localhost:<port>/debug/pprof/goroutine?debug=1' > juicefs.goroutine.txt
```

```bash
curl 'http://localhost:<port>/debug/pprof/profile?seconds=30' > juicefs.cpu.pb.gz
```

```bash
curl 'http://localhost:<port>/debug/pprof/heap' > juicefs.heap.pb.gz
```

:::tip
You can also use the `juicefs debug` command to automatically collect these runtime information and save it locally. By default, it is saved to the `debug` directory under the current directory, for example:

```bash
juicefs debug /mnt/jfs
```

For more information about the `juicefs debug` command, see [command reference](../reference/command_reference.mdx#debug).
:::

If you have the `go` command installed, you can analyze it directly with the `go tool pprof` command. For example to analyze CPU performance statistics:

```bash
$ go tool pprof 'http://localhost:<port>/debug/pprof/profile?seconds=30'
Fetching profile over HTTP from http://localhost:<port>/debug/pprof/profile?seconds=30
Saved profile in /Users/xxx/pprof/pprof.samples.cpu.001.pb.gz
Type: cpu
Time: Dec 17, 2021 at 1:41pm (CST)
Duration: 30.12s, Total samples = 32.06s (106.42%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 30.57s, 95.35% of 32.06s total
Dropped 285 nodes (cum <= 0.16s)
Showing top 10 nodes out of 192
      flat  flat%   sum%        cum   cum%
    14.73s 45.95% 45.95%     14.74s 45.98%  runtime.cgocall
     7.39s 23.05% 69.00%      7.41s 23.11%  syscall.syscall
     2.92s  9.11% 78.10%      2.92s  9.11%  runtime.pthread_cond_wait
     2.35s  7.33% 85.43%      2.35s  7.33%  runtime.pthread_cond_signal
     1.13s  3.52% 88.96%      1.14s  3.56%  runtime.nanotime1
     0.77s  2.40% 91.36%      0.77s  2.40%  syscall.Syscall
     0.49s  1.53% 92.89%      0.49s  1.53%  runtime.memmove
     0.31s  0.97% 93.86%      0.31s  0.97%  runtime.kevent
     0.27s  0.84% 94.70%      0.27s  0.84%  runtime.usleep
     0.21s  0.66% 95.35%      0.21s  0.66%  runtime.madvise
```

Runtime information can also be exported to visual charts for a more intuitive analysis. The visual charts can be exported to various formats such as HTML, PDF, SVG, PNG, etc. For example, the command to export memory allocation statistics as a PDF file is as follows:

:::note
The export to visual chart function relies on [Graphviz](https://graphviz.org), so please install it first.
:::

```bash
go tool pprof -pdf 'http://localhost:<port>/debug/pprof/heap' > juicefs.heap.pdf
```

For more information about pprof, please see the [official documentation](https://github.com/google/pprof/blob/main/doc/README.md).

### Profiling with the Pyroscope {#use-pyroscope}

![Pyroscope](../images/pyroscope.png)

[Pyroscope](https://github.com/pyroscope-io/pyroscope) is an open source continuous profiling platform. It will help you:

+ Find performance issues and bottlenecks in your code
+ Resolve issues of high CPU utilization
+ Understand the call tree of your application
+ Track changes over time

JuiceFS supports using the `--pyroscope` option to pass in the pyroscope server address, and metrics are pushed to the server every 10 seconds. If permission verification is enabled on the server, the verification information API Key can be passed in by the environment variable `PYROSCOPE_AUTH_TOKEN`:

```bash
export PYROSCOPE_AUTH_TOKEN=xxxxxxxxxxxxxxxx
juicefs mount --pyroscope http://localhost:4040 redis://localhost /mnt/jfs
juicefs dump --pyroscope http://localhost:4040 redis://localhost dump.json
```


================================================
FILE: docs/en/administration/metadata/_category_.yml
================================================
label: "Metadata Engine Best Practices"
position: 1

================================================
FILE: docs/en/administration/metadata/etcd_best_practices.md
================================================
---
sidebar_label: etcd
sidebar_position: 4
slug: /etcd_best_practices
---

# etcd Best Practices

## Data size

By default, etcd sets a [space quota](https://etcd.io/docs/latest/op-guide/maintenance/#space-quota) of 2GB, which can support storing metadata of two million files. Adjusted via the `--quota-backend-bytes` option, [official suggestion](https://etcd.io/docs/latest/dev-guide/limit) do not exceed 8GB.

By default, etcd will keep the modification history of all data until the amount of data exceeds the space quota and the service cannot be provided. It is recommended to add the following options to enable [automatic compaction](https://etcd.io/docs/latest/op-guide/maintenance/#auto-compaction):

````
--auto-compaction-mode revision --auto-compaction-retention 1000000
````

When the amount of data reaches the quota and cannot be written, the capacity can be reduced by manual compaction (`etcdctl compact`) and defragmentation (`etcdctl defrag`). **It is strongly recommended to perform these operations on the nodes of the etcd cluster one by one, otherwise the entire etcd cluster may become unavailable.**

## Performance

etcd provides strongly consistent read and write access, and all operations involve multi-machine transactions and disk data persistence. **It is recommended to use high-performance SSD for deployment**, otherwise it will affect the performance of the file system. For more hardware configuration suggestions, please refer to [official documentation](https://etcd.io/docs/latest/op-guide/hardware).

If the etcd cluster has power-down protection, or other measures that can ensure that all nodes will not go down at the same time, you can also disable data synchronization and disk storage through the `--unsafe-no-fsync` option to reduce access latency and improve files system performance. **At this time, if two nodes are down at the same time, there is a risk of data loss.**

## Kubernetes

It is recommended to build an independent etcd service in the Kubernetes environment for JuiceFS to use, instead of using the default etcd service in the cluster, to avoid affecting the stability of the Kubernetes cluster when the file system access pressure is high.


================================================
FILE: docs/en/administration/metadata/fdb_best_practices.md
================================================
---
sidebar_label: FoundationDB
sidebar_position: 6
slug: /fdb_best_practices
---
# FoundationDB Best Practices

This document is currently only available in chinese, translation is in progress...


================================================
FILE: docs/en/administration/metadata/mysql_best_practices.md
================================================
---
sidebar_label: MySQL
sidebar_position: 2
---
# MySQL Best Practices

For distributed file systems where data and metadata are stored separately, the read and write performance and security of metadata directly affects the efficiency and data security of the whole system, respectively.

In the production environment, it is recommended to select hosted cloud databases provided by cloud computing platforms first, and comebine it with appropriate high availability architecture to use.

Please always pay attention to the integrity and security of metadata when using JuiceFS no matter whether databases is build on your own or in the cloud.

## Passing sensitive information via environment variables

Database password can be set directly through the metadata URL. Although it is easy and convenient, the password may leak during logging and process outputing processes. For the sake of security, it's better to pass the database password through an environment variable.

`META_PASSWORD` is a predefined environment variable for the database password:

```shell
export META_PASSWORD=mypassword
juicefs mount -d "mysql://user:@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

Similarly, `META_PASSWORD_FILE` can be used to provide the database password as a file:

```shell
export META_PASSWORD_FILE=/secret/mypassword.txt
juicefs mount -d "mysql://user:@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

## Database connection control

MySQL is a multiple threads database, every client connection need a dedicate server thread, limition of total connections and new connects are prefered. JuiceFS now provides the following options for better control of the connections:

- max_open_conns: The maximim database connections allowed for this mount point, default value is 0 which means ulimited connections. If a non-zero values is provided, lower limit may cause current requests have to wait for other reqeusts to free the database connections under high concurrency, while higher value may waste the server side resources. Dynamicly adjusting is prefered based on real business trafics.
- max_idle_conns: The minimum database connections allowed for this mount point, default values is double of logical CPU cores. Lower value will bring new database connetions under peak time, while higher value may waste some server side resource and get other mount points lack of database connections in peak time.
- max_idle_time: The maximum idle time allowed for a database connection, default value is 300 seconds. If a connection has no request to database for a given time, it will be closed to free the server side resource. Lower value will bring new database connetions under peak time.
- max_life_time: The maximum life time allowed for a database connection, default value is 0 which means unlimited. As database connections are shared with different business requests, some resources (such as memory) may not be freed cleanly or be fragmented. Provide a non-zero value (such as 3600 seconds) will let the connection to be destroyed at given time to fully release the resource associated.

We can pass the above options in metadata URL :

```shell
export META_PASSWORD=mypassword
juicefs mount -d "mysql://user:@(192.168.1.6:3306)/juicefs?max_open_conns=30&max_life_time=3600" /mnt/jfs
```

Plase refer Go official module manual [Database/SQL](https://pkg.go.dev/database/sql#SetConnMaxIdleTime) for more information.

## Periodic backups

Please refer to the official manual [Chapter 9. Backup and Recovery](https://dev.mysql.com/doc/refman/8.0/en/backup-and-recovery.html) to learn how to back up and restore databases.

It is recommended to make a plan for regularly backing up your database, and at the same time, do some tests to restore the data in an experimental environment to confirm that the backup is valid.

## High Availability

The official MySQL document [Chapter 19. Replication](https://dev.mysql.com/doc/refman/8.0/en/replication.html)  and [Chapter 20. Group Replication](https://dev.mysql.com/doc/refman/8.0/en/group-replication.html) are prefered high availability solutions. Please choose the appropriate ones according to your needs.

:::note
JuiceFS uses [transactions] to ensure atomicity of metadata operations, so a transactional storage engine such as [InnoDB](https://dev.mysql.com/doc/refman/8.0/en/backup-and-recovery.html) is required. Some MySQL based distributed (Multi-Shards) databases may not fully compatiable with MySQL both in SQL syntax or transactions, we do not have any testing or certificating works on them.
:::


================================================
FILE: docs/en/administration/metadata/postgresql_best_practices.md
================================================
---
sidebar_label: PostgreSQL
sidebar_position: 3
slug: /postgresql_best_practices
---
# PostgreSQL Best Practices

For distributed file systems where data and metadata are stored separately, the read and write performance and security of metadata directly affects the efficiency and data security of the whole system, respectively.

In the production environment, it is recommended to select hosted cloud databases provided by cloud computing platforms first, and comebine it with appropriate high availability architecture to use.

Please always pay attention to the integrity and security of metadata when using JuiceFS no matter whether databases is build on your own or in the cloud.

## Communication Security

By default, JuiceFS clients will use SSL encryption to connect to PostgreSQL. If SSL encryption is not enabled on the database, you need to append the `sslmode=disable` parameter to the metadata URL.

It is recommended to configure and keep SSL encryption enabled on the database server side all the time.

## Passing sensitive information via environment variables

Database password can be set directly through the metadata URL. Although it is easy and convenient, the password may leak during logging and process outputing processes. For the sake of security, it's better to pass the database password through an environment variable.

`META_PASSWORD` is a predefined environment variable for the database password:

```shell
export META_PASSWORD=mypassword
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs" /mnt/jfs
```

Similarly, `META_PASSWORD_FILE` can be used to provide the database password as a file:

```shell
export META_PASSWORD_FILE=/secret/mypassword.txt
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs" /mnt/jfs
```

PostgreSQL is a multiple process database, every client connection need a dedicate server process, limition of total connections and new connects are prefered. JuiceFS now provides the following options for better control of the connections:

- max_open_conns: The maximim database connections allowed for this mount point, default value is 0 which means ulimited connections. If a non-zero values is provided, lower limit may cause current requests have to wait for other reqeusts to free the database connections under high concurrency, while higher value may waste the server side resources. Dynamicly adjusting is prefered based on real business trafics.
- max_idle_conns: The minimum database connections allowed for this mount point, default values is double of logical CPU cores. Lower value will bring new database connetions under peak time, while higher value may waste some server side resource and get other mount points lack of database connections in peak time.  
- max_idle_time: The maximum idle time allowed for a database connection, default value is 300 seconds. If a connection has no request to database for a given time, it will be closed to free the server side resource. Lower value will bring new database connetions under peak time.
- max_life_time: The maximum life time allowed for a database connection, default value is 0 which means unlimited. As database connections are shared with different business requests, some resources (such as memory) may not be freed cleanly or be fragmented. Provide a non-zero value (such as 3600 seconds) will let the connection to be destroyed at given time to fully release the resource associated.

We can pass the above options in metadata URL :

```shell
export META_PASSWORD=mypassword
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs?max_open_conns=30&max_life_time=3600" /mnt/jfs
```

Plase refer Go official module manual [Datatabase/SQL](https://pkg.go.dev/database/sql#SetConnMaxIdleTime) for more information.

## Authentication methods

PostgreSQL supports the md5 authentication method. The following section can be adapted in the pg_hba.conf of your PostgreSQL instance.

```
# TYPE  DATABASE        USER            ADDRESS                 METHOD
host    juicefs         juicefsuser     192.168.1.0/24          md5
```

## Periodic backups

Please refer to the official manual [Chapter 26. Backup and Restore](https://www.postgresql.org/docs/current/backup.html) to learn how to back up and restore databases.

It is recommended to make a plan for regularly backing up your database, and at the same time, do some tests to restore the data in an experimental environment to confirm that the backup is valid.

## Using connection pooler

Connection pooler is a middleware that works between client and database and reuses the earlier connection from the pool, which improve connection efficiency and reduce the loss of short connections. Commonly used connection poolers are [PgBouncer](https://www.pgbouncer.org) and [Pgpool-II](https://www.pgpool.net).

## High Availability

The official PostgreSQL document [High Availability, Load Balancing, and Replication](https://www.postgresql.org/docs/current/different-replication-solutions.html) compares several common databases in terms of high availability solutions. Please choose the appropriate ones according to your needs.

:::note
JuiceFS uses [transactions](https://www.postgresql.org/docs/current/tutorial-transactions.html) to ensure atomicity of metadata operations. Since PostgreSQL does not yet support Multi-Shard (Distributed) transactions, do not use a multi-server distributed architecture for the JuiceFS metadata.
:::


================================================
FILE: docs/en/administration/metadata/redis_best_practices.md
================================================
---
sidebar_label: Redis
sidebar_position: 1
slug: /redis_best_practices
---

# Redis Best Practices

To ensure metadata service performance, we recommend use Redis service managed by public cloud provider, see [Recommended Managed Redis Service](#recommended-managed-redis-service).

## Memory usage

The space used by the JuiceFS metadata engine is mainly related to the number of files in the file system. According to our experience, the metadata of each file occupies approximately 300 bytes of memory. Therefore, if you want to store 100 million files, approximately 30 GiB of memory is required.

You can check the specific memory usage through Redis' [`INFO memory`](https://redis.io/commands/info) command, for example:

```
> INFO memory
used_memory: 19167628056
used_memory_human: 17.85G
used_memory_rss: 20684886016
used_memory_rss_human: 19.26G
...
used_memory_overhead: 5727954464
...
used_memory_dataset: 13439673592
used_memory_dataset_perc: 70.12%
```

Among them, `used_memory_rss` is the total memory size actually used by Redis, which includes not only the size of data stored in Redis (that is, `used_memory_dataset` above) but also some Redis [system overhead](https://redis.io/commands/memory-stats) (that is, `used_memory_overhead` above). As mentioned earlier that the metadata of each file occupies about 300 bytes, this is actually calculated by `used_memory_dataset`. If you find that the metadata of a single file in your JuiceFS file system occupies much more than 300 bytes, you can try to run [`juicefs gc`](../../reference/command_reference.mdx#gc) command to clean up possible redundant data.

## High availability

### Sentinel mode {#sentinel-mode}

[Redis Sentinel](https://redis.io/docs/manual/sentinel) is the official solution to high availability for Redis. It provides following capabilities:

- **Monitoring**. Sentinel constantly checks if your master and replica instances are working as expected.
- **Notification**. Sentinel can notify the system administrator, or other computer programs, via an API, that something is wrong with one of the monitored Redis instances.
- **Automatic failover**. If a master is not working as expected, Sentinel can start a failover process where a replica is promoted to master, the other additional replicas are reconfigured to use the new master, and the applications using the Redis server are informed about the new address to use when connecting.
- **Configuration provider**. Sentinel acts as a source of authority for clients service discovery: clients connect to Sentinels in order to ask for the address of the current Redis master responsible for a given service. If a failover occurs, Sentinels will report the new address.

**A stable release of Redis Sentinel is shipped since Redis 2.8**. Redis Sentinel version 1, shipped with Redis 2.6, is deprecated and should not be used.

Before start using Redis sentinel, learn the [fundamentals](https://redis.io/docs/manual/sentinel#fundamental-things-to-know-about-sentinel-before-deploying):

1. You need at least three Sentinel instances for a robust deployment.
2. The three Sentinel instances should be placed into computers or virtual machines that are believed to fail in an independent way. So for example different physical servers or Virtual Machines executed on different availability zones.
3. **Sentinel + Redis distributed system does not guarantee that acknowledged writes are retained during failures, since Redis uses asynchronous replication.** However there are ways to deploy Sentinel that make the window to lose writes limited to certain moments, while there are other less secure ways to deploy it.
4. There is no HA setup which is safe if you don't test from time to time in development environments, or even better if you can, in production environments, if they work. You may have a misconfiguration that will become apparent only when it's too late (at 3am when your master stops working).
5. **Sentinel, Docker, or other forms of Network Address Translation or Port Mapping should be mixed with care**: Docker performs port remapping, breaking Sentinel auto discovery of other Sentinel processes and the list of replicas for a master.

Read the [official documentation](https://redis.io/docs/manual/sentinel) for more information.

Once Redis servers and Sentinels are deployed, `META-URL` can be specified as `redis[s]://[[USER]:PASSWORD@]MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`, for example:

```shell
./juicefs mount redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2 ~/jfs
```

:::tip
For JuiceFS v0.16+, the `PASSWORD` in the URL will be used to connect Redis server, and the password for Sentinel should be provided using the environment variable `SENTINEL_PASSWORD`. For early versions of JuiceFS, the `PASSWORD` is used for both Redis server and Sentinel, which can be overwritten by the environment variables `SENTINEL_PASSWORD` and `REDIS_PASSWORD`.
:::

Since JuiceFS v1.0.0, it is supported to use Redis replica when mounting file systems, to reduce the load on Redis master. In order to achieve this, you must mount the JuiceFS file system in read-only mode (that is, set the `--read-only` mount option), and connect to the metadata engine through Redis Sentinel. Finally, you need to add `?route-read=replica` to the end of the metadata URL. For example: `redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2?route-read=replica`.

It should be noted that since the data of the Redis master node is asynchronously replicated to the replica nodes, the read metadata may not be the latest.

### Cluster mode {#cluster-mode}

:::note
This feature requires JuiceFS v1.0.0 or higher
:::

JuiceFS also supports Redis Cluster as a metadata engine, the `META-URL` format is `redis[s]://[[USER]:PASSWORD@]ADDR:PORT,[ADDR:PORT],[ADDR:PORT][/DB]`. For example:

```shell
juicefs format redis://127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002/1 myjfs
```

:::tip
Redis Cluster does not support multiple databases. However, it splits the key space into 16384 hash slots, and distributes the slots to several nodes. Based on Redis Cluster's [Hash Tag](https://redis.io/docs/reference/cluster-spec/#hash-tags) feature, JuiceFS adds `{DB}` before all file system keys to ensure they will be hashed to the same hash slot, assuring that transactions can still work. Besides, one Redis Cluster can serve for multiple JuiceFS file systems as long as they use different db numbers.
:::

## Data durability

Redis provides various options for [persistence](https://redis.io/docs/manual/persistence) in different ranges:

- **RDB**: The RDB persistence performs point-in-time snapshots of your dataset at specified intervals.
- **AOF**: The AOF persistence logs every write operation received by the server, which will be played again at server startup, meaning that the original dataset will be reconstructed each time server is restarted. Commands are logged using the same format as the Redis protocol in an append-only fashion. Redis is able to rewrite logs in the background when it gets too big.
- **RDB+AOF** <Badge type="success">Recommended</Badge>: It is possible to combine AOF and RDB in the same instance. Notice that, in this case, when Redis restarts the AOF file will be used to reconstruct the original dataset since it is guaranteed to be the most complete.

When using AOF, you can have different fsync policies:

1. No fsync
2. fsync every second <Badge type="primary">Default</Badge>
3. fsync at every query

With the default policy of fsync every second write performance is good enough  (fsync is performed using a background thread and the main thread will try hard to perform writes when no fsync is in progress.), **but you may lose the writes from the last second**.

In addition, be aware that, even if the RBD+AOF mode is adopted, the disk may be damaged and the virtual machine may disappear. Thus, **Redis data needs to be backed up regularly**.

Redis is very data backup friendly since you can copy RDB files while the database is running. The RDB is never modified once produced: while RDB is produced, a temporary name is assigned to it and will be renamed into its final destination atomically using `rename` only when the new snapshot is complete. You can also copy the AOF file to create backups.

Please read the [official documentation](https://redis.io/docs/manual/persistence) for more information.

## Backing up Redis data

**Make Sure to Back up Your Database.** as Disks break, instances in the cloud disappear, and so forth.

By default Redis saves snapshots of the dataset on disk as a binary file called `dump.rdb`. You can configure Redis to save the dataset every N seconds if there are at least M changes in the dataset, or  manually call the [`SAVE`](https://redis.io/commands/save) or [`BGSAVE`](https://redis.io/commands/bgsave) commands as needed.

As we mentioned above, Redis is very data backup friendly. This means that copying the RDB file is completely safe while the server is running. The following are our suggestions:

- Create a cron job in your server, and create hourly snapshots of the RDB file in one directory, and daily snapshots in a different directory.
- Every time running the cron script, call the `find` command to check if old snapshots have been deleted: for instance you can take hourly snapshots for the latest 48 hours, and daily snapshots for one or two months. Make sure to name the snapshots with data and time information.
- Make sure to transfer an RDB snapshot _outside your data center_ or at least _outside the physical machine_ running your Redis instance at least one time every day.

Please read the [official documentation](https://redis.io/docs/manual/persistence) for more information.

## Restore Redis data

After generating the AOF or RDB backup file, you can restore the data by copying the backup file to the path corresponding to the `dir` configuration of the new Redis instance. The instance configuration information can be obtained by the [`CONFIG GET dir`](https://redis.io/commands/config-get) command.

If both AOF and RDB persistence are enabled, Redis will use the AOF file first on starting to recover the data because AOF is guaranteed to be the most complete data.

After recovering Redis data, you can continue to use the JuiceFS file system via the new Redis address. It is recommended to run [`juicefs fsck`](../../reference/command_reference.mdx#fsck) command to check the integrity of the file system data.

## Recommended Managed Redis Service

### Amazon MemoryDB for Redis

[Amazon MemoryDB for Redis](https://aws.amazon.com/memorydb) is a durable, in-memory database service that delivers ultra-fast performance. MemoryDB is compatible with Redis, with MemoryDB, all of your data is stored in memory, which enables you to achieve microsecond read and single-digit millisecond write latency and high throughput. MemoryDB also stores data durably across multiple Availability Zones (AZs) using a Multi-AZ transactional log to enable fast failover, database recovery, and node restarts.

### Google Cloud Memorystore for Redis

[Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) is a fully managed Redis service for the Google Cloud. Applications running on Google Cloud can achieve extreme performance by leveraging the highly scalable, available, secure Redis service without the burden of managing complex Redis deployments.

### Azure Cache for Redis

[Azure Cache for Redis](https://azure.microsoft.com/en-us/services/cache) is a fully managed, in-memory cache that enables high-performance and scalable architectures. It is used to create cloud or hybrid deployments that handle millions of requests per second at sub-millisecond latency, with the advantages of configuration, security, and availability of a managed service.

### Alibaba Cloud ApsaraDB for Redis

[Alibaba Cloud ApsaraDB for Redis](https://www.alibabacloud.com/product/apsaradb-for-redis) is a database service compatible with native Redis protocols. It supports hybrid of memory and hard disks for data persistence. ApsaraDB for Redis provides a highly available hot standby architecture and are scalable to meet requirements for high-performance and low-latency read/write operations.

### Tencent Cloud TencentDB for Redis

[Tencent Cloud TencentDB for Redis](https://intl.cloud.tencent.com/product/crs) is a caching and storage service compatible with the Redis protocol. It features a rich variety of data structure options to help you develop different types of business scenarios, and offers a complete set of database services such as primary-secondary hot backup, automatic switchover for disaster recovery, data backup, failover, instance monitoring, online scaling and data rollback.

## Use Redis compatible product as metadata engine

If you want to use a Redis compatible product as the metadata engine, you need to confirm whether the following Redis data types and commands required by JuiceFS are fully supported.

### Redis data types used by JuiceFS

+ [String](https://redis.io/docs/data-types/strings)
+ [Set](https://redis.io/docs/data-types/sets)
+ [Sorted Set](https://redis.io/docs/data-types/sorted-sets)
+ [Hash](https://redis.io/docs/data-types/hashes)
+ [List](https://redis.io/docs/data-types/lists)

### Redis features used by JuiceFS

+ [Pipelining](https://redis.io/docs/manual/pipelining)

### Redis commands used by JuiceFS

#### String

+ [DECRBY](https://redis.io/commands/decrby)
+ [DEL](https://redis.io/commands/del)
+ [GET](https://redis.io/commands/get)
+ [INCR](https://redis.io/commands/incr)
+ [INCRBY](https://redis.io/commands/incrby)
+ [DECR](https://redis.io/commands/decr)
+ [MGET](https://redis.io/commands/mget)
+ [MSET](https://redis.io/commands/mset)
+ [SETNX](https://redis.io/commands/setnx)
+ [SET](https://redis.io/commands/set)

#### Set

+ [SADD](https://redis.io/commands/sadd)
+ [SMEMBERS](https://redis.io/commands/smembers)
+ [SREM](https://redis.io/commands/srem)

#### Sorted Set

+ [ZADD](https://redis.io/commands/zadd)
+ [ZRANGEBYSCORE](https://redis.io/commands/zrangebyscore)
+ [ZRANGE](https://redis.io/commands/zrange)
+ [ZREM](https://redis.io/commands/zrem)
+ [ZSCORE](https://redis.io/commands/zscore)

#### Hash

+ [HDEL](https://redis.io/commands/hdel)
+ [HEXISTS](https://redis.io/commands/hexists)
+ [HGETALL](https://redis.io/commands/hgetall)
+ [HGET](https://redis.io/commands/hget)
+ [HINCRBY](https://redis.io/commands/hincrby)
+ [HINCRBY](https://redis.io/commands/hincrby)
+ [HKEYS](https://redis.io/commands/hkeys)
+ [HSCAN](https://redis.io/commands/hscan)
+ [HSETNX](https://redis.io/commands/hsetnx)
+ [HSET](https://redis.io/commands/hset) (need to support setting multiple fields and values)

#### List

+ [LLEN](https://redis.io/commands/llen)
+ [LPUSH](https://redis.io/commands/lpush)
+ [LRANGE](https://redis.io/commands/lrange)
+ [LTRIM](https://redis.io/commands/ltrim)
+ [RPUSHX](https://redis.io/commands/rpushx)
+ [RPUSH](https://redis.io/commands/rpush)
+ [SCAN](https://redis.io/commands/scan)

#### Transaction

+ [EXEC](https://redis.io/commands/exec)
+ [MULTI](https://redis.io/commands/multi)
+ [WATCH](https://redis.io/commands/watch)
+ [UNWATCH](https://redis.io/commands/unwatch)

#### Connection management

+ [PING](https://redis.io/commands/ping)

#### Server management

+ [CONFIG GET](https://redis.io/commands/config-get)
+ [CONFIG SET](https://redis.io/commands/config-set)
+ [DBSIZE](https://redis.io/commands/dbsize)
+ [FLUSHDB](https://redis.io/commands/flushdb) (optional)
+ [INFO](https://redis.io/commands/info)

#### Cluster management

+ [CLUSTER INFO](https://redis.io/commands/cluster-info)

#### Scripting (optional)

+ [EVALSHA](https://redis.io/commands/evalsha)
+ [SCRIPT LOAD](https://redis.io/commands/script-load)


================================================
FILE: docs/en/administration/metadata/tikv_best_practices.md
================================================
---
sidebar_label: TiKV
sidebar_position: 5
slug: /tikv_best_practices
---
# TiKV Best Practices

This document is currently only available in chinese, translation is in progress...


================================================
FILE: docs/en/administration/metadata_dump_load.md
================================================
---
title: Metadata Backup & Recovery
sidebar_position: 2
slug: /metadata_dump_load
---

:::tip

- JuiceFS v1.0.0 starts to support automatic metadata backup.
- JuiceFS v1.0.4 starts to support importing an encrypted backup.
- JuiceFS v1.3.0 starts to support binary format metadata backup and recovery.

:::

JuiceFS supports [multiple metadata engines](../reference/how_to_set_up_metadata_engine.md), and each engine stores and manages data in a different format internally. JuiceFS provides the [`dump`](../reference/command_reference.mdx#dump) command to export metadata in a uniform JSON format, also there's the [`load`](../reference/command_reference.mdx#load) command to restore or migrate backups to any metadata storage engine. This dump / load process can also be used to migrate a community edition file system to enterprise edition (read [enterprise docs](https://juicefs.com/docs/cloud/administration/metadata_dump_load) for more), and vice versa.

## Metadata backup {#backup}

:::note

* `juicefs dump` does not provide snapshot consistency. If files are modified during the export, the final backup file will contain information from different points in time, which might prove unusable for some applications (like databases). If you have higher standards for consistency, you should suspend all writes to the system before exporting.
* For large scale file systems, dumping directly from online database may prove risks to system reliability, use with caution.

:::

## File format

JuiceFS supports two formats for metadata backup: JSON and binary. The binary format was introduced in v1.3.0, mainly for large-scale import/export and migration scenarios. The binary format backup is smaller, uses less memory, and supports concurrent import/export.

| Format Type      | Structure & Features         | Use Case                  | Size              | Memory Usage         | Version    |
|------------------|-----------------------------|---------------------------|-------------------|---------------------|------------|
| **JSON**         | Complete directory tree, human-readable | Small/medium FS; troubleshooting | Larger            | Higher              | All versions |
| **Binary**       | Flattened, efficient, compact           | Large-scale import/export/migration | ~1/3 of JSON      | < 1GiB (100M files) | v1.3.0+     |

### Manual backup {#backup-manually}

Using the `dump` command provided by JuiceFS client, you can export metadata to a file, for example:

```shell
# Export as JSON format
juicefs dump redis://192.168.1.6:6379 meta-dump

# Export as binary format
juicefs dump redis://192.168.1.6:6379 meta-dump --binary
```

The JSON or binary file exported by using the `dump` command provided by the JuiceFS client can have any filename and extension that you prefer, as shown in the example above. In particular, if the file extension is `.gz` (e.g. `meta-dump.gz`), the exported data will be compressed using the Gzip algorithm. Starting from version 1.3, the Zstandard compression algorithm is also supported, using .zstd as the file extension.

By default, the `dump` command starts from the root directory `/` and iterates recursively through all the files in the directory tree, and writes the metadata of each file to a JSON output. The object storage credentials will be omitted for data security, but it can be preserved using the `--keep-secret-key` option.

The value of `juicefs dump` is that it can export complete metadata information in a uniform JSON format for easy management and preservation, and it can be recognized and imported by different metadata storage engines.

In practice, the `dump` command should be used in conjunction with the backup tool that comes with the database to complement each other, such as [Redis RDB](https://redis.io/topics/persistence#backing-up-redis-data) and [`mysqldump`](https://dev.mysql.com/doc/mysql-backup-excerpt/5.7/en/mysqldump-sql-format.html), etc.

### Automatic backup {#backup-automatically}

Starting with JuiceFS v1.0.0, the client automatically backs up metadata and copies it to the object storage every hour, regardless of whether the file system is mounted via the `mount` command or accessed via the JuiceFS S3 gateway and Hadoop Java SDK.

The backup files are stored in the `meta` directory of the object storage. It is a separate directory from the data store and not visible in the mount point and does not interact with the data store, and the directory can be viewed and managed using the file browser of the object storage.

![meta-auto-backup-list](../images/meta-auto-backup-list.png)

By default, the JuiceFS client backs up metadata once an hour. The frequency of automatic backups can be adjusted by the `--backup-meta` option when mounting the filesystem, for example, to set the auto-backup to be performed every 8 hours.

```shell
juicefs mount -d --backup-meta 8h redis://127.0.0.1:6379/1 /mnt
```

The backup frequency can be accurate to the second and it supports the following units.

- `h`: accurate to the hour, e.g. `1h`.
- `m`: accurate to the minute, e.g. `30m`, `1h30m`.
- `s`: accurate to the second, such as `50s`, `30m50s`, `1h30m50s`;

It is worth mentioning that the time cost of backup will increase with the number of files in the filesystem. Hence, when the number is too large (by default 1 million) with the automatic backup frequency 1 hour (by default), JuiceFS will automatically skip backup and print the corresponding warning log. At this point you may mount a new client with a bigger `--backup-meta` option value to re-enable automatic backups.

For reference, when using Redis as the metadata engine, backing up the metadata for one million files takes about 1 minute and consumes about 1GB of memory.

:::caution
   When using `--read-only` mount, metadata will not be automatically backed up.
:::

#### Automatic backup policy

Although automatic metadata backup becomes a default action for clients, backup conflicts do not occur when multiple hosts share the same file system mount.

JuiceFS maintains a global timestamp to ensure that only one client performs the backup operation at the same time. When different backup periods are set between clients, then it will back up based on the shortest period setting.

#### Backup cleanup policy

JuiceFS periodically cleans up backups according to the following rules.

- Keep all backups up to 2 days.
- For backups older than 2 days and less than 2 weeks, keep 1 backup for each day.
- For backups older than 2 weeks and less than 2 months, keep 1 backup for each week.
- For backups older than 2 months, keep 1 backup for each month.

## Metadata recovery and migration {#recovery-and-migration}

Use the [`load`](../reference/command_reference.mdx#load) command to restore the metadata dump file into an empty database, for example:

```shell
# Import from JSON file
juicefs load redis://192.168.1.6:6379 meta-dump

# Import from binary backup
juicefs load redis://192.168.1.6:6379 meta-dump --binary
```

Once imported, JuiceFS will recalculate the file system statistics including space usage, inode counters, and eventually generates a globally consistent metadata in the database. If you have a deep understanding of the metadata design of JuiceFS, you can also modify the metadata backup file before restoring to debug.

The dump file is written in an uniform format, which can be recognized and imported by all metadata engines, making it easy to migrate to other types of metadata engines.

For instance, to migrate from a Redis database to MySQL:

1. Exporting metadata backup from Redis:

   ```shell
   juicefs dump redis://192.168.1.6:6379 meta-dump.json
   ```

1. Restoring metadata to a new MySQL database:

   ```shell
   juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs meta-dump.json
   ```

It is also possible to migrate directly through the system's pipe:

```shell
juicefs dump redis://192.168.1.6:6379 | juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs
```

Note that since the API access key for object storage is excluded by default from the backup, when loading metadata, you need to use the [`juicefs config`](../reference/command_reference.mdx#config) command to reconfigure the object storage credentials. For example:

```shell
juicefs config --secret-key xxxxx mysql://user:password@(192.168.1.6:3306)/juicefs
```

### Encrypted file system {#encrypted-file-system}

For [encrypted file system](../security/encryption.md), all data is encrypted before uploading to the object storage, including automatic metadata backups. This is different from the `dump` command, which only output metadata in plain text.

For an encrypted file system, it is necessary to additionally set the `JFS_RSA_PASSPHRASE` environment variable and specify the RSA private key and encryption algorithm when restoring the automatically backed-up metadata:

```shell
export JFS_RSA_PASSPHRASE=xxxxxx
juicefs load \
  --encrypt-rsa-key my-private.pem \
  --encrypt-algo aes256gcm-rsa \
  redis://192.168.1.6:6379/1 \
  dump-2023-03-16-090750.json.gz
```

## Metadata inspection {#inspection}

In addition to completely exporting metadata, you can also export specific subdirectories. You can intuitively inspect the metadata in the directory tree.

```shell
juicefs dump redis://192.168.1.6:6379 meta-dump.json --subdir /path/in/juicefs
```

Using tools like `jq` to analyze the exported file is also an option.

### Binary backup content analysis and troubleshooting

Binary backup also supports direct inspection of type statistics and segment information:

```shell
# View backup metadata type statistics
juicefs load meta-dump --binary --stat

# View backup metadata Segments info (get offset)
juicefs load meta-dump --binary --stat --offset=-1

# View backup metadata for a specific Segment (by offset)
juicefs load meta-dump --binary --stat --offset=123416309
```

Example output:

```
Backup Version: 1
-----------------------
Name      | Num
-----------------------
acl           | 0
chunk      | 1111179
counter    | 6
delFile     | 0
edge        | 1112124
format      | 1
…
Segment: format
Value: {
"Name": "test2",
"UUID": "15b92123-1395-40e4-a5aa-edb38918985a",
"Storage": "file",
"Bucket": "/home/hjf/.juicefs/local/",
"BlockSize": 4096,
"Compression": "none",
"EncryptAlgo": "aes256gcm-rsa",
"TrashDays": 1,
"MetaVersion": 1,
"MinClientVersion": "1.1.0-A",
"DirStats": true,
"EnableACL": false
}
```

> The binary backup is in PB format, and you can also use custom tools to verify and inspect the backup.


================================================
FILE: docs/en/administration/monitoring.md
================================================
---
title: Monitoring and Data Visualization
sidebar_position: 3
description: This guide will help you understand the monitoring metrics provided by JuiceFS, and how to visualize these metrics using Prometheus and Grafana.
---

JuiceFS offers a suite of monitoring metrics, and this document outlines how to collect these metrics and visualize them with a monitoring system similar to the one depicted in the following image using Prometheus and Grafana.

![Monitoring Dashboard](../images/grafana_dashboard.png)

The setup process is as follows:

1. Configure Prometheus to scrape JuiceFS monitoring metrics.
2. Configure Grafana to read the monitoring data from Prometheus.
3. Use the official JuiceFS Grafana dashboard template to display the monitoring metrics.

:::tip
This document uses open-source versions of Grafana and Prometheus for examples.
:::

## 1. Configuring Prometheus to Scrape JuiceFS Monitoring Metrics {#add-scrape-config}

After mounting JuiceFS, it will automatically expose Prometheus-formatted metrics at `http://localhost:9567/metrics`. To observe the state changes of various metrics over a time range, you'll need to set up Prometheus and configure it to periodically scrape and save these metrics.

![Prometheus Client Data](../images/prometheus-client-data.jpg)

The process for collecting metrics may vary slightly depending on the mount method or access type (such as FUSE mount, CSI Driver, S3 Gateway, Hadoop SDK, etc.). For detailed instructions, see [Collecting Monitoring metrics data](#collect-metrics).

For example, here's how you might configure Prometheus for a common FUSE mount: If you haven't already set up Prometheus, follow the [official documentation](https://prometheus.io/docs/prometheus/latest/installation).

Edit your `prometheus.yml` configuration file and add a new scrape configuration under `scrape_configs`. Define the JuiceFS client metrics address:

```yaml {20-22}
global:
  scrape_interval: 15s
  evaluation_interval: 15s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

rule_files:
  # - "rules.yml"

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "juicefs"
    static_configs:
      - targets: ["localhost:9567"]
```

Start the Prometheus service:

```shell
./prometheus --config.file=prometheus.yml
```

Visit `http://localhost:9090` to see the Prometheus interface.

## 2. Configuring Grafana to Read from Prometheus {#grafana}

Once Prometheus begins scraping JuiceFS metrics, the next step is to set up Grafana to read from Prometheus.

If you haven't yet installed Grafana, follow the [official documentation](https://grafana.com/docs/grafana/latest/installation).

In Grafana, create a new data source of type Prometheus:

- **Name**: A name that helps you identify the data source, such as the name of the file system.
- **URL**: The Prometheus data API endpoint, typically `http://localhost:9090`.

![Grafana Data Source](../images/grafana-data-source.jpg)

## 3. Using the Official JuiceFS Grafana Dashboard Template {#grafana-dashboard}

JuiceFS's official Grafana dashboard templates can be found in the Grafana Dashboard repository and can be imported directly into Grafana via the URL `https://grafana.com/grafana/dashboards/20794/` or by using the ID `20794`.

Here's what the official JuiceFS Grafana dashboard might look like:

![Grafana Monitoring Dashboard](../images/grafana_dashboard.png)

## Collecting metrics data {#collect-metrics}

For different types of JuiceFS Client, metrics data is handled slightly differently.

### Mount point {#mount-point}

When the JuiceFS file system is mounted via the [`juicefs mount`](../reference/command_reference.mdx#mount) command, you can collect monitoring metrics via the address `http://localhost:9567/metrics`, or you can customize it via the `--metrics` option. For example:

```shell
juicefs mount --metrics localhost:9567 ...
```

You can view these monitoring metrics using the command line tool:

```shell
curl http://localhost:9567/metrics
```

In addition, the root directory of each JuiceFS file system has a hidden file called `.stats`, through which you can also view monitoring metrics. For example (assuming here that the path to the mount point is `/jfs`):

```shell
cat /jfs/.stats
```

:::tip
If you want to view the metrics in real-time, you can use the [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) command.
:::

### Kubernetes {#kubernetes}

See [CSI Driver documentation](https://juicefs.com/docs/csi/administration/going-production#monitoring).

### S3 Gateway {#s3-gateway}

:::note
This feature needs to run JuiceFS client version 0.17.1 and above.
:::

The [JuiceFS S3 Gateway](../guide/gateway.md) will provide monitoring metrics at the address `http://localhost:9567/metrics` by default, or you can customize it with the `-metrics` option. For example:

```shell
juicefs gateway --metrics localhost:9567 ...
```

If you are deploying JuiceFS S3 Gateway [in Kubernetes](../guide/gateway.md#deploy-in-kubernetes), you can refer to the Prometheus configuration in the [Kubernetes](#kubernetes) section to collect monitoring metrics (the difference is mainly in the regular expression for the label `__meta_kubernetes_pod_label_app_kubernetes_io_name`), e.g.:

```yaml {6-8}
scrape_configs:
  - job_name: 'juicefs-s3-gateway'
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
        action: keep
        regex: juicefs-s3-gateway
      - source_labels: [__address__]
        action: replace
        regex: ([^:]+)(:\d+)?
        replacement: $1:9567
        target_label: __address__
      - source_labels: [__meta_kubernetes_pod_node_name]
        target_label: node
        action: replace
```

#### Collected via Prometheus Operator {#prometheus-operator}

[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) enables users to quickly deploy and manage Prometheus in Kubernetes. With the `ServiceMonitor` CRD provided by Prometheus Operator, scrape configuration can be automatically generated. For example (assuming that the `Service` of the JuiceFS S3 Gateway is deployed in the `kube-system` namespace):

```yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: juicefs-s3-gateway
spec:
  namespaceSelector:
    matchNames:
      - kube-system
  selector:
    matchLabels:
      app.kubernetes.io/name: juicefs-s3-gateway
  endpoints:
    - port: metrics
```

For more information on Prometheus Operator, please refer to the [official documentation](https://prometheus-operator.dev/docs/user-guides/getting-started).

### Hadoop Java SDK {#hadoop}

[JuiceFS Hadoop Java SDK](../deployment/hadoop_java_sdk.md) supports reporting monitoring metrics to [Pushgateway](https://github.com/prometheus/pushgateway), [Graphite](https://graphiteapp.org), and [Prometheus remote write](https://prometheus.io/docs/specs/prw/remote_write_spec) endpoints.

#### Pushgateway

Report metrics to Pushgateway:

```xml
<property>
  <name>juicefs.push-gateway</name>
  <value>host:port</value>
</property>
```

At the same time, the frequency of reporting metrics can be modified through the `juicefs.push-interval` configuration. The default is to report once every 10 seconds.

:::info
According to the suggestion of [Pushgateway official document](https://github.com/prometheus/pushgateway/blob/master/README.md#configure-the-pushgateway-as-a-target-to-scrape), it is required to set `honor_labels: true` in the Prometheus's [scrape configuration](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).

It is important to note that the timestamp of the metrics scraped by Prometheus from Pushgateway is not the time when the JuiceFS Hadoop Java SDK reported it, but the time when it scraped. For details, please refer to [Pushgateway official document](https://github.com/prometheus/pushgateway/blob/master/README.md#about-timestamps).

By default, Pushgateway will only save metrics in memory. If you need to persist metrics to disk, you can specify the file path for saving by the `--persistence.file` option and the frequency of saving to the file with the `--persistence.interval` option (by default, the metrics will be saved every 5 minutes).
:::

:::note
Each process using JuiceFS Hadoop Java SDK will have a unique metric, and Pushgateway will always remember all the collected metrics. This may cause the continuous accumulation of metrics and taking up too much memory, and it will also make Prometheus scraping metrics slow. Therefore, it is recommended to clean up metrics on Pushgateway regularly.

For this, the following command can help. Clearing the metrics will not affect the running JuiceFS Hadoop Java SDK to continuously report data. **Note that the `--web.enable-admin-api` option must be specified when Pushgateway is started, and the following command will clear all monitoring metrics in Pushgateway.**

```bash
curl -X PUT http://host:9091/api/v1/admin/wipe
```

:::

For more information about Pushgateway, please check [official document](https://github.com/prometheus/pushgateway/blob/master/README.md).

#### Graphite

Report metrics to Graphite:

```xml
<property>
  <name>juicefs.push-graphite</name>
  <value>host:port</value>
</property>
```

At the same time, the frequency of reporting metrics can be modified through the `juicefs.push-interval` configuration. The default is to report every 10 seconds.

#### Remote Write

Report metrics to Prometheus remote write endpoint:

```xml
<property>
  <name>juicefs.push-remote-write</name>
  <value>http://host:port/api/v1/write</value>
</property>
```

At the same time, the frequency of reporting metrics can be modified through the `juicefs.push-interval` configuration. The default is to report every 10 seconds.

:::info
The remote write feature supports various Prometheus-compatible endpoints including:

- [Prometheus with remote write enabled](https://prometheus.io/docs/prometheus/latest/querying/api/#remote-write-receiver)
- [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/vmagent)
- [Cortex](https://cortexmetrics.io/docs/architecture)
- [Grafana Mimir](https://grafana.com/docs/mimir/latest/send)
- ETC

:::

For all configurations supported by JuiceFS Hadoop Java SDK, please refer to [documentation](../deployment/hadoop_java_sdk.md#client-configurations).

### Use Consul as registration center {#use-consul}

:::note
This feature needs to run JuiceFS client version 1.0.0 and above.
:::

JuiceFS support to use Consul as registration center for metrics API. The default Consul address is `127.0.0.1:8500`. You could customize the address through `--consul` option, e.g.:

```shell
juicefs mount --consul 1.2.3.4:8500 ...
```

When the Consul address is configured, the configuration of the `--metrics` option is not needed, and JuiceFS will automatically configure metrics URL according to its own network and port conditions. If `--metrics` is set at the same time, it will first try to listen on the configured metrics URL.

For each service registered to Consul, the [service name](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#name) is always `juicefs`, and the format of [service ID](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#id) is `<IP>:<mount-point>`, for example: `127.0.0.1:/tmp/jfs`.

The [`meta`](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#meta) of each service contains two keys `hostname` and `mountpoint`, the corresponding values ​​represent the host name and path of the mount point respectively. In particular, the `mountpoint` value for the S3 Gateway is always `s3gateway`.

After successfully registering with Consul, you need to add a new [`consul_sd_config`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) configuration to `prometheus.yml` and fill in the `services` with `juicefs`.

## Monitoring metrics reference {#metrics-reference}

Refer to [JuiceFS Metrics](../reference/p8s_metrics.md).


================================================
FILE: docs/en/administration/mount_at_boot.md
================================================
---
title: Mount JuiceFS at Boot Time
sidebar_position: 3
slug: /mount_juicefs_at_boot_time
---

After JuiceFS has been successfully mounted, follow this guide to set up auto-mount on boot.

## Linux

Starting with JuiceFS v1.1.0, the `--update-fstab` option of the mount command will automatically help you set up mount at boot:

```bash
$ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 <META-URL> <MOUNTPOINT>
$ grep <MOUNTPOINT> /etc/fstab
<META-URL> <MOUNTPOINT> juicefs _netdev,max-uploads=50,writeback,cache-size=204800 0 0
$ ls -l /sbin/mount.juicefs
lrwxrwxrwx 1 root root 29 Aug 11 16:43 /sbin/mount.juicefs -> /usr/local/bin/juicefs
```

If you'd like to control this process by hand, note that:

* A symlink needs to be created from `/sbin/mount.juicefs` to the JuiceFS executable, e.g. `ln -s /usr/local/bin/juicefs /sbin/mount.juicefs`.
* All mount options must also be included in the fstab options to take effect. Remember to remove the prefixing hyphen(s), and add their values with `=`, for example:

  ```bash
  $ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 -o max_read=99 <META-URL> /jfs
  # -o stands for FUSE options, and is handled differently
  $ grep jfs /etc/fstab
  redis://localhost:6379/1  /jfs juicefs _netdev,max-uploads=50,max_read=99,writeback,cache-size=204800 0 0
  ```

:::tip
By default, CentOS 6 will NOT mount network file system after boot, run following command to enable it:

```bash
sudo chkconfig --add netfs
```

:::

### Automating Mounting with systemd.mount

If you're using JuiceFS and need to apply settings like database access password, S3 access key, and secret key, which are hidden from the command line using environment variables for security reason, it may not be easy to configure them in the `/etc/fstab` file. In such cases, you can utilize systemd to mount your JuiceFS instance.

Here's how you can set up your systemd configuration file:

1. Create the file `/etc/systemd/system/juicefs.mount` and add the following content:

    ```conf
    [Unit]
    Description=Juicefs
    Before=docker.service

    [Mount]
    Environment="ALICLOUD_ACCESS_KEY_ID=mykey" "ALICLOUD_ACCESS_KEY_SECRET=mysecret" "META_PASSWORD=mypassword"
    What=mysql://juicefs@(mysql.host:3306)/juicefs
    Where=/juicefs
    Type=juicefs
    Options=_netdev,allow_other,writeback_cache

    [Install]
    WantedBy=remote-fs.target
    WantedBy=multi-user.target
    ```

    Feel free to modify the options and environments according to your needs.

2. Enable and start the JuiceFS mount using the following commands:

    ```sh
    ln -s /usr/local/bin/juicefs /sbin/mount.juicefs
    systemctl enable juicefs.mount
    systemctl start juicefs.mount
    ```

After completing these steps, you will be able to access `/juicefs` and store your files there.

## macOS

Create a file named `io.juicefs.<NAME>.plist` under `~/Library/LaunchAgents`. Replace `<NAME>` with JuiceFS file system name. Add following contents to the file (again, replace `NAME`, `PATH-TO-JUICEFS`, `META-URL`, `MOUNTPOINT` and `MOUNT-OPTIONS` with appropriate value):

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
        <key>Label</key>
        <string>io.juicefs.NAME</string>
        <key>ProgramArguments</key>
        <array>
                <string>PATH-TO-JUICEFS</string>
                <string>mount</string>
                <string>META-URL</string>
                <string>MOUNTPOINT</string>
                <string>MOUNT-OPTIONS</string>
        </array>
        <key>RunAtLoad</key>
        <true/>
</dict>
</plist>
```

:::tip
If there are multiple mount options, they can be set in multiple lines, for example:

```xml
                <string>--max-uploads</string>
                <string>50</string>
                <string>--cache-size</string>
                <string>204800</string>
```

:::

Use following commands to load the file created in the previous step and test whether the loading is successful. **Please make sure the metadata engine is running properly.**

```bash
launchctl load ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
launchctl start ~/Library/LaunchAgents/io.juicefs.<NAME>
ls <MOUNTPOINT>
```

If mount failed, you can add following configuration to `io.juicefs.<NAME>.plist` file for debug purpose:

```xml
        <key>StandardOutPath</key>
        <string>/tmp/juicefs.out</string>
        <key>StandardErrorPath</key>
        <string>/tmp/juicefs.err</string>
```

Use following commands to reload the latest configuration and inspect the output:

```bash
launchctl unload ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
launchctl load ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
cat /tmp/juicefs.out
cat /tmp/juicefs.err
```

If you install Redis server by Homebrew, you could use following command to start it at boot:

```bash
brew services start redis
```

Then add following configuration to `io.juicefs.<NAME>.plist` file for ensure Redis server is loaded:

```xml
        <key>KeepAlive</key>
        <dict>
                <key>OtherJobEnabled</key>
                <string>homebrew.mxcl.redis</string>
        </dict>
```


================================================
FILE: docs/en/administration/status_check_and_maintenance.md
================================================
---
title: Status Check & Maintenance
sidebar_position: 4
description: This document introduces JuiceFS' status check and maintenance tools to help you ensure file system reliability and integrity.
---

Any storage system needs regular checks and maintenance after it is put into use to promptly identify and address potential issues, ensuring the reliability of the file system and the integrity and consistency of stored data.

JuiceFS provides a series of tools to check and maintain the file system. These tools not only help you understand the basic information of the file system and its operational status, but also help you detect and fix potential problems more easily.

## status

The `juicefs status` command reviews basic information about a JuiceFS file system and the status of all active sessions, including mounts, SDK accesses, S3 Gateway, and WebDAV connections.

The basic information of the file system includes name, UUID, storage type, bucket, and Trash status.

```shell
juicefs status redis://xxx.cache.amazonaws.com:6379/1
```

```json
{
  "Setting": {
    "Name": "myjfs",
    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
    "Storage": "s3",
    "Bucket": "https://xxx.s3.amazonaws.com",
    "AccessKey": "xxx",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "TrashDays": 1,
    "MetaVersion": 1
  },
  "Sessions": [
    {
      "Sid": 2,
      "Heartbeat": "2021-08-23T16:47:59+08:00",
      "Version": "1.0.0+2022-08-08.cf0c269",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869146
    }
  ]
}
```

Specifying the `Sid` of a session with the `--session, -s` option allows you to provide more information about the session.

```shell
juicefs status --session 2 redis://xxx.cache.amazonaws.com:6379/1
```

```json
{
  "Sid": 2,
  "Heartbeat": "2021-08-23T16:47:59+08:00",
  "Version": "1.0.0+2022-08-08.cf0c269",
  "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
  "MountPoint": "/home/herald/mnt",
  "ProcessID": 2869146
}
```

Depending on the status of the session, the message may also include:

- Sustained inodes: These are files that have been deleted but remain open in the current session, temporarily retained until they are closed.
- Flocks: BSD lock information about the file locked by this session.
- Plocks: POSIX lock information about the file locked by this session.

## info

The `juicefs info` command checks the metadata information of the specified file or directory, including the object path on the object storage for each block corresponding to that file.

### Check file metadata

This command checks the metadata of a file:

```shell
$ juicefs info mnt/luggage-6255515.jpg

mnt/luggage-6255515.jpg :
  inode: 36
  files: 1
   dirs: 0
 length: 789.02 KiB (807955 Bytes)
   size: 792.00 KiB (811008 Bytes)
   path: /luggage-6255515.jpg
objects:
+------------+------------------------------+--------+--------+--------+
| chunkIndex |          objectName          |  size  | offset | length |
+------------+------------------------------+--------+--------+--------+
|          0 | myjfs/chunks/0/0/80_0_807955 | 807955 |      0 | 807955 |
+------------+------------------------------+--------+--------+--------+
```

### Check directory metadata

This command checks only one level of directories by default:

```shell
$ juicefs info ./mnt

mnt :
  inode: 1
  files: 9
   dirs: 4
 length: 2.41 MiB (2532102 Bytes)
   size: 2.44 MiB (2555904 Bytes)
   path: /
```

If you want to recursively check all subdirectories, you need to specify the `--recursive, -r` option:

```shell
$ juicefs info -r ./mnt

./mnt :
  inode: 1
  files: 33
   dirs: 4
 length: 80.29 MiB (84191037 Bytes)
   size: 80.34 MiB (84242432 Bytes)
   path: /
```

### Check metadata with inodes

You can also perform reverse lookup on the file path and data block information via inodes, but you need to enter the mount point directory.

```shell
~     $ cd mnt
~/mnt $ juicefs info -i 36

36 :
  inode: 36
  files: 1
   dirs: 0
 length: 789.02 KiB (807955 Bytes)
   size: 792.00 KiB (811008 Bytes)
   path: /luggage-6255515.jpg
objects:
+------------+------------------------------+--------+--------+--------+
| chunkIndex |          objectName          |  size  | offset | length |
+------------+------------------------------+--------+--------+--------+
|          0 | myjfs/chunks/0/0/80_0_807955 | 807955 |      0 | 807955 |
+------------+------------------------------+--------+--------+--------+
```

## gc

The `juicefs gc` command handles "object leaks" and runs compaction on data fragments created by file overwrites. It scans metadata and compares it with object storage to find or clean up any object storage blocks that need processing.

:::info
An **object leak** is a situation where a block of data is in the object storage, but there is no corresponding record in the metadata engine. Object leaks are rare and can be caused by program bugs, unanticipated problems with the metadata engine or object storage, power outages, and network disconnections.
:::

:::tip
Temporary intermediate files may be produced when files are uploaded to the object storage. After the writing is complete, they will be cleaned up. To avoid intermediate files being misclassified as leaked objects, `juicefs gc` skips files uploaded in the last 1 hour by default. The skipped time range (in seconds) can be adjusted via the `JFS_GC_SKIPPEDTIME` environment variable. For example, to set skip the last 2 hours of files: `export JFS_GC_SKIPPEDTIME=7200`.
:::

:::tip
Because the `juicefs gc` command scans all objects in the object storage, there is some overhead in executing this command for file systems with large amounts of data.
:::

### Scan for leaked objects

Although object leaks almost never occur, you can still perform the appropriate routine checks as needed. By default, `juicefs gc` only performs scans:

```shell
$ juicefs gc sqlite3://myjfs.db

2022/11/10 11:35:53.662024 juicefs[24404] <INFO>: Meta address: sqlite3://myjfs.db [interface.go:402]
2022/11/10 11:35:53.662759 juicefs[24404] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/ [gc.go:108]
  Listed slices count: 92
Scanned objects count: 91 / 91 [======================================]  done
  Valid objects count: 91
  Valid objects bytes: 7.67 MiB (8040969 Bytes)
 Leaked objects count: 0
 Leaked objects bytes: 0.00 b   (0 Bytes)
Skipped objects count: 0
Skipped objects bytes: 0.00 b   (0 Bytes)
2022/11/10 11:35:53.665015 juicefs[24404] <INFO>: scanned 91 objects, 91 valid, 0 leaked (0 bytes), 0 skipped (0 bytes) [gc.go:306]
```

### Purge leaked objects

When the `juicefs gc` command scans for "leaked objects", you can purge them with the `--delete` option. The client starts 10 threads by default to perform the purge operation. You can adjust the number of threads with the `--threads, -p` option.

```shell
$ juicefs gc sqlite3://myjfs.db --delete

2022/11/10 10:49:31.490016 juicefs[24086] <INFO>: Meta address: sqlite3://myjfs.db [interface.go:402]
2022/11/10 10:49:31.490831 juicefs[24086] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/ [gc.go:108]
  Listed slices count: 92
Deleted pending count: 0
Scanned objects count: 103 / 103 [====================================]  done
  Valid objects count: 92
  Valid objects bytes: 7.67 MiB  (8045065 Bytes)
 Leaked objects count: 11
 Leaked objects bytes: 12.87 MiB (13494874 Bytes)
Skipped objects count: 0
Skipped objects bytes: 0.00 b    (0 Bytes)
2022/11/10 10:49:31.493682 juicefs[24086] <INFO>: scanned 103 objects, 92 valid, 11 leaked (13494874 bytes), 0 skipped (0 bytes) [gc.go:306]
```

Then, you can run `juicefs gc` again to check if the purge was successful.

## fsck

The `juicefs fsck` tool performs block-by-block comparison with metadata, mainly to fix various problems that may occur and can be fixed within the file system. It can help you find cases where records exist in the metadata engine but there is no corresponding data block in the object storage. It can also check if the file attribute information exists.

```shell {5}
$ juicefs fsck sqlite3://myjfs2.db

2022/11/10 17:31:19.062348 juicefs[26158] <INFO>: Meta address: sqlite3://myjfs2.db [interface.go:402]
2022/11/10 17:31:19.063132 juicefs[26158] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/ [fsck.go:73]
2022/11/10 17:31:19.065857 juicefs[26158] <ERROR>: can't find block 0/1/1063_0_2693747 for file /david-bruno-silva-Z19vToWBDIc-unsplash.jpg: stat /Users/herald/.juicefs/local/myjfs/chunks/0/1/1063_0_2693747: no such file or directory [fsck.go:146]
  Found blocks count: 68
  Found blocks bytes: 34.24 MiB (35904042 Bytes)
 Listed slices count: 65
Scanned slices count: 65 / 65 [=======================================]  done
Scanned slices bytes: 36.81 MiB (38597789 Bytes)
   Lost blocks count: 1
   Lost blocks bytes: 2.57 MiB  (2693747 Bytes)
2022/11/10 17:31:19.066243 juicefs[26158] <FATAL>: 1 objects are lost (2693747 bytes), 1 broken files:
        INODE: PATH
           57: /david-bruno-silva-Z19vToWBDIc-unsplash.jpg [fsck.go:168]
```

As you can see from the results, the `juicefs fsck` scan found a file corruption in the file system due to a missing data block.

Although the result indicates that the file in the backend storage is corrupted, it is still necessary to check if the file is accessible at the mount point. This is because JuiceFS caches the recently accessed file data locally, and the version of the file before the corruption can be re-uploaded with the cached file data block to avoid losing data if it is already cached locally. You can look for cached data in the cache directory (the path corresponding to the `--cache-dir` option) based on the path of the block output from the `juicefs fsck` command. For example, the path of the missing block in the above example is `0/1/1063_0_2693747`.

## compact {#compact}

The `juicefs compact` command is a new feature introduced in version v1.2. It is a tool used to handle the fragmented data caused by overwrite operations. This tool merges or cleans up the large amounts of non-contiguous slices created by random writes, thereby improving the read performance of the file system.

Unlike `juicefs gc`, which performs garbage collection and fragment cleaning for the entire file system, `juicefs compact` only handles the fragmented data caused by overwrite operations and does not handle object leaks or pending cleanup objects. Additionally, `juicefs compact` only handles the fragmented data within a specified directory and does not handle the entire file system.

You can use the following command to execute `juicefs compact`:

```shell
juicefs compact /mnt/jfs/foo
```

You can also specify the number of concurrent threads using the `-p` or `--threads` option to speed up processing. The default value is 10, but you can adjust it based on your actual situation.

```shell
juicefs compact /mnt/jfs/foo -p 20
```


================================================
FILE: docs/en/administration/sync_accounts_between_multiple_hosts.md
================================================
---
title: Sync Accounts between Multiple Hosts
sidebar_position: 7
slug: /sync_accounts_between_multiple_hosts
---

JuiceFS supports Unix file permission, you can manage permissions by directory or file granularity, just like a local file system.

To provide users with an intuitive and consistent permission management experience (e.g. the files accessible by user A on host X should be accessible by the same user on host Y), the same user who wants to access JuiceFS should have the same UID and GID on all hosts.

Here we provide a simple [Ansible](https://www.ansible.com/community) playbook to demonstrate how to ensure an account with same UID and GID on multiple hosts.

:::note
If you are using JuiceFS in Hadoop environment, besides sync accounts between multiple hosts, you can also specify a global user list and user group file. Please refer to [here](../deployment/hadoop_java_sdk.md#other-configurations) for more information.
:::

## Install Ansible

Select a host as a [control node](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#managed-node-requirements) which can access all hosts using `ssh` with the same privileged account like `root` or other sudo account. Then, install Ansible on this host. Refer to [Installing Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installing-ansible) for details.

## Ensure the same account on all hosts

Create `account-sync/play.yaml` as follows:

```yaml
---
- hosts: all
  tasks:
    - name: "Ensure group {{ group }} with gid {{ gid }} exists"
      group:
        name: "{{ group }}"
        gid: "{{ gid }}"
        state: present

    - name: "Ensure user {{ user }} with uid {{ uid }} exists"
      user:
        name: "{{ user }}"
        uid: "{{ uid }}"
        group: "{{ gid }}"
        state: present
```

Create the Ansible inventory `hosts`, which contains IP addresses of all hosts that need to create account.

Here we ensure an account `alice` with UID 1200 and group `staff` with GID 500 on 2 hosts:

```shell
~/account-sync$ cat hosts
172.16.255.163
172.16.255.180
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=staff gid=500 user=alice uid=1200" play.yaml

PLAY [all] ************************************************************************************************

TASK [Gathering Facts] ************************************************************************************
ok: [172.16.255.180]
ok: [172.16.255.163]

TASK [Ensure group staff with gid 500 exists] *************************************************************
ok: [172.16.255.163]
ok: [172.16.255.180]

TASK [Ensure user alice with uid 1200 exists] *************************************************************
changed: [172.16.255.180]
changed: [172.16.255.163]

PLAY RECAP ************************************************************************************************
172.16.255.163             : ok=3    changed=1    unreachable=0    failed=0
172.16.255.180             : ok=3    changed=1    unreachable=0    failed=0
```

Now the new account `alice:staff` has been created on these 2 hosts.

If the specified UID or GID has been allocated to another user or group on some hosts, the creation would fail.

```shell
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=ubuntu gid=1000 user=ubuntu uid=1000" play.yaml

PLAY [all] ************************************************************************************************

TASK [Gathering Facts] ************************************************************************************
ok: [172.16.255.180]
ok: [172.16.255.163]

TASK [Ensure group ubuntu with gid 1000 exists] ***********************************************************
ok: [172.16.255.163]
fatal: [172.16.255.180]: FAILED! => {"changed": false, "msg": "groupmod: GID '1000' already exists\n", "name": "ubuntu"}

TASK [Ensure user ubuntu with uid 1000 exists] ************************************************************
ok: [172.16.255.163]
    to retry, use: --limit @/home/ubuntu/account-sync/play.retry

PLAY RECAP ************************************************************************************************
172.16.255.163             : ok=3    changed=0    unreachable=0    failed=0
172.16.255.180             : ok=1    changed=0    unreachable=0    failed=1
```

In the above example, the group ID 1000 has been allocated to another group on host `172.16.255.180`. So we should **change the GID**  or **delete the group with GID 1000** on host `172.16.255.180`, and then run the playbook again.

:::caution
If the UID / GID of an existing user is changed, the user may lose permissions to previously accessible files. For example:

```shell
$ ls -l /tmp/hello.txt
-rw-r--r-- 1 alice staff 6 Apr 26 21:43 /tmp/hello.txt
$ id alice
uid=1200(alice) gid=500(staff) groups=500(staff)
```

We change the UID of alice from 1200 to 1201

```shell
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=staff gid=500 user=alice uid=1201" play.yaml
```

Now we have no permission to remove this file as its owner is not alice:

```shell
$ ls -l /tmp/hello.txt
-rw-r--r-- 1 1200 staff 6 Apr 26 21:43 /tmp/hello.txt
$ rm /tmp/hello.txt
rm: remove write-protected regular file '/tmp/hello.txt'? y
rm: cannot remove '/tmp/hello.txt': Operation not permitted
```

:::


================================================
FILE: docs/en/administration/troubleshooting.md
================================================
---
title: Troubleshooting Cases
sidebar_position: 6
---

Debugging process for some frequently encountered JuiceFS problems.

## Volume format error {#format-error}

### Error creating an already formatted volume {#create-file-system-repeatedly}

If `juicefs format` has been run on the metadata engine, executing `juicefs format` command again might result in the following error:

```
cannot update volume XXX from XXX to XXX
```

In this case, clean up the metadata engine, and try again.

### Invalid Redis URL {#invalid-redis-url}

When using Redis below 6.0.0, `juicefs format` will fail when `username` is specified:

```
format: ERR wrong number of arguments for 'auth' command
```

Username is supported in Redis 6.0.0 and above, you'll need to omit the `username` from the Redis URL, e.g. `redis://:password@host:6379/1`.

### Redis Sentinel mode NOAUTH error {#redis-sentinel-noauth-error}

If you encounter the following error when using [Redis Sentinel mode](../administration/metadata/redis_best_practices.md#sentinel-mode):

```
sentinel: GetMasterAddrByName master="xxx" failed: NOAUTH Authentication required.
```

Please confirm whether [the password is set](https://redis.io/docs/management/sentinel/#configuring-sentinel-instances-with-authentication) for the Redis Sentinel instance, if it is set, then you need to pass the `SENTINEL_PASSWORD` environment variable configures the password to connect to the Sentinel instance separately, and the password in the metadata engine URL will only be used to connect to the Redis server.

## Mount errors due to permission issue {#mount-permission-error}

When using [Docker bind mounts](https://docs.docker.com/storage/bind-mounts) to mount a directory on the host machine into a container, you may encounter the following error:

```
docker: Error response from daemon: error while creating mount source path 'XXX': mkdir XXX: file exists.
```

This is usually due to the `juicefs mount` command being executed with a non-root user, thus Docker daemon doesn't have permission to access this directory. You can deal with this using one of below methods:

* Execute `juicefs mount` command with root user
* Add [`allow_other`](../reference/fuse_mount_options.md#allow_other) option to both FUSE config file, and mount command.

When executing `juicefs mount` command with a non-root user, you may see:

```
fuse: fuse: exec: "/bin/fusermount": stat /bin/fusermount: no such file or directory
```

This only occurs when a non-root user is trying to mount file system, meaning `fusermount` is not found, there are two solutions to this problem:

* Execute `juicefs mount` command with root user
* Install `fuse` package (e.g. `apt-get install fuse`, `yum install fuse`)

If current user doesn't have permission to execute `fusermount` command, you'll see:

```
fuse: fuse: fork/exec /usr/bin/fusermount: permission denied
```

When this happens, check `fusermount` permission:

```shell
# Only root user and fuse group user have executable permission
$ ls -l /usr/bin/fusermount
-rwsr-x---. 1 root fuse 27968 Dec  7  2011 /usr/bin/fusermount

# All users have executable permission
$ ls -l /usr/bin/fusermount
-rwsr-xr-x 1 root root 32096 Oct 30  2018 /usr/bin/fusermount
```

## Read write slow & read write error {#read-write-error}

### Connection problems with object storage (slow internet speed) {#io-error-object-storage}

If JuiceFS Client cannot connect to object storage, or the bandwidth is simply not enough, JuiceFS will complain in logs:

```text
# upload speed is slow
<INFO>: slow request: PUT chunks/0/0/1_0_4194304 (%!s(<nil>), 20.512s)

# flush timeouts usually means failure to upload data to object storage
<ERROR>: flush 9902558 timeout after waited 8m0s
<ERROR>: pending slice 9902558-80: ...
```

If the problem is a network connection issue, or the object storage has service issue, troubleshooting is relatively simple. But if the error was caused by low bandwidth, there's some more to consider.

The first issue with slow connection is upload / download timeouts (demonstrated in the above error logs), to tackle this problem:

* Reduce upload concurrency, e.g. [`--max-uploads=1`](../reference/command_reference.mdx#mount-data-storage-options), to avoid upload timeouts.
* Reduce buffer size, e.g. [`--buffer-size=64`](../reference/command_reference.mdx#mount-data-cache-options) or even lower. In a large bandwidth condition, increasing buffer size improves parallel performance. But in a low speed environment, this only makes `flush` operations slow and prone to timeouts.
* Default timeout for GET / PUT requests are 60 seconds, increasing `--get-timeout` and `--put-timeout` may help with read / write timeouts.

In addition, the ["Client Write Cache"](../guide/cache.md#client-write-cache) feature needs to be used with caution in low bandwidth environment. Let's briefly go over the JuiceFS Client background job design: every JuiceFS Client runs background jobs by default, one of which is data compaction, and if the client has poor internet speed, it'll drag down performance for the whole system. A worse case is when client write cache is also enabled, compaction results are uploaded too slowly, forcing other clients into a read hang when accessing the affected files:

```text
# While compaction results are slowly being uploaded in low speed clients, read from other clients will hang and eventually fail
<ERROR>: read file 14029704: input/output error
<INFO>: slow operation: read (14029704,131072,0): input/output error (0) <74.147891>
<WARNING>: fail to read sliceId 1771585458 (off:4194304, size:4194304, clen: 37746372): get chunks/0/0/1_0_4194304: oss: service returned error: StatusCode=404, ErrorCode=NoSuchKey, ErrorMessage="The specified key does not exist.", RequestId=62E8FB058C0B5C3134CB80B6
```

To avoid this type of issue, we recommend disabling background jobs on low-bandwidth clients, i.e. adding [`--no-bgjob`](../reference/command_reference.mdx#mount-metadata-options) option to the mount command.

### WARNING log: block not found in object storage {#warning-log-block-not-found-in-object-storage}

When using JuiceFS at scale, there will be some warnings in client logs:

```
<WARNING>: fail to read sliceId 1771585458 (off:4194304, size:4194304, clen: 37746372): get chunks/0/0/1_0_4194304: oss: service returned error: StatusCode=404, ErrorCode=NoSuchKey, ErrorMessage="The specified key does not exist.", RequestId=62E8FB058C0B5C3134CB80B6
```

When this type of warning occurs, but not accompanied by I/O errors (indicated by `input/output error` in client logs), you can safely ignore them and continue normal use, client will retry automatically and resolves this issue.

This warning means that JuiceFS Client cannot read a particular slice, because a block does not exist, and object storage has to return a `NoSuchKey` error. Usually this is caused by:

* Clients carry out compaction asynchronously, which upon completion, will change the relationship between file and its corresponding blocks, causing problems for other clients that's already reading this file, hence the warning.
* Some clients enabled ["Client Write Cache"](../guide/cache.md#client-write-cache), they write a file, commit to the Metadata Service, but the corresponding blocks are still pending to upload (caused by for example, [slow internet speed](#io-error-object-storage)). Meanwhile, other clients that are already accessing this file will meet this warning.

Again, if no errors occur, just safely ignore this warning.

## Read amplification

In JuiceFS, a typical read amplification manifests as object storage traffic being much larger than JuiceFS Client read speed. For example, JuiceFS Client is reading at 200MiB/s, while S3 traffic grows up to 2GiB/s.

JuiceFS is equipped with the [prefetch mechanism](../guide/cache.md#client-read-cache): when reading a block at arbitrary position, the whole block is asynchronously scheduled for download. This is a read optimization enabled by default, but in some cases, this brings read amplification. Once we know this, we can start the diagnose.

We'll collect JuiceFS access log (see [Access log](./fault_diagnosis_and_analysis.md#access-log)) to determine the file system access patterns of our application, and adjust JuiceFS configuration accordingly. Below is a diagnose process in an actual production environment:

```shell
# Collect access log for a period of time, like 30 seconds:
cat /jfs/.accesslog | grep -v "^#$" >> access.log

# Simple analysis using wc / grep finds out that most operations are read:
wc -l access.log
grep "read (" access.log | wc -l

# Pick a file and track operation history using its inode (first argument of read):
grep "read (148153116," access.log
```

Access log looks like:

```
2022.09.22 08:55:21.013121 [uid:0,gid:0,pid:0] read (148153116,131072,28668010496): OK (131072) <1.309992>
2022.09.22 08:55:21.577944 [uid:0,gid:0,pid:0] read (148153116,131072,14342746112): OK (131072) <1.385073>
2022.09.22 08:55:22.098133 [uid:0,gid:0,pid:0] read (148153116,131072,35781816320): OK (131072) <1.301371>
2022.09.22 08:55:22.883285 [uid:0,gid:0,pid:0] read (148153116,131072,3570397184): OK (131072) <1.305064>
2022.09.22 08:55:23.362654 [uid:0,gid:0,pid:0] read (148153116,131072,100420673536): OK (131072) <1.264290>
2022.09.22 08:55:24.068733 [uid:0,gid:0,pid:0] read (148153116,131072,48602152960): OK (131072) <1.185206>
2022.09.22 08:55:25.351035 [uid:0,gid:0,pid:0] read (148153116,131072,60529270784): OK (131072) <1.282066>
2022.09.22 08:55:26.631518 [uid:0,gid:0,pid:0] read (148153116,131072,4255297536): OK (131072) <1.280236>
2022.09.22 08:55:27.724882 [uid:0,gid:0,pid:0] read (148153116,131072,715698176): OK (131072) <1.093108>
2022.09.22 08:55:31.049944 [uid:0,gid:0,pid:0] read (148153116,131072,8233349120): OK (131072) <1.020763>
2022.09.22 08:55:32.055613 [uid:0,gid:0,pid:0] read (148153116,131072,119523176448): OK (131072) <1.005430>
2022.09.22 08:55:32.056935 [uid:0,gid:0,pid:0] read (148153116,131072,44287774720): OK (131072) <0.001099>
2022.09.22 08:55:33.045164 [uid:0,gid:0,pid:0] read (148153116,131072,1323794432): OK (131072) <0.988074>
2022.09.22 08:55:36.502687 [uid:0,gid:0,pid:0] read (148153116,131072,47760637952): OK (131072) <1.184290>
2022.09.22 08:55:38.525879 [uid:0,gid:0,pid:0] read (148153116,131072,53434183680): OK (131072) <0.096732>
```

Studying the access log, it's easy to conclude that our application performs frequent random small reads on a very large file, notice how the offset (the third argument of `read`) jumps significantly between each read, this means consecutive reads are accessing very different parts of the large file, thus prefetched data blocks is not being effectively utilized (a block is 4MiB by default, an offset of 4194304 bytes), only causing read amplifications. In this situation, we can safely set `--prefetch` to 0, so that prefetch concurrency is zero, which is essentially disabled. Re-mount and our problem is solved.

## High memory usage {#memory-optimization}

If JuiceFS Client takes up too much memory, you may choose to optimize memory usage using below methods, but note that memory optimization is not free, and each setting adjustment will bring corresponding overhead, please do sufficient testing and verification before adjustment.

* Read/Write buffer size (`--buffer-size`) directly correlate to JuiceFS Client memory usage, using a lower `--buffer-size` will effectively decrease memory usage, but please note that the reduction may also affect the read and write performance. Read more at [Read/Write Buffer](../guide/cache.md#buffer-size).
* JuiceFS mount client is an Go program, which means you can decrease `GOGC` (default to 100, in percentage) to adopt a more active garbage collection. This inevitably increase CPU usage and may even directly hinder performance. Read more at [Go Runtime](https://pkg.go.dev/runtime#hdr-Environment_Variables).
* If you use self-hosted Ceph RADOS as the data storage of JuiceFS, consider replacing glibc with [TCMalloc](https://google.github.io/tcmalloc), the latter comes with more efficient memory management and may decrease off-heap memory footprint in this scenario.

## Unmount error {#unmount-error}

If a file or directory are opened when you unmount JuiceFS, you'll see below errors, assuming JuiceFS is mounted on `/jfs`:

```shell
# Linux
umount: /jfs: target is busy.
        (In some cases useful info about processes that use
         the device is found by lsof(8) or fuser(1))

# macOS
Resource busy -- try 'diskutil unmount'
```

In such case:

* Locate the files being opened using commands like `lsof /jfs`, deal with these processes (like force quit), and retry.
* Force close the FUSE connection by `echo 1 > /sys/fs/fuse/connections/[device-number]/abort`, and then retry. You might need to find out the `[device-number]` using `lsof /jfs`, but if JuiceFS is the only FUSE mount point in the system, then `/sys/fs/fuse/connections` will contain only a single directory, no need to check further.
* If you just want to unmount ASAP, and do not care what happens to opened files, run `juicefs umount --force` to forcibly umount, note that behavior is different between Linux and macOS:
  * For Linux, `juicefs umount --force` is translated to `umount --lazy`, file system will be detached, but opened files remain, FUSE client will exit when file descriptors are released.
  * For macOS, `juicefs umount --force` is translated to `umount -f`, file system will be forcibly unmounted and opened files will be closed immediately.

## Fail to mount jfs after system reboot {#netmount}

Minimized Linux distribution, such as Alpine, may lack the 'netmount' package within their base image. The absence of the 'netmount' package can lead to failure in automatically mounting network file system like JuiceFS defined in '/etc/fstab' post-rebooting. To rectify this problem, following is the recommended method to install the 'netmount' package, using Alpine as an example:

```bash
# use --update-fstab to add juicefs mount to /etc/fstab

# install and enable netmount service
apk add openrc

rc-update add netmount boot
# * service netmount added to runlevel boot

 rc-service netmount start
# / # rc-service netmount start
# * Mounting network filesystems ...
```

## Development related issues {#development-related-issues}

Compiling JuiceFS requires GCC 5.4 and above, this error may occur when using lower versions:

```
/go/pkg/tool/linux_amd64/link: running gcc failed: exit status 1
/go/pkg/tool/linux_amd64/compile: signal: killed
```

If glibc version is different between build environment and runtime, you may see below error:

```
$ juicefs
juicefs: /lib/aarch64-linux-gnu/libc.so.6: version 'GLIBC_2.28' not found (required by juicefs)
```

This requires you to re-compile JuiceFS Client in your runtime host environment. Most Linux distributions comes with glibc by default, you can check its version with `ldd --version`.


================================================
FILE: docs/en/administration/upgrade.md
================================================
---
sidebar_position: 9
---

# Upgrade

Upgrade methods vary with different JuiceFS clients.

## Mount point

### Normal upgrade

The JuiceFS client only has one binary file. So to upgrade the new version, you only need to replace the old one with the new one.

- **Use pre-compiled client**: Refer to [Install the pre-compiled client](../getting-started/installation.md#install-the-pre-compiled-client) for details.
- **Manually compile client**: You can pull the latest source code and recompile it to overwrite the old version of the client. Please refer to ["Installation"](../getting-started/installation.md#manually-compiling) for details.

:::caution
For the file system that has been mounted using the old version of JuiceFS client, you need to [unmount file system](../getting-started/for_distributed.md#7-unmount-the-file-system), and then re-mount it with the new version of JuiceFS client.

When unmounting the file system, make sure that no application is accessing it. Otherwise the unmount will fail. Do not forcibly unmount the file system, as it may cause the application unable to continue to access it as expected.
:::

### Smooth upgrade

Starting from version v1.2, JuiceFS supports the smooth upgrade feature, which allows you to mount JuiceFS again at the same mount point to achieve a seamless client upgrade. In addition, this feature can also be used to dynamically adjust mount parameters.

Here are two common scenarios for illustration:

- Client upgrade
    For example, if you have a `juicefs mount` process like `juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d` and want to upgrade to a new JuiceFS client without unmounting, perform the following steps:

    ```shell
    # 1. Backup the current binary
    cp juicefs juicefs.bak
   
    # 2. Download the new binary to overwrite the current juicefs binary
   
    # 3. Execute the juicefs mount command again to complete the smooth upgrade
    juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d
    ```

- Dynamically adjusting mount parameters

    For example, if you have a `juicefs mount` process like `juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d` and want to adjust the log level to debug without unmounting, execute the following command:

```shell
# Adjust the log level
juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs --debug -d
    ```

Notes:

- Smooth upgrades require both old and new JuiceFS client versions to be v1.2 or higher.

- The FUSE parameters in the new mount parameters should be consistent with the old mount parameters, otherwise the smooth upgrade will overwrite the mount at the current mount point.

- When `enable-xattr` is enabled, smooth upgrade will overwrite the mount at the current mount point.

## Kubernetes CSI Driver

Please refer to [official documentation](https://juicefs.com/docs/csi/upgrade-csi-driver) to learn how to upgrade JuiceFS CSI Driver.

## S3 Gateway

Like [mount point](#mount-point), upgrading S3 Gateway is to replace the old version with the new version.

If it is [deployed through Kubernetes](../guide/gateway.md#deploy-in-kubernetes), you need to upgrade according to the specific deployment method, which is described in detail below.

### Upgrade via kubectl

Download and modify the `juicedata/juicefs-csi-driver` image tag in S3 Gateway [deploy YAML](https://github.com/juicedata/juicefs/blob/main/deploy/juicefs-s3-gateway.yaml) to the version you want to upgrade (see [here](https://github.com/juicedata/juicefs-csi-driver/releases) for a detailed description of all versions), and then run the following command:

```shell
kubectl apply -f ./juicefs-s3-gateway.yaml
```

### Upgrade via Helm

Please run the following commands in sequence to upgrade the S3 Gateway:

```shell
helm repo update
helm upgrade juicefs-s3-gateway juicefs-s3-gateway/juicefs-s3-gateway -n kube-system -f ./values.yaml
```

## Hadoop Java SDK

Please refer to [Install and compile the client](../deployment/hadoop_java_sdk.md#install-and-compile-the-client) to learn how to install the new version of the Hadoop Java SDK, and then follow steps in [Deploy the client](../deployment/hadoop_java_sdk.md#deploy-the-client) to redeploy the new version of the client to complete the upgrade.

:::note
Some components must be restarted to use the new version of the Hadoop Java SDK. Please refer to the ["Restart Services"](../deployment/hadoop_java_sdk.md#restart-services) for details.
:::


================================================
FILE: docs/en/benchmark/benchmark.md
================================================
---
title: Performance Benchmark
sidebar_position: 1
slug: .
description: This article describes benchmarking the file system using FIO, mdtest, and the bench command that comes with JuiceFS.
---

Redis is used as Metadata Engine in this benchmark. Under this test condition, JuiceFS performs 10x better than [Amazon EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse).

## Basic benchmark

JuiceFS provides a subcommand `bench` to run a few basic benchmarks to evaluate how it works in your environment:

![JuiceFS Bench](../images/juicefs-bench.png)

## Throughput

Performed sequential read/write benchmarks on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [fio](https://github.com/axboe/fio). Here is the result:

[![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)](../images/sequential-read-write-benchmark.svg)

It shows JuiceFS can provide 10X more throughput than the other two. Read [more details](fio.md).

## Metadata IOPS

Performed a simple [mdtest](https://github.com/hpc/ior) benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) by [mdtest](https://github.com/hpc/ior). Here is the result:

[![Metadata Benchmark](../images/metadata-benchmark.svg)](../images/metadata-benchmark.svg)

It shows JuiceFS can provide significantly more metadata IOPS than the other two. Read [more details](mdtest.md).

## Analyze performance

See [Real-Time Performance Monitoring](../administration/fault_diagnosis_and_analysis.md#performance-monitor) if you encounter performance issues.


================================================
FILE: docs/en/benchmark/fio.md
================================================
---
title: Benchmark with fio
sidebar_position: 7
slug: /fio
---

:::tip
Trash is enabled in JuiceFS v1.0+ by default. As a result, temporary files are created and deleted in the file system during the benchmark, and these files will be eventually dumped into a directory named `.trash`. To avoid storage space being occupied by `.trash`, you can run command `juicefs config META-URL --trash-days 0` to disable Trash before benchmark. See [trash](../security/trash.md) for details.
:::

## Testing Approach

Perform a sequential read/write benchmark on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) with [fio](https://github.com/axboe/fio).

## Testing Tool

The following tests are performed with `fio` 3.1.

Sequential read test (numjobs: 1):

```
fio --name=sequential-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G
fio --name=sequential-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G
fio --name=sequential-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G
```

Sequential write test (numjobs: 1):

```
fio --name=sequential-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1
fio --name=sequential-write --directory=/efs --rw=write  --refill_buffers --bs=4M --size=4G --end_fsync=1
fio --name=sequential-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1
```

Sequential read test (numjobs: 16):

```
fio --name=big-file-multi-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
fio --name=big-file-multi-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
fio --name=big-file-multi-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
```

Sequential write test (numjobs: 16):

```
fio --name=big-file-multi-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
fio --name=big-file-multi-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
fio --name=big-file-multi-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
```

## Testing Environment

All the following tests are all performed using `fio` on a c5d.18xlarge EC2 instance (72 CPU, 144G RAM) with Ubuntu 18.04 LTS (Kernel 5.4.0) operating system. JuiceFS uses a local Redis instance (version 4.0.9) to store metadata.

JuiceFS mount command:

```
./juicefs format --storage=s3 --bucket=https://<BUCKET>.s3.<REGION>.amazonaws.com localhost benchmark
./juicefs mount --max-uploads=150 --io-retries=20 localhost /jfs
```

EFS mount command (the same as the configuration page):

```
mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, <EFS-ID>.efs.<REGION>.amazonaws.com:/ /efs
```

S3FS (version 1.82) mount command:

```
s3fs <BUCKET>:/s3fs /s3fs -o host=https://s3.<REGION>.amazonaws.com,endpoint=<REGION>,passwd_file=${HOME}/.passwd-s3fs
```

## Testing Result

![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)


================================================
FILE: docs/en/benchmark/mdtest.md
================================================
---
title: Benchmark with mdtest
sidebar_position: 8
slug: /mdtest
---

:::tip
Trash is enabled in JuiceFS v1.0+ by default. As a result, temporary files are created and deleted in the file system during the benchmark, and these files will be eventually dumped into a directory named `.trash`. To avoid storage space being occupied by `.trash`, you can run command `juicefs config META-URL --trash-days 0` to disable Trash before benchmark. See [trash](../security/trash.md) for details.
:::

## Testing Approach

Perform a metadata test on JuiceFS, [EFS](https://aws.amazon.com/efs) and [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) with [mdtest](https://github.com/hpc/ior).

## Testing Tool

The following tests are performed with `mdtest` 3.4.
The arguments of `mdtest` are tuned to ensure that the command will finish within 5 minutes.

```
./mdtest -d /s3fs/mdtest -b 6 -I 8 -z 2
./mdtest -d /efs/mdtest -b 6 -I 8 -z 4
./mdtest -d /jfs/mdtest -b 6 -I 8 -z 4
```

## Testing Environment

All the following tests are performed using `mdtest` on a c5.large EC2 instance (2 CPU, 4G RAM) with Ubuntu 18.04 LTS (Kernel 5.4.0) operating system. The Redis (version 4.0.9) which JuiceFS uses runs on a c5.large EC2 instance in the same available zone to store metadata.

JuiceFS mount command:

```
./juicefs format --storage=s3 --bucket=https://<BUCKET>.s3.<REGION>.amazonaws.com localhost benchmark
nohup ./juicefs mount localhost /jfs &
```

EFS mount command (the same as the configuration page):

```
mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, <EFS-ID>.efs.<REGION>.amazonaws.com:/ /efs
```

S3FS (version 1.82) mount command:

```
s3fs <BUCKET>:/s3fs /s3fs -o host=https://s3.<REGION>.amazonaws.com,endpoint=<REGION>,passwd_file=${HOME}/.passwd-s3fs
```

## Testing Result

![Metadata Benchmark](../images/metadata-benchmark.svg)

### S3FS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/s3fs/mdtest' '-b' '6' '-I' '8' '-z' '2'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /s3fs/mdtest
FS                  : 256.0 TiB   Used FS: 0.0%   Inodes: 0.0 Mi   Used Inodes: -nan%
Nodemap: 1
1 tasks, 344 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :          5.977          5.977          5.977          0.000
   Directory stat            :        435.898        435.898        435.898          0.000
   Directory removal         :          8.969          8.969          8.969          0.000
   File creation             :          5.696          5.696          5.696          0.000
   File stat                 :         68.692         68.692         68.692          0.000
   File read                 :         33.931         33.931         33.931          0.000
   File removal              :         23.658         23.658         23.658          0.000
   Tree creation             :          5.951          5.951          5.951          0.000
   Tree removal              :          9.889          9.889          9.889          0.000
```

### EFS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/efs/mdtest' '-b' '6' '-I' '8' '-z' '4'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /efs/mdtest
FS                  : 8388608.0 TiB   Used FS: 0.0%   Inodes: 0.0 Mi   Used Inodes: -nan%
Nodemap: 1
1 tasks, 12440 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :        192.301        192.301        192.301          0.000
   Directory stat            :       1311.166       1311.166       1311.166          0.000
   Directory removal         :        213.132        213.132        213.132          0.000
   File creation             :        179.293        179.293        179.293          0.000
   File stat                 :        915.230        915.230        915.230          0.000
   File read                 :        371.012        371.012        371.012          0.000
   File removal              :        217.498        217.498        217.498          0.000
   Tree creation             :        187.906        187.906        187.906          0.000
   Tree removal              :        218.357        218.357        218.357          0.000
```

### JuiceFS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/jfs/mdtest' '-b' '6' '-I' '8' '-z' '4'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /jfs/mdtest
FS                  : 1024.0 TiB   Used FS: 0.0%   Inodes: 10.0 Mi   Used Inodes: 0.0%
Nodemap: 1
1 tasks, 12440 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :       1416.582       1416.582       1416.582          0.000
   Directory stat            :       3810.083       3810.083       3810.083          0.000
   Directory removal         :       1115.108       1115.108       1115.108          0.000
   File creation             :       1410.288       1410.288       1410.288          0.000
   File stat                 :       5023.227       5023.227       5023.227          0.000
   File read                 :       3487.947       3487.947       3487.947          0.000
   File removal              :       1163.371       1163.371       1163.371          0.000
   Tree creation             :       1503.004       1503.004       1503.004          0.000
   Tree removal              :       1119.806       1119.806       1119.806          0.000
```


================================================
FILE: docs/en/benchmark/metadata_engines_benchmark.md
================================================
---
title: Metadata Engines Benchmark
sidebar_position: 6
slug: /metadata_engines_benchmark
description: This article describes how to test and evaluate the performance of various metadata engines for JuiceFS using a real-world environment.
---

Conclusion first:

- For pure metadata operations, MySQL costs about 2~4x times of Redis; TiKV has similar performance to MySQL, and in most cases it costs a bit less; etcd costs about 1.5x times of TiKV.
- For small I/O (~100 KiB) workloads, total time costs with MySQL are about 1~3x of those with Redis; TiKV and etcd performs similarly to MySQL.
- For large I/O (~4 MiB) workloads, total time costs with different metadata engines show no significant difference (object storage becomes the bottleneck).

:::note

1. By changing `appendfsync` from `always` to `everysec`, Redis gains performance boost but loses a bit of data reliability. More information can be found [here](https://redis.io/docs/manual/persistence).
2. Both Redis and MySQL store only one replica locally, while TiKV and etcd stores three replicas on three different hosts using Raft protocol.

:::

Details are provided below. Please note all the tests are run with the same object storage (to save data), clients and metadata hosts, only metadata engines differ.

## Environment

### JuiceFS Version

1.1.0-beta1+2023-06-08.5ef17ba0

### Object Storage

Amazon S3

### Client Hosts

- Amazon c5.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network
- Ubuntu 20.04.1 LTS

### Metadata Hosts

- Amazon c5d.xlarge: 4 vCPUs, 8 GiB Memory, Up to 10 Gigabit Network, 100 GB SSD (local storage for metadata engines)
- Ubuntu 20.04.1 LTS
- SSD is formatted as ext4 and mounted on `/data`

### Metadata Engines

#### Redis

- Version: [7.0.9](https://download.redis.io/releases/redis-7.0.9.tar.gz)
- Configuration:
  - `appendonly`: `yes`
  - `appendfsync`: `always` or `everysec`
  - `dir`: `/data/redis`

#### MySQL

- Version: 8.0.25
- `/var/lib/mysql` is bind mounted on `/data/mysql`

#### PostgreSQL

- Version: 15.3
- The data directory was changed to `/data/pgdata`

#### TiKV

- Version: 6.5.3
- Configuration:
  - `deploy_dir`: `/data/tikv-deploy`
  - `data_dir`: `/data/tikv-data`

#### etcd

- Version: 3.3.25
- Configuration:
  - `data-dir`: `/data/etcd`

#### FoundationDB

- Version: 6.3.23
- Configuration：
  - `data-dir`：`/data/fdb`

## Tools

All the following tests are run for each metadata engine.

### Golang Benchmark

Simple benchmarks within the source code: [`pkg/meta/benchmarks_test.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/benchmarks_test.go)

### JuiceFS Bench

JuiceFS provides a basic benchmark command:

```bash
./juicefs bench /mnt/jfs -p 4
```

### mdtest

- Version: mdtest-3.3.0

Run parallel tests on 3 client nodes:

```bash
$ cat myhost
client1 slots=4
client2 slots=4
client3 slots=4
```

Test commands:

```bash
# metadata only
mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -b 3 -z 1 -I 100 -u -d /mnt/jfs

# 12000 * 100KiB files
mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -F -w 102400 -I 1000 -z 0 -u -d /mnt/jfs
```

### fio

- Version: fio-3.28

```bash
fio --name=big-write --directory=/mnt/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=4 --end_fsync=1 --group_reporting
```

## Results

### Golang Benchmark

- Shows time cost (us/op). Smaller is better.
- Number in parentheses is the multiple of Redis-Always cost (`always` and `everysec` are candidates for Redis configuration `appendfsync`).
- Because of enabling metadata cache, the results of `read` are all less than 1us, which are not comparable for now.

|              | Redis-Always | Redis-Everysec | MySQL        | PostgreSQL   | TiKV       | etcd         | FoundationDB |
|--------------|--------------|----------------|--------------|--------------|------------|--------------|--------------|
| mkdir        | 558          | 468 (0.8)      | 2042 (3.7)   | 1076 (1.9)   | 1237 (2.2) | 1916 (3.4)   | 1842 (3.3)   |
| mvdir        | 693          | 621 (0.9)      | 2693 (3.9)   | 1459 (2.1)   | 1414 (2.0) | 2486 (3.6)   | 1895 (2.7)   |
| rmdir        | 717          | 648 (0.9)      | 3050 (4.3)   | 1697 (2.4)   | 1641 (2.3) | 2980 (4.2)   | 2088 (2.9)   |
| readdir_10   | 280          | 288 (1.0)      | 1350 (4.8)   | 1098 (3.9)   | 995 (3.6)  | 1757 (6.3)   | 1744 (6.2)   |
| readdir_1k   | 1490         | 1547 (1.0)     | 18779 (12.6) | 18414 (12.4) | 5834 (3.9) | 15809 (10.6) | 15276 (10.3) |
| mknod        | 562          | 464 (0.8)      | 1547 (2.8)   | 849 (1.5)    | 1211 (2.2) | 1838 (3.3)   | 1763 (3.1)   |
| create       | 570          | 455 (0.8)      | 1570 (2.8)   | 844 (1.5)    | 1209 (2.1) | 1849 (3.2)   | 1761 (3.1)   |
| rename       | 728          | 627 (0.9)      | 2735 (3.8)   | 1478 (2.0)   | 1419 (1.9) | 2445 (3.4)   | 1911 (2.6)   |
| unlink       | 658          | 567 (0.9)      | 2365 (3.6)   | 1280 (1.9)   | 1443 (2.2) | 2461 (3.7)   | 1940 (2.9)   |
| lookup       | 173          | 178 (1.0)      | 557 (3.2)    | 375 (2.2)    | 608 (3.5)  | 1054 (6.1)   | 1029 (5.9)   |
| getattr      | 87           | 86 (1.0)       | 530 (6.1)    | 350 (4.0)    | 306 (3.5)  | 536 (6.2)    | 504 (5.8)    |
| setattr      | 471          | 345 (0.7)      | 1029 (2.2)   | 571 (1.2)    | 1001 (2.1) | 1279 (2.7)   | 1596 (3.4)   |
| access       | 87           | 89 (1.0)       | 518 (6.0)    | 356 (4.1)    | 307 (3.5)  | 534 (6.1)    | 526 (6.0)    |
| setxattr     | 393          | 262 (0.7)      | 992 (2.5)    | 534 (1.4)    | 800 (2.0)  | 717 (1.8)    | 1300 (3.3)   |
| getxattr     | 84           | 87 (1.0)       | 494 (5.9)    | 333 (4.0)    | 303 (3.6)  | 529 (6.3)    | 511 (6.1)    |
| removexattr  | 215          | 96 (0.4)       | 697 (3.2)    | 385 (1.8)    | 1007 (4.7) | 1336 (6.2)   | 1597 (7.4)   |
| listxattr_1  | 85           | 87 (1.0)       | 516 (6.1)    | 342 (4.0)    | 303 (3.6)  | 531 (6.2)    | 515 (6.1)    |
| listxattr_10 | 87           | 91 (1.0)       | 561 (6.4)    | 383 (4.4)    | 322 (3.7)  | 565 (6.5)    | 529 (6.1)    |
| link         | 680          | 545 (0.8)      | 2435 (3.6)   | 1375 (2.0)   | 1732 (2.5) | 3058 (4.5)   | 2402 (3.5)   |
| symlink      | 580          | 448 (0.8)      | 1785 (3.1)   | 954 (1.6)    | 1224 (2.1) | 1897 (3.3)   | 1764 (3.0)   |
| newchunk     | 0            | 0 (0.0)        | 1 (0.0)      | 1 (0.0)      | 1 (0.0)    | 1 (0.0)      | 2 (0.0)      |
| write        | 553          | 369 (0.7)      | 2352 (4.3)   | 1183 (2.1)   | 1573 (2.8) | 1788 (3.2)   | 1747 (3.2)   |
| read_1       | 0            | 0 (0.0)        | 0 (0.0)      | 0 (0.0)      | 0 (0.0)    | 0 (0.0)      | 0 (0.0)      |
| read_10      | 0            | 0 (0.0)        | 0 (0.0)      | 0 (0.0)      | 0 (0.0)    | 0 (0.0)      | 0 (0.0)      |

### JuiceFS Bench

|                  | Redis-Always     | Redis-Everysec   | MySQL           | PostgreSQL      | TiKV            | etcd            | FoundationDB    |
|------------------|------------------|------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
| Write big file   | 730.84 MiB/s     | 731.93 MiB/s     | 729.00 MiB/s    | 744.47 MiB/s    | 730.01 MiB/s    | 746.07 MiB/s    | 744.70 MiB/s    |
| Read big file    | 923.98 MiB/s     | 892.99 MiB/s     | 905.93 MiB/s    | 895.88 MiB/s    | 918.19 MiB/s    | 939.63 MiB/s    | 948.81 MiB/s    |
| Write small file | 95.20 files/s    | 109.10 files/s   | 82.30 files/s   | 86.40 files/s   | 101.20 files/s  | 95.80 files/s   | 94.60 files/s   |
| Read small file  | 1242.80 files/s  | 937.30 files/s   | 752.40 files/s  | 1857.90 files/s | 681.50 files/s  | 1229.10 files/s | 1301.40 files/s |
| Stat file        | 12313.80 files/s | 11989.50 files/s | 3583.10 files/s | 7845.80 files/s | 4211.20 files/s | 2836.60 files/s | 3400.00 files/s |
| FUSE operation   | 0.41 ms/op       | 0.40 ms/op       | 0.46 ms/op      | 0.44 ms/op      | 0.41 ms/op      | 0.41 ms/op      | 0.44 ms/op      |
| Update meta      | 2.45 ms/op       | 1.76 ms/op       | 2.46 ms/op      | 1.78 ms/op      | 3.76 ms/op      | 3.40 ms/op      | 2.87 ms/op      |

### mdtest

- Shows rate (ops/sec). Bigger is better.

|                    | Redis-Always | Redis-Everysec | MySQL    | PostgreSQL | TiKV      | etcd     | FoundationDB |
|--------------------|--------------|----------------|----------|------------|-----------|----------|--------------|
| **EMPTY FILES**    |              |                |          |            |           |          |              |
| Directory creation | 4901.342     | 9990.029       | 1252.421 | 4091.934   | 4041.304  | 1910.768 | 3065.578     |
| Directory stat     | 289992.466   | 379692.576     | 9359.278 | 69384.097  | 49465.223 | 6500.178 | 17746.670    |
| Directory removal  | 5131.614     | 10356.293      | 902.077  | 1254.890   | 3210.518  | 1450.842 | 2460.604     |
| File creation      | 5472.628     | 9984.824       | 1326.613 | 4726.582   | 4053.610  | 1801.956 | 2908.526     |
| File stat          | 288951.216   | 253218.558     | 9135.571 | 233148.252 | 50432.658 | 6276.787 | 14939.411    |
| File read          | 64560.148    | 60861.397      | 8445.953 | 20013.027  | 18411.280 | 9094.627 | 11087.931    |
| File removal       | 6084.791     | 12221.083      | 1073.063 | 3961.855   | 3742.269  | 1648.734 | 2214.311     |
| Tree creation      | 80.121       | 83.546         | 34.420   | 61.937     | 77.875    | 56.299   | 74.982       |
| Tree removal       | 218.535      | 95.599         | 42.330   | 44.696     | 114.414   | 76.002   | 64.036       |
| **SMALL FILES**    |              |                |          |            |           |          |              |
| File creation      | 295.067      | 312.182        | 275.588  | 289.627    | 307.121   | 275.578  | 263.487      |
| File stat          | 54069.827    | 52800.108      | 8760.709 | 19841.728  | 14076.214 | 8214.318 | 10009.670    |
| File read          | 62341.568    | 57998.398      | 4639.571 | 19244.678  | 23376.733 | 5477.754 | 6533.787     |
| File removal       | 5615.018     | 11573.415      | 1061.600 | 3907.740   | 3411.663  | 1024.421 | 1750.613     |
| Tree creation      | 57.860       | 57.080         | 23.723   | 52.621     | 44.590    | 19.998   | 11.243       |
| Tree removal       | 96.756       | 65.279         | 23.227   | 19.511     | 27.616    | 17.868   | 10.571       |

### fio

|                 | Redis-Always | Redis-Everysec | MySQL     | PostgreSQL | TiKV      | etcd      | FoundationDB |
|-----------------|--------------|----------------|-----------|------------|-----------|-----------|--------------|
| Write bandwidth | 729 MiB/s    | 737 MiB/s      | 736 MiB/s | 768 MiB/s  | 731 MiB/s | 738 MiB/s | 745 MiB/s    |


================================================
FILE: docs/en/benchmark/performance_evaluation_guide.md
================================================
---
title: Performance Evaluation Guide
sidebar_position: 2
slug: /performance_evaluation_guide
---

Before starting performance testing, it is a good idea to write down a general description of usage scenario, including:

1. What is the application for? For example, Apache Spark, PyTorch, or a program you developed yourself
2. The requisite resource for running the application, including CPU, memory, network, and node size
3. The estimated data size, including the number of files and their volume
4. The file size and access mode (large or small files, sequential or random reads and writes)
5. Performance requirements, such as the amount of data to be written or read per second, QPS, operation latency, etc.

The clearer and more detailed the above description is, the easier it will be to prepare a suitable test plan and find the performance indicators that need to be focused on. Clear plans and good performance indicators are helpful for evaluating the application requirements from various aspects of the storage system, including JuiceFS metadata configuration, network bandwidth requirements, configuration parameters, etc. It is surely not easy to have all details in mind at the beginning, and some of the content can be clarified gradually during the testing process. Still, **it is essential to make the usage scenario descriptions mentioned above and the corresponding test methods, test data, and test results complete at the end of a full test**.

Even if the above is not yet clear, it does not matter. JuiceFS built-in test tools can get the core indicators of benchmark performance of the standalone machine just by a one-line command. This article also introduces two more JuiceFS built-in performance analysis tools, which provide a simple and clear way for more complex tests.

## Performance Testing Quick Start

An example of the basic usage of the JuiceFS built-in `bench` tool is shown below.

### Working Environment

- Host: Amazon EC2 c5.xlarge one
- OS: Ubuntu 20.04.1 LTS (Kernel `5.4.0-1029-aws`)
- Metadata Engine: Redis 6.2.3, storage (dir) configured on system disk
- Object Storage: Amazon S3
- JuiceFS Version: 0.17-dev (2021-09-23 2ec2badf)

### Attention

JuiceFS v1.0+ has Trash enabled by default, which means the benchmark tools will create and delete temporary files in the file system. These files will eventually be dumped to the `.trash` folder which consumes storage space. To avoid this, you can disable the Trash before benchmarking by running `juicefs config META-URL --trash-days 0`. See [trash](../security/trash.md) for details.

### `juicefs bench`

The [`juicefs bench`](../reference/command_reference.mdx#bench) command can help you do a quick performance test on a standalone machine. With the test results, it is easy to evaluate if your environment configuration and JuiceFS performance are normal. Assuming you have mounted JuiceFS to `/mnt/jfs` on your server, execute the following command for this test (the `-p` option is recommended to set to the number of CPU cores on the server). If you need help with initializing or mounting JuiceFS, please refer to [Create a File System](../getting-started/standalone.md#juicefs-format).

```bash
juicefs bench /mnt/jfs -p 4
```

The test results are presented in a table format, where `ITEM` represents the tested item, `VALUE` represents the processing capacity per second (throughput, number of files, number of operations, etc.), and `COST` represents the time required for each file or operation.

The results will be displayed in green, yellow, or red to differentiate performance. If there are red indicators in your results, please check the relevant configurations first. Feel free to post any problems you encountered in detail on [GitHub Discussions](https://github.com/juicedata/juicefs/discussions).

![bench](../images/bench-guide-bench.png)

The detailed `juicefs bench` performance test flows are shown below (The logic behind is very simple. Please take a look at the [source code](https://github.com/juicedata/juicefs/blob/main/cmd/bench.go) if you are interested).

1. N concurrent `write`, each to a large file of 1 GiB with IO size of 1 MiB
2. N concurrent `read`, each from the large file of 1 GiB previously written, with IO size of 1 MiB
3. N concurrent `write`, each to 100 small files of 128 KiB, with IO size of 128 KiB
4. N concurrent `read`, each from the 100 small files of 128 KiB previously written, with IO size of 128 KiB
5. N concurrent `stat`, each on the 100 small files of 128 KiB previously written
6. clean up the temporary directory for testing

The concurrency scale N could be provided through the `-p` option of the `bench` command.

Here's a performance comparison using a few common storage types provided by AWS.

- EFS with 1TiB capacity performs 150MiB/s of `read` and 50MiB/s of `write` at a cost of $0.08/GB-month.
- EBS st1 is a throughput-optimized HDD with a maximum throughput of 500MiB/s, a maximum IOPS (1MiB I/O) of 500, and a maximum capacity of 16TiB, priced at $0.045/GB-month.
- EBS gp2 is a universal SSD with a maximum throughput of 250MiB/s, maximum IOPS (16KiB I/O) of 16,000, and a maximum capacity of 16TiB, priced at $0.10/GB-month.

The above tests clearly show that JuiceFS performs much better than AWS EFS in terms of sequential read and write capabilities and than the commonly used EBS regarding throughput. However, the JuiceFS performance is not that outstanding when writing small files because each file written needs to be persisted to S3 and there is typically a fixed overhead of 10-30ms on calling the object storage API.

:::note
The performance of Amazon EFS is linearly related to capacity ([refer to the official documentation](https://docs.aws.amazon.com/efs/latest/ug/performance.html#performancemodes)), which makes it unsuitable for being used in high throughput scenarios with small data sizes.
:::

:::note
Prices refer to [AWS US East, Ohio Region](https://aws.amazon.com/ebs/pricing/?nc1=h_ls), differing slightly among regions.
:::

:::note
The data above is from [AWS official documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html), and the performance metrics are their maximum values. The actual performance of EBS is related to its volume capacity and instance type of mounted EC2. In general, the larger the volume and the higher the specification of EC2, the better the EBS performance will be, but not exceeding the maximum value mentioned above.
:::

### `juicefs objbench`

The [`juicefs objbench`](../reference/command_reference.mdx#objbench) command can run some tests on object storage to evaluate how well it performs as a backend storage for JuiceFS. Take testing Amazon S3 as an example:

```bash
juicefs objbench \
    --storage s3 \
    --access-key myAccessKey \
    --secret-key mySecretKey \
    https://mybucket.s3.us-east-2.amazonaws.com
```

The test results are shown in the figure below:

![JuiceFS Bench](../images/objbench.png)

Among them, the result `not support` indicates that the tested object storage does not support this feature.

#### Test flow

First perform object storage function test, the following are test cases:

1. Create bucket
2. Upload an object
3. Download an object
4. Download non-existent object
5. Get object part content
6. Get an object metadata
7. Delete an object
8. Delete non-existent object
9. List objects
10. Upload a large object
11. Upload a empty object
12. Multipart upload
13. Change the owner/group of a file (requires `root` permission)
14. Change permission
15. Change mtime (last modified time)

And then perform performance testing:

1. Upload `--small-objects` objects of `--small-object-size` size with `--threads` concurrency
2. Download the objects uploaded in step 1 and check the contents
3. Split the `--big-object-size` object of size according to the size of `--block-size` and upload it concurrently with `--threads`
4. Download the objects uploaded in step 3 and check the content, then clean up all objects uploaded to the object store in step 3
5. List all objects in the object store 100 times with `--threads` concurrency
6. Get meta information of all objects uploaded in step 1 with `--threads` concurrency
7. Change mtime (last modified time) of all objects uploaded in step 1 by `--threads` concurrency
8. Change permission of all objects uploaded in step 1 by `--threads` concurrency
9. Change owner/group of all objects uploaded in step 1 by `--threads` concurrency (requires `root` permission)
10. Remove all objects uploaded in step 1 with `--threads` concurrency

Finally clean up the test files.

## Performance Observation and Analysis Tools

The next two performance observation and analysis tools are essential tools for testing, using, and tuning JuiceFS.

### `juicefs stats`

The [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) command is a tool for real-time statistics of JuiceFS performance metrics, similar to the `dstat` command on Linux systems. It can display changes of metrics for JuiceFS clients in real-time. For this, create a new session and execute the following command when the `juicefs bench` is running:

```bash
juicefs stats /mnt/jfs --verbosity 1
```

The results are shown below, which would be easier to understand when combing with the `bench` performance test flows described above.

![bench-guide-stats](../images/bench-guide-stats.png)

Learn the meaning of indicators in [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats).

### `juicefs profile`

The [`juicefs profile`](../administration/fault_diagnosis_and_analysis.md#profile) command is used to output all [access logs](../administration/fault_diagnosis_and_analysis.md#access-log) of the JuiceFS client in real time, including information about each request. It can also be used to play back and count JuiceFS access logs, and visualize the JuiceFS running status. To run the JuiceFS profile, execute the following command in another session while the `juicefs bench` command is running.

```bash
cat /mnt/jfs/.accesslog > juicefs.accesslog
```

The `.accessslog` is a virtual file for JuiceFS access logs. It does not produce any data until it is read (e.g. by executing `cat`). Press <kbd>Ctrl</kbd> + <kbd>C</kbd> to terminate the `cat` command and run the following one.

```bash
juicefs profile juicefs.accesslog --interval 0
```

The `---interval` parameter sets the sampling interval for accessing the log. 0 means quickly replay the log file to generate statistics, as shown in the following figure.

![profile](../images/bench-guide-profile.png)

Based on the bench performance test flows as described above, a total of `(1 + 100) * 4 = 404` files were created during this test, and each file went through the process of "Create → Write → Close → Open → Read → Close → Delete". So there are a total of:

- 404 `create`, `open` and `unlink` requests
- 808 `flush` requests: `flush` is automatically invoked whenever a file is closed
- 33168 `write`/`read` requests: each large file takes 1024 1 MiB IOs on write, while the maximum size of a request at the FUSE level is 128 KiB by default. It means that each application IO is split into 8 FUSE requests, so there are `(1024 * 8 + 100) * 4 = 33168` requests. The read IOs work in a similar way, and so does its counting.

All these values correspond exactly to the results of `profile`. In addition, the test result shows that the average latency for the `write` operations is extremely low (45 μs). This is because JuiceFS `write` writes to a memory buffer first by default and then calls `flush` to upload data to the object storage when the file is closed, as expected.

## Other Test Tool Configuration Examples

:::tip
JuiceFS v1.0+ has Trash enabled by default. The benchmark process will create and delete temporary files in the file system, and these files will eventually be dumped to the `.trash` folder which consumes storage space. To avoid this, you can disable Trash before benchmarking by running `juicefs config META-URL --trash-days 0`. See [trash](../security/trash.md) for details.
:::

### Fio Standalone Performance Test

Fio is a common performance testing tool that can be used to do more complex performance tests after completing the JuiceFS bench.

#### Working Environment

Consistent with the JuiceFS Bench test environment described above.

#### Testing tasks

Perform the following 4 Fio tasks for sequential write, sequential read, random write, and random read tests.

Sequential write

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=write --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

Sequential read

```bash
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=read --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

Random write

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randwrite --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

Random read

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randread --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

Options explanation:

- `--name`: user-specified test name, which affects the test file name
- `--directory`: test directory
- `--ioengine`: the way to send IO when testing; usually `libaio` is used
- `--rw`: commonly used options are read, write, randread and randwrite, which stand for sequential read/write and random read/write, respectively
- `--bs`: the size of each IO
- `--size`: the total size of IO per thread; usually equal to the size of the test file
- `--numjobs`: number of concurrent test threads; each thread runs with an individual test file by default
- `--direct`: add the `O_DIRECT` flag bit on opening a file to disable system buffering, which can make the test results more stable and accurate

The results are as follows:

```bash
# Sequential
WRITE: bw=703MiB/s (737MB/s), 703MiB/s-703MiB/s (737MB/s-737MB/s), io=4096MiB (4295MB), run=5825-5825msec
READ: bw=817MiB/s (856MB/s), 817MiB/s-817MiB/s (856MB/s-856MB/s), io=4096MiB (4295MB), run=5015-5015msec

# Random
WRITE: bw=285MiB/s (298MB/s), 285MiB/s-285MiB/s (298MB/s-298MB/s), io=4096MiB (4295MB), run=14395-14395msec
READ: bw=93.6MiB/s (98.1MB/s), 93.6MiB/s-93.6MiB/s (98.1MB/s-98.1MB/s), io=4096MiB (4295MB), run=43773-43773msec
```

### Vdbench Multi-machine Performance Test

Vdbench is a commonly used file system evaluation tool, and supports multi-machine concurrent testing well.

#### Working Environment

Similar to the JuiceFS Bench test environment, but with two more hosts (three in total) with the same hardware specifications.

#### Preparation

vdbench needs to be installed under the same path on each node:

1. Download version 50406 from the [Official Website](https://www.oracle.com/downloads/server-storage/vdbench-downloads.html)
2. Install Java: `apt-get install openjdk-8-jre`
3. Verify that vdbench is installed successfully: `./vdbench -t`

Assuming the names of the three nodes are `node0`, `node1` and `node2`, you need to create a configuration file on `node0` as follows (to test reading and writing a large number of small files):

```bash
$ cat jfs-test
hd=default,vdbench=/root/vdbench50406,user=root
hd=h0,system=node0
hd=h1,system=node1
hd=h2,system=node2

fsd=fsd1,anchor=/mnt/jfs/vdbench,depth=1,width=100,files=3000,size=128k,shared=yes

fwd=default,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd1,host=h0
fwd=fwd2,host=h1
fwd=fwd3,host=h2

rd=rd1,fwd=fwd*,fwdrate=max,format=yes,elapsed=300,interval=1
```

Parameters description:

- `vdbench=/root/vdbench50406`: specifies the path where the vdbench tool is installed
- `anchor=/mnt/jfs/vdbench`: specifies the path to run test tasks on each node
- `depth=1,width=100,files=3000,size=128k`: defines the file tree structure of the test task, creating 100 more directories under the test directory, each contains 3000 files of 128 KiB, 300,000 files in total
- `operation=read,xfersize=128k,fileio=random,fileselect=random`: defines the actual test task, which randomly selects files to send 128 KiB size read requests

The results are as follows:

```
FILE_CREATES        Files created:                              300,000        498/sec
READ_OPENS          Files opened for read activity:             188,317        627/sec
```

The overall rate of 128 KiB file creating is 498 (files/s), while file reading rate is 627.

#### More References

Here are some profiles available for simple local evaluation of file system performance. The specific test set size and number of concurrencies can be adjusted according to the actual situation.

##### Sequential reading and writing of large files

All files are 1GiB in size, where `fwd1` is a large file for sequential writing, and `fwd2` is a large file for sequential reading.

```bash
$ cat local-big
fsd=fsd1,anchor=/mnt/jfs/local-big,depth=1,width=1,files=4,size=1g,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1
```

##### Random reading and writing of small files

All files are 128KiB in size, where `fwd1` is a small file for random writing, `fwd2` is a small file for random reading, and `fwd3` is a small file for random mixed reading/writing (ratio read/write = 7:3).

```bash
$ cat local-small
fsd=fsd1,anchor=/mnt/jfs/local-small,depth=1,width=20,files=2000,size=128k,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd3,fsd=fsd1,rdpct=70,xfersize=128k,fileio=random,fileselect=random,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd3,fwd=fwd3,fwdrate=max,format=restart,elapsed=120,interval=1
```


================================================
FILE: docs/en/community/_roadmap.md
================================================
---
title: Roadmap
sidebar_position: 3
---


================================================
FILE: docs/en/community/adopters.md
================================================
---
title: Adopters
sidebar_position: 1
slug: /adopters
---

| Company/Team | Industry & Use Cases | User Story |
|--------------|----------------------|------------|
| [NAVER](https://www.naver.com) | Search engine, Training, Inference | [NAVER, Korea's No.1 Search Engine, Chose JuiceFS over Alluxio for AI Storage](https://juicefs.com/en/blog/user-stories/juicefs-vs-alluxio-ai-storage-naver)  |
| [Character.AI](https://character.ai) | GenAI, Training |              |
| [Fal](https://fal.ai) | GenAI, Inference    |           |
| [BentoML](https://bentoml.com)  | GenAI, Inference  | [BentoML Reduced LLM Loading Time from 20+ to a Few Minutes with JuiceF](https://juicefs.com/en/blog/user-stories/accelerate-large-language-model-loading)   |
| [Lepton AI](https://www.lepton.ai) | GenAI, Training, Inference | [How Lepton AI Cut Cloud Storage Costs by 98% for AI Workflows with JuiceFS](https://juicefs.com/en/blog/user-stories/cloud-storage-artificial-intelligence-juicefs-vs-efs)          |
| [Graviti Diffus](https://www.diffus.graviti.com) | GenAI, Inference |      |
| [Plus.AI](https://plus.ai) | Autonomous driving, AI pipeline  |                     |
| [Jerry](https://getjerry.com) | Car insurance, Data platform | [Low-Cost Read/Write Separation: Jerry Builds a Primary-Replica ClickHouse Architecture](https://juicefs.com/en/blog/user-stories/read-write-separation) |
| [DJI](https://www.dji.com) | Drone & Autonomous driving, AI pipeline|             |
| [Clobotics](https://clobotics.com)   | Drone, AI pipeline  | [How Clobotics Overcame Multi-Cloud and Massive File Storage Challenges](https://juicefs.com/en/blog/user-stories/multi-cloud-storage-posix-compatible)     |
| [TP-LINK](https://www.tp-link.com) | AI      |       |
| [MemVerge](https://memverge.com)  | BioTech, High performance file store  |            |
| [MDI Biological Laboratory](https://mdibl.org) | BioTech, High performance file store |           |
| [Lawrence Berkeley Lab](https://www.lbl.gov) | BioTech, High performance file store |             |
| [American Museum of Natural History](https://www.amnh.org) | Non-profit, HPC, File sharing |     |
| [Argonne National Laboratory](https://www.anl.gov) | Non-profit |     |
| [Texas A&M Unversity](https://www.tamu.edu) | Education |     |
| [Simon Fraser University](https://www.sfu.ca) | Education |     |
| [University of Canberra](https://www.canberra.edu.au) | Education |     |
| [PITS Globale Datenrettungsdienste](https://www.pitsdatenrettung.de)  | Data recovery service, File Sharing   |           |
| [ExaLeap Semiconductor](https://exaleapsemi.com) | Semiconductor |     |
| [Cherry Digital](https://cherrydigital.com) | Digital media |      |
| [Shopee](https://shopee.com)  | E-commerce, Data platform  | [Shopee x JuiceFS: ClickHouse Cold and Hot Data Separation Storage Architecture and Practice](https://juicefs.com/en/blog/shopee-clickhouse-with-juicefs)    |
| [Zhihu](https://www.zhihu.com)  | Internet service, Training, Inference | [How Zhihu Ensures Stable Storage for LLM Training in Multi-Cloud Architecture](https://juicefs.com/en/blog/user-stories/ai-storage-llm-training-multi-cloud)   |
| [Grab](https://grab.com/sg)  | Data platform  |             |
| [CCB Fintech](https://www.ccbft.com)  | Fintech, AI, File sharing  |      |
| [Pingan Bank](https://pingan.com)  | Fintech, Data platform  |             |
| [Tongdun](https://tongdun.cn)  | Fintech, Data platform |        |
| [Yaoxin Financing Re-Guarantee](https://www.yaoxinhd.com)  | Data platform, File sharing      |          |
| [China Telecom](https://www.chinatelecomglobal.com)  | Telecom, Data platform | [Scaling Hadoop on cloud: Managing PB-Level Data through Separation of Compute and Storage with JuiceFS](https://juicefs.com/en/blog/user-stories/applicatio-of-juicefs-in-china-telecoms-daily-average-pb-data-scenario)   |
| [China Mobile Cloud](https://ecloud.he.chinamobile.com)  | Public cloud, Data platform  | [Improving Apache HBase Performance on Cloud with JuiceFS](https://juicefs.com/en/blog/user-stories/juicefs-support-hbase-at-chinamobile-cloud)    |
| [Volcano Engine](https://www.volcengine.com)  | Public cloud, File sharing, VFX rendering | [How JuiceFS Accelerates Edge Rendering Performance in Volcengine](https://juicefs.com/en/blog/user-stories/how-juicefs-accelerates-edge-rendering-performance-in-volcengine)   |
| [Kingsoft Cloud](https://en.ksyun.com)   | Public cloud, Data platform | [Storing Elasticsearch Warm/Cold Data on Object Storage with JuiceFS: A Guide by Kingsoft Cloud](https://juicefs.com/en/blog/user-stories/kingsoft-cloud-how-to-store-elasticsearch-data-in-objective-storage-with-juicefs) |
| [Piesat Information Technology Co., Ltd.](https://www.piesat.cn)   | GIS, File sharing   |         |
| [National Supercomputing Center in JiNan](https://www.nsccjn.cn)   |  HPC, AI    |              |
| [Xiaomi](https://www.mi.com/global)   | Consumer electronics, Training, Inference  | [Xiaomi: Building a Cloud-Native File Storage Platform to Host 5B+ Files in AI Training & More](https://juicefs.com/en/blog/user-stories/cloud-native-file-storage-platform-ai-training)  |
| [vivo](https://www.vivo.com)   | Consumer electronics, Training, Inference  |            |
| [SF Express](https://www.sf-express.com)  | Logistics, AI pipeline, File sharing   |           |
| [Unisound](https://www.unisound.com)  | AI, Training, Inference   | [Unisound’s HPC Platform accelerates AI model training and development with JuiceFS](https://juicefs.com/en/blog/unisounds-hpc-platform-accelerates-ai-model-training-and-development-with-juicefs)   |
| [Yimian](https://www.yimian.io)  | Consulting, Data platform   | [Yimian Migrated Hadoop to the Cloud: 2x Storage Capacity & Fewer Ops Costs](https://juicefs.com/en/blog/user-stories/migrating-hadoop-to-cloud-2x-storage-capacity-fewer-ops-costs)  |
| [Trip.com](https://www.trip.com)   | Internet service, Data platform, File sharing  | [Trip.com’s practice of massive cold data migrating to object storage with JuiceFS](https://juicefs.com/en/blog/user-stories/a-practice-of-massive-cold-data-migrating-to-oss-with-juicefs), [JuiceFS at Trip.com: Managing 10 PB of Data for Stable and Cost-Effective LLM Storage](https://juicefs.com/en/blog/user-stories/large-language-model-artificial-intelligence-storage-cost-effective)   |
| [Beike](https://ke.com)  | Internet service, AI pipline   | [Beike Loads AI Models 20x Faster with Hybrid Cloud Storage](https://juicefs.com/en/blog/user-stories/ai-model-accelerate)    |
| [Baidu](https://ir.baidu.com/company-overview)  | Internet Service    |             |
| [Tongcheng Travel](https://www.tongchengir.com)  | Internet service, File sharing    | [Tongcheng Travel Chose JuiceFS over CephFS to Manage Hundreds of Millions of Files](https://juicefs.com/en/blog/user-stories/juicefs-vs-cephfs-distributed-file-system-artificial-intelligence-storage)          |
| [Skyplatanus](https://www.kuaidianyuedu.com)  | Internet service, AI, File sharing |              |
| [NetEase Games](https://www.neteasegames.com)   | Gaming, Data platform, File sharing      | [50%+ Cut in Both Storage & Compute Costs: Designing NetEase Games' Cloud Big Data Platform](https://juicefs.com/en/blog/user-stories/cut-storage-compute-costs-cloud-big-data-platform)   |
| [Joyient](http://www.joyient.com)  | Gaming, File sharing, VFX rendering |            |
| [CVTE](http://www.cvte.com/en)  | Education, File sharing  |            |
| [Ricequant](https://www.ricequant.com)  | Quantitative trading, AI, File sharing   |         |
| [Dmall](https://www.dmall.com/en) | SaaS, Data platform, File sharing | [Why DMALL Switched to a Big Data Storage-Compute Decoupled Architecture](https://juicefs.com/en/blog/user-stories/storage-compute-decoupled-architecture-cloud-native-big-data)      |
| [Horizon Robotics](https://horizon.ai)  | Autonomous driving, AI pipeline |                 |
| [Li Auto](https://www.lixiang.com/en) | Automotive, Big Data, AI  | [Migrating on-Premises Hadoop to Cloud with JuiceFS: A Case Study from Li Auto](https://juicefs.com/en/blog/user-stories/li-autos-practice-of-migrating-data-from-hdfs-to-juicefs)   |
| [NIO Auto](https://www.nio.com)  | Automotive, AI, File sharing |         |
| [SAIC Motor](https://www.saicmotor.com/english)    | Automotive, AI   |          |
| [Wuling Auto](https://wuling.com) | Automotive, Data platform   |           |
| [coSence](https://www.coscene.io)  | Robotics, AI pipeline  | [coScene Chose JuiceFS over Alluxio to Tackle Object Storage Drawbacks](https://juicefs.com/en/blog/user-stories/juicefs-vs-alluxio-ai-robot-storage)          |
| [DP Technology](https://www.dp.tech)   | BioTech, AI pipeline  | [AI & HPC Workloads on Hybrid Cloud: Storage Challenges and Solutions](https://juicefs.com/en/blog/user-stories/storage-architectures-for-ai-hpc-in-hybridmulti-cloud)   |
| [Gene Way](https://www.geneway.cn)    | BioTech, File sharing  |              |
| [CoCalc](https://doc.cocalc.com/cloud_file_system.html) | Data Science, AI, Education, Cloud GPU's |   |

You are welcome to share your experience after using JuiceFS, either by submitting a Pull Request directly to this list, or by contacting us at [`hello@juicedata.io`](mailto:hello@juicedata.io).


================================================
FILE: docs/en/community/articles.md
================================================
---
title: JuiceFS Article Collection
sidebar_position: 2
slug: /articles
description: Explore JuiceFS' collection of technical articles and real-world case studies in AI, machine learning, deep learning, big data, data sharing, backup, and recovery scenarios.
---

JuiceFS is widely applicable to various data storage and sharing scenarios. This page compiles its technical articles and real-world case studies. Explore valuable insights and practical examples to deepen your understanding of JuiceFS and related applications. We encourage all community users to contribute and maintain this list.

## Articles sorted in categories

### AI, machine learning, and deep learning

- [How D-Robotics Manages Massive Small Files in a Multi-Cloud Environment with JuiceFS](https://juicefs.com/en/blog/user-stories/multi-cloud-store-massive-small-files), 2026-03-05, Han Zhao @ D-Robotics
- [From GlusterFS to JuiceFS: Lightillusions Achieved 2.5x Faster 3D AIGC Data Processing](https://juicefs.com/en/blog/user-stories/aigc-storage-glusterfs-cephfs-vs-juicefs), 2026-01-08, Weiyu Li @ Lightillusions
- [AI Data Storage: Challenges, Capabilities, and Comparative Analysis](https://juicefs.com/en/blog/solutions/ai-data-storage-challenges-capabilities-solution-comparison), 2025-12-18, Rui Su
- [JuiceFS+MinIO: Ariste AI Achieved 3x Faster I/O and Cut Storage Costs by 40%+](https://juicefs.com/en/blog/user-stories/quantitative-storage-artificial-intelligence-solution), 2025-12-11, Yutang Gao @ Ariste AI
- [NAS vs. Object Storage vs. JuiceFS: Storage Selection of Billion-Dollar Quantitative Firms](https://juicefs.com/en/blog/solutions/quant-research-storage-selection-nas-object-storage-juicefs), 2025-11-27, Jerry Cai
- [Building AI Inference with JuiceFS: Supporting Multi-Modal Complex I/O, Cross-Cloud, and Multi-Tenancy](https://juicefs.com/en/blog/solutions/ai-inference-multi-cloud-storage-multi-tenancy), 2025-10-23, Shaojie Li
- [Zelos Tech Manages Hundreds of Millions of Files for Autonomous Driving with JuiceFS](https://juicefs.com/en/blog/user-stories/multi-cloud-storage-autonomous-driving), 2025-10-09, Junyu Deng @ Zelos Tech
- [Why Gaoding Technology Chose JuiceFS for AI Storage in a Multi-Cloud Architecture](https://juicefs.com/en/blog/user-stories/multi-cloud-storage-artificial-intelligence-training), 2025-09-03, Jia Ke @ Gaoding Technology
- [StepFun Built an Efficient and Cost-Effective LLM Storage Platform with JuiceFS](https://juicefs.com/en/blog/user-stories/artificial-intelligence-storage-large-language-model-multimodal), 2025-07-31, Changxin Miao @ StepFun
- [INTSIG Built Unified Storage Based on JuiceFS to Support Petabyte-Scale AI Training](https://juicefs.com/en/blog/user-stories/artificial-intelligence-model-training-unified-storage-solution), 2025-07-24, Yifan Tang @ INTSIG
- [vivo Migrated from GlusterFS to a Distributed File System Built on JuiceFS](https://juicefs.com/en/blog/user-stories/glusterfs-vs-juicefs-ai-computing), 2025-07-17, Xiangyang Yu @ vivo
- [NFS to JuiceFS: Building a Scalable Storage Platform for LLM Training & Inference](https://juicefs.com/en/blog/user-stories/ai-storage-platform-large-language-model-training-inference), 2025-06-11, Wei Sun
- [BioMap Cut AI Model Storage Costs by 90% Using JuiceFS](https://juicefs.com/en/blog/user-stories/ai-storage-life-sciences-solution-juicefs-vs-lustre-alluxio), 2025-05-15, Zedong​​ ​​Zheng @ BioMap
- [JuiceFS at Trip.com: Managing 10 PB of Data for Stable and Cost-Effective LLM Storage](https://juicefs.com/en/blog/user-stories/large-language-model-artificial-intelligence-storage-cost-effective), 2025-03-13, Songlin Wu @ Trip.com
- [How Lepton AI Cut Cloud Storage Costs by 98% for AI Workflows with JuiceFS](https://juicefs.com/en/blog/user-stories/cloud-storage-artificial-intelligence-juicefs-vs-efs), 2025-02-07, Cong Ding @ Lepton AI
- [Tongcheng Travel Chose JuiceFS over CephFS to Manage Hundreds of Millions of Files](https://juicefs.com/en/blog/user-stories/juicefs-vs-cephfs-distributed-file-system-artificial-intelligence-storage), 2025-01-08, Chuanhai Wei @ Tongcheng Travel
- [LLM Storage Selection & Detailed Performance Analysis of JuiceFS](https://juicefs.com/en/blog/solutions/llm-storage-selection), 2024-10-23, Shaojie Li
- [MiniMax Built a Cost-Effective, High-Performance AI Platform with JuiceFS](https://juicefs.com/en/blog/user-stories/minimax-foundation-model-ai-storage), 2024-09-02
- [How JuiceFS Boosts Foundation Model Inference in Multi-Cloud Architectures](https://juicefs.com/en/blog/solutions/boost-foundation-model-inference-multi-cloud), 2024-08-29, Changjian Gao
- [Enhancing AI Training Workflows with JuiceFS](https://juicefs.com/en/blog/solutions/enhance-ai-training-workflow), 2024-08-27
- [vivo Migrated from GlusterFS to a Distributed File System for AI Training](https://juicefs.com/en/blog/user-stories/improve-ai-training), 2024-07-18, Yige Peng @ vivo
- [iSEE Lab Stores 500M+ Files on JuiceFS Replacing NFS](https://juicefs.com/en/blog/user-stories/deep-learning-ai-storage), 2024-07-03, Guohao Xu @ Sun Yat-sen University
- [Beike Loads AI Models 20x Faster with Hybrid Cloud Storage](https://juicefs.com/en/blog/user-stories/ai-model-accelerate), 2024-06-26, Tianqing Wang @ Beike
- [Low-Cost Read/Write Separation: Jerry Builds a Primary-Replica ClickHouse Architecture](https://juicefs.com/en/blog/user-stories/read-write-separation), 2024-05-29, Tao Ma @ Jerry
- [LLM Storage: Performance, Cost, and Multi-Cloud Architecture](https://juicefs.com/en/blog/solutions/llm-storage-performance-cost-multi-cloud), 2024-04-09, Sui Su
- [How Zhihu Ensures Stable Storage for LLM Training in Multi-Cloud Architecture](https://juicefs.com/en/blog/user-stories/ai-storage-llm-training-multi-cloud), 2024-04-03, Xin Wang @ Zhihu
- [BentoML Reduced LLM Loading Time from 20+ to a Few Minutes with JuiceFS](https://juicefs.com/en/blog/user-stories/accelerate-large-language-model-loading), 2024-02-29, Xipeng Guan @ BentoML
- [coScene Chose JuiceFS over Alluxio to Tackle Object Storage Drawbacks](https://juicefs.com/en/blog/user-stories/juicefs-vs-alluxio-ai-robot-storage), 2024-01-24, Juchao Song @ coScene
- [NAVER, Korea's No.1 Search Engine, Chose JuiceFS over Alluxio for AI Storage](https://juicefs.com/en/blog/user-stories/juicefs-vs-alluxio-ai-storage-naver), 2024-01-17, Nam Kyung-wan @ NAVER
- [Building an Easy-to-Operate AI Training Platform: SmartMore's Storage Selection & Best Practices](https://juicefs.com/en/blog/user-stories/ai-training-storage-selection-seaweedfs-juicefs), 2023-12-14, Jichuan Sun @ SmartMore
- [A Leading Self-Driving Company Chose JuiceFS over Amazon S3 and Alluxio in the Multi-Cloud Architecture](https://juicefs.com/en/blog/user-stories/data-storage-multi-cloud-autonomous-driving-juicefs), 2023-11-09
- [Choosing JuiceFS over s3fs and Alluxio for Our Ultra-Heterogeneous Computing Cluster](https://juicefs.com/en/blog/user-stories/high-performance-scale-out-heterogeneous-computing-power-cluster-storage), 2023-06-09, Chen Hong @ Zhejiang Lab
- [Achieving Elastic Throughput in the Cloud with a Distributed File System to Boost AI Training](https://juicefs.com/en/blog/solutions/accelerate-ai-training-flexible-elastic-throughput-cloud), 2023-05-06, Sui Su
- [Improving Read Performance by ~30% in AI Speech and Text Processing by a Distributed Storage System](https://juicefs.com/en/blog/user-stories/unisounds-hpc-platform-accelerates-ai-model-training-and-development-with-juicefs), 2022-09-06, Dongdong Lv @ Unisound

### Big data

- [From Object Storage to K8s+JuiceFS: 85% Storage Cost Cut, HDFS-Level Performance](https://juicefs.com/en/blog/user-stories/object-storage-kubernetes-hdfs), 2024-02-07, Experienced JuiceFS user
- [From Hadoop to Cloud: Why and How to Decouple Storage and Compute in Big Data Platforms](https://juicefs.com/en/blog/solutions/hadoop-cloud-decouple-storage-compute-big-data), 2023-11-01
- [Costs Cut & Ops Efficiency Boosted: Switching to a Big Data Storage-Compute Decoupled Architecture](https://juicefs.com/en/blog/user-stories/storage-compute-decoupled-architecture-cloud-native-big-data), 2023-09-28, Ming Li @ DMALL
- [50%+ Cut in Both Storage & Compute Costs: Designing NetEase Games' Cloud Big Data Platform](https://juicefs.com/en/blog/user-stories/cut-storage-compute-costs-cloud-big-data-platform), 2023-09-14, Weihong Ke @ NetEase Games
- [Migrating Hadoop to the Cloud: 2x Storage Capacity & Fewer Ops Costs](https://juicefs.com/en/blog/user-stories/migrating-hadoop-to-cloud-2x-storage-capacity-fewer-ops-costs), 2023-08-09, Chang Liu & Yangliang Li @ Yimian
- [Gaoding Technology Saves 60% Of Storage Cost Used By Elasticsearch](https://juicefs.com/en/blog/user-stories/gaoding-with-juicefs), 2021-10-09, Gaoding SRE Team
- [Shopee x JuiceFS: ClickHouse Cold and Hot Data Separation Storage Architecture and Practice](https://juicefs.com/en/blog/user-stories/shopee-clickhouse-with-juicefs), 2021-10-09, Teng @ Shopee
- [How to effectively reduce the load of HDFS cluster for Qutoutiao(NASDAQ:QTT)](https://juicefs.com/blog/en/posts/qutoutiao-big-data-platform-user-case)
- [How does the Globalegrow data platform achieve both speed and money savings?](https://juicefs.com/blog/en/posts/globalegrow-big-data-platform-user-case)
- [How to make HBase faster, more stable, and cheaper](https://juicefs.com/blog/en/posts/how-to-make-hbase-faster-more-stable-and-cheaper)
- [Exploring storage and computing separation for ClickHouse](https://juicefs.com/blog/en/posts/clickhouse-disaggregated-storage-and-compute-practice)

### Cloud-native & Kubernetes

- [Hai Robotics Achieved High Availability & Easy Operations in a Hybrid Cloud Architecture with JuiceFS](https://juicefs.com/en/blog/user-stories/high-availability-easy-operations-hybrid-cloud-ai-storage), 2024-11-27, Sendong Wu @ Hai Robotics
- [TAL: Building a Low-Operation Model Repository Based on JuiceFS in a Multi-Cloud Environment](https://juicefs.com/en/blog/user-stories/multi-cloud-llm-model-repository-storage), 2024-11-21, Longhua He @ TAL
- [Training LLMs: Best Practices for Storing Thousands of Nodes in K8s](https://juicefs.com/en/blog/usage-tips/train-large-language-model-kubernetes-storage), 2024-10-09, Weiwei Zhu
- [How Clobotics Overcame Multi-Cloud and Massive File Storage Challenges](https://juicefs.com/en/blog/user-stories/multi-cloud-storage-posix-compatible), 2024-09-11, Jonnas @ Clobotics
- [K8s Data Persistence: Getting Started with JuiceFS CSI Driver](https://juicefs.com/en/blog/usage-tips/kubernetes-data-persistence-juicefs), 2023-12-28, Herald Yu
- [Building a Cloud-Native File Storage Platform to Host 5B+ Files in AI Training & More](https://juicefs.com/en/blog/user-stories/cloud-native-file-storage-platform-ai-training), 2023-10-12, Jiapeng Sun @ Xiaomi
- [An Elastic Platform & Simplified Storage Achieved by Migrating to Spark+K8s+JuiceFS](https://juicefs.com/en/blog/user-stories/scalable-computing-unified-data-storage-ops-cloud-spark-k8s-juicefs), 2023-05-10, Fengyu Cao @ Douban

### Data sharing

- [Conda + JuiceFS: Enhancing AI Development Environment Sharing](https://juicefs.com/en/blog/usage-tips/improve-artificial-intelligence-development-environment-sharing), 2024-12-18, Herald Yu
- [Hugging Face + JuiceFS: Simplifying Model Sharing Across Multiple Users and Nodes](https://juicefs.com/en/blog/usage-tips/ai-model-storage-share-multi-users-nodes), 2024-10-17, Herald Yu
- [Ollama + JuiceFS: Pull Once, Run Anywhere](https://juicefs.com/en/blog/usage-tips/ollama-large-language-model), 2024-09-25, Weiwei Zhu
- [Building a Milvus Cluster Based on JuiceFS](https://juicefs.com/blog/en/posts/build-milvus-distributed-cluster-based-on-juicefs)

### Data backup and recovery

- [How JuiceFS 1.3 Backs Up 100 Million Files in Just Minutes](https://juicefs.com/en/blog/release-notes/juicefs-1-3-binary-backup), 2025-05-29, Jiefeng Huang
- [Trip.com’s practice of massive cold data migrating to object storage with JuiceFS](https://juicefs.com/en/blog/user-stories/a-practice-of-massive-cold-data-migrating-to-oss-with-juicefs), 2022-09-19, Miaocheng & Xiaofeng @ Trip.com
- [JuiceFS for archive NGINX logs](https://juicefs.com/docs/en/archive_nginx_log_in_juicefs.html)
- [JuiceFS for MySQL backup, verification and recovery](https://juicefs.com/docs/en/backup_mysql_in_juicefs.html)
- [Customer Stories: Xiachufang MySQL backup practice on JuiceFS](https://juicefs.com/blog/en/posts/xiachufang-mysql-backup-practice-on-juicefs)

### Engineering insights

- [The Design Journey of FUSE: From Kernel-Space to User-Space File Systems](https://juicefs.com/en/blog/engineering/design-fuse-kernel-user-space), 2026-02-14, Yuchao Xu
- [Design and Performance Optimization of juice sync for Enterprise Data Synchronization](https://juicefs.com/en/blog/engineering/design-performance-optimization-juice-sync), 2025-12-08, Jian Zhi
- [Deep Dive into the JuiceFS Garbage Collection Mechanism](https://juicefs.com/en/blog/engineering/juicefs-garbage-collection), 2025-11-06, Yuchao Xu
- [MLPerf Storage v2.0: JuiceFS Leads in Bandwidth Utilization and Scalability for AI Training](https://juicefs.com/en/blog/engineering/mlperf-storage-v2-ai-training-storage-performance), 2025-09-25, Feihu Mo
- [Achieving TB-Level Aggregate Bandwidth: How JuiceFS Optimized Distributed Cache Network](https://juicefs.com/en/blog/engineering/terabyte-aggregate-bandwidth-distributed-cache-network), 2025-09-18, Feihu Mo
- [JuiceFS on Windows: Challenges in the Beta Release](https://juicefs.com/en/blog/engineering/optimize-juicefs-on-windows), 2025-08-20, Ethan Chen
- [Deep Dive into JuiceFS Permission Management: Full Compatibility with Linux Security Mechanisms](https://juicefs.com/en/blog/engineering/linux-file-system-juicefs-access-management), 2025-06-26, Jiefeng Huang
- [Code-Level Analysis: Design Principles of JuiceFS Metadata and Data Storage](https://juicefs.com/en/blog/engineering/design-metadata-data-storage), 2024-12-12, Arthur
- [Deep Dive into JuiceFS Data Synchronization and Consistency in Multi-Cloud Architectures](https://juicefs.com/en/blog/engineering/data-synchronization-consistency-multi-cloud-storage), 2024-11-06
- [Optimizing JuiceFS Read Performance: Readahead, Prefetch, and Cache](https://juicefs.com/en/blog/engineering/optimize-read-performance), 2024-08-06, Feihu Mo
- [Smooth Upgrade: Implementation and Usage](https://juicefs.com/en/blog/engineering/smooth-upgrade), 2024-05-08, Jian Zhi
- [How We Optimized ACL Implementation for Minimal Performance Impact](https://juicefs.com/en/blog/engineering/access-control-list), 2024-04-30, Jiefeng Huang
- [98% GPU Utilization Achieved in 1k GPU-Scale AI Training Using Distributed Cache](https://juicefs.com/en/blog/engineering/ai-gpu-utilization-mlperf-benchmark), 2024-03-07, Feihu Mo
- [How a Distributed File System in Go Reduced Memory Usage by 90%](https://juicefs.com/en/blog/engineering/reduce-metadata-memory-usage), 2024-02-22, Sandy
- [How We Achieved a 40x Performance Boost in Metadata Backup and Recovery](https://juicefs.com/en/blog/engineering/increase-performance-metadata-backup-recovery), 2023-12-20, Jian Zhi
- [A Deep Dive into the Design of Directory Quotas in JuiceFS](https://juicefs.com/en/blog/engineering/design-juicefs-directory-quotas), 2023-10-26, Sandy

### Tutorial, guide, and best practice

- [JuiceFS Enterprise 5.3: 500B+ Files per File System & RDMA Support](https://juicefs.com/en/blog/release-notes/juicefs-enterprise-5-3-rdma-support), 2026-02-04, Sandy
- [How Just Two Cache Nodes Achieved 1.45 TB/s Throughput](https://juicefs.com/en/blog/solutions/cache-nodes-support-high-throughput), 2026-01-29, Jerry Cai
- [JuiceFS Writeback: The Write Acceleration Mechanism and Its Applicable Scenarios](https://juicefs.com/en/blog/solutions/juicefs-write-acceleration), 2025-09-11, Jerry Cai
- [JuiceFS Community 1.3: Python SDK, Faster Backup, SQL & Windows Optimizations](https://juicefs.com/en/blog/release-notes/juicefs-1-3-python-sdk-backup-sql-windows-optimization), 2025-07-09
- [JuiceFS 1.3 Beta 2 Integrates Apache Ranger for Fine-Grained Access Control](https://juicefs.com/en/blog/release-notes/juicefs-1-3-integrates-apache-ranger-access-control), 2025-06-18, Youpeng Tang
- [JuiceFS Enterprise Edition 5.2: Supporting Hundreds of Billions of Files and Windows Clients](https://juicefs.com/en/blog/release-notes/juicefs-5-2-windows-client), 2025-06-05
- [​​JuiceFS 1.3 Beta: Enhanced Support for SQL Databases, a New Option for Billion-Scale Metadata Management](https://juicefs.com/en/blog/release-notes/juicefs-1-3-support-sql-database), 2025-04-28, Fangxin Lou
- [Automated Cache Management: JuiceFS Enterprise Edition Introduces Cache Group Operator](https://juicefs.com/en/blog/usage-tips/automated-cache-management-cache-group-operator), 2025-01-16, Xuhui Zhang
- [Database Release and End-to-End Testing: Bringing Modern Software Development Best Practices to the Data World](https://juicefs.com/en/blog/user-stories/end-to-end-test-clickhouse-database-clone), 2024-12-04, Tao Ma @ Jerry
- [JuiceFS CSI: Smooth Upgrades of Mount Pods and Implementation Details](https://juicefs.com/en/blog/usage-tips/mount-pod-smooth-upgrade), 2024-11-13, Weiwei Zhu
- [Getting Started with the JuiceFS Python SDK](https://juicefs.com/en/blog/usage-tips/use-python-sdk), 2024-10-30, Herald Yu
- [JuiceFS CSI Workflow: K8s Pod Creation with PVs](https://juicefs.com/en/blog/usage-tips/csi-workflow-kubernetes-pod), 2024-09-30, Arthur
- [JuiceFS Enterprise 5.1: Write Support for Mirrors, Python SDK, and AI Enhancements](https://juicefs.com/en/blog/release-notes/uicefs-enterprise-5-1-artificial-intelligence), 2024-09-19
- [How to Check If a Database or Object Storage Is Used by JuiceFS](https://juicefs.com/en/blog/usage-tips/check-database-object-storage-in-use), 2024-08-22, Herald Yu
- [Metabit Trading Built a Cloud-Based Quantitative Research Platform with JuiceFS](https://juicefs.com/en/blog/user-stories/build-cloud-quantitative-platform-posix-compatible-storage), 2024-08-14, Jianhong Li @ Metabit Trading
- [Empowering NAS for AI Training with JuiceFS Direct-Mode NFS](https://juicefs.com/en/blog/usage-tips/direct-nfs)，2024-07-25，Herald Yu
- [How to Deploy SeaweedFS+TiKV for Using JuiceFS](https://juicefs.com/en/blog/usage-tips/seaweedfs-tikv), 2024-07-11, Jinhao Yang @ SmartMore
- [JuiceFS 1.2: Introducing Enterprise-Grade Permission Management and Smooth Upgrades](https://juicefs.com/en/blog/release-notes/juicefs-12), 2024-06-20
- [JuiceFS S3 Gateway: IAM and Bucket Event Notifications](https://juicefs.com/en/blog/usage-tips/s3-gateway), 2024-06-13, Herald Yu
- [Managing POSIX ACL Permissions in JuiceFS](https://juicefs.com/en/blog/usage-tips/manage-acl), 2024-06-06, Herald Yu
- [Data Sync in JuiceFS 1.2: Enhanced Selective Sync and Performance Optimizations](https://juicefs.com/en/blog/usage-tips/data-sync), 2024-05-16, Jian Zhi
- [JuiceFS 1.2: Gateway Upgrade, Enhanced Multi-User Permission Management](https://juicefs.com/en/blog/release-notes/juicefs-12-beta-1), 2024-04-22, Jian Zhi
- [How to Monitor the JuiceFS File System with Grafana Cloud](https://juicefs.com/en/blog/usage-tips/monitor-file-system-grafana-cloud), 2024-04-18, Herald Yu
- [How to Persist Data in Google Colab Using JuiceFS](https://juicefs.com/en/blog/usage-tips/colab-persist-data), 2024-03-27, Jet
- [How to Build a Ceph Cluster and Integrate with the JuiceFS File System](https://juicefs.com/en/blog/usage-tips/build-ceph-cluster-integrate-juicefs-file-system), 2023-12-07, Yifu Liu
- [6 Essential Tips for JuiceFS Users](https://juicefs.com/en/blog/usage-tips/juicefs-user-tips-distributed-file-storage-system), 2023-11-23, Herald Yu
- [What's New in JuiceFS Enterprise Edition 5.0](https://juicefs.com/en/blog/release-notes/juicefs-enterprise-edition-v5), 2023-11-20
- [Configuring Samba and NFS on JuiceFS to Unlock Unlimited Cloud Storage](https://juicefs.com/en/blog/usage-tips/scalable-cloud-storage-samba-nfs-shares-juicefs), 2023-08-29, Herald Yu
- [How to Store and Share AI Models for Stable Diffusion in the Cloud](https://juicefs.com/en/blog/usage-tips/share-store-model-data-stable-diffusion-cloud), 2023-07-19, Herald Yu
- [JuiceFS Enterprise Edition: Architecture, Features, and Community Edition Comparison](https://juicefs.com/en/blog/solutions/juicefs-enterprise-edition-features-vs-community-edition), 2023-06-06, Changjian Gao
- [How to Boost AI Model Training with a Distributed Storage System](https://juicefs.com/en/blog/usage-tips/how-to-use-juicefs-to-speed-up-ai-model-training), 2023-04-25, Changjian Gao
- [How To Use JuiceFS To Store Data On DigitalOcean](https://www.youtube.com/watch?v=pdFzyflcRGA&t=75s), Youtube video, by Education Ecosystem
- [Guidance on selecting metadata engine in JuiceFS](https://juicefs.com/en/blog/usage-tips/juicefs-metadata-engine-selection-guide), 2022-10-14, Sandy
- [The strengths and weaknesses of using Redis as the JuiceFS metadata engine](https://juicefs.com/en/blog/usage-tips/introduce-redis-as-juicefs-metadata-engine), 2022-07-22, Changjian Gao
- [How JuiceFS uses Redis as a Metastore](https://www.youtube.com/watch?v=P7H1H-Zj5oU&t=757s) on Redis Monthly Live with Davies Liu and Mikhail Volkov, YouTube video
- [Tutorial, how to use JuiceFS with Cloudflare R2](https://github.com/centminmod/centminmod-juicefs), George Liu (eva2000)
- [JuiceFS Source Code Analysis](https://github.com/dollarkillerx/juicefs-source-analysis), Dollarkillerx

### Others

- [3,000 Concurrent Renders: The JuiceFS Client for Windows Averages 22m 22s](https://juicefs.com/en/blog/solutions/juicefs-windows-performance-test), 2025-08-28, Jerry Cai
- [LanceDB Query Performance: NVMe vs. EBS vs. JuiceFS vs. EFS vs. FSx for Lustre](https://juicefs.com/en/blog/solutions/lancedb-query-performance-benchmark-storage-solutions), 2025-08-13, Brent Bai
- [How JuiceFS Transformed Idle Resources into a 70 GB/s Cache Pool](https://juicefs.com/en/blog/solutions/idle-resources-elastic-high-throughput-storage-cache-pool), 2025-08-07, Jerry Cai
- [Lustre vs. JuiceFS: A Comparative Analysis of Architecture, File Distribution, and Features](https://juicefs.com/en/blog/engineering/lustre-vs-juicefs-architecture-file-distribution-feature), 2025-07-02, Qing Liu
- [Introducing JuiceFS Python SDK: 3x Faster than FUSE for Data Loading](https://juicefs.com/en/blog/release-notes/juicefs-1-3-python-sdk), 2025-05-22, Feihu Mo
- [DeepSeek 3FS vs. JuiceFS: Architectures, Features, and Innovations in AI Storage](https://juicefs.com/en/blog/engineering/deepseek-3fs-vs-juicefs-architecture-feature), 2025-04-02, Qing Liu
- [How JuiceFS Achieves Consistency and Low-Latency Data Distribution in Multi-Cloud Architectures](https://juicefs.com/en/blog/solutions/consistency-low-latency-data-distribution-multi-cloud-storage), 2025-01-22, Jerry Cai
- [JuiceFS Evaluation with AWS EFS and FSx for Lustre](https://juicefs.com/en/blog/engineering/juicefs-vs-efs-fsx-for-lustre), 2024-08-07, Brent Bai
- [MemVerge Chose JuiceFS: Small File Writes 5x Faster than s3fs](https://juicefs.com/en/blog/user-stories/vs-s3fs-memverge), 2024-07-31, Jon Jiang @ MemVerge
- [From HPC to AI: Evolution and Performance Evaluation of File Systems](https://juicefs.com/en/blog/user-stories/hpc-ai-file-system), 2024-05-23, Weizheng Lu @ Renmin University of China
- [Is POSIX Really Unsuitable for Object Stores? A Data-Backed Answer](https://juicefs.com/en/blog/community/posix-object-store-suitable-file-system), 2023-11-16, Herald Yu
- [Comparative Analysis of Major Distributed File System Architectures: GFS vs. Tectonic vs. JuiceFS](https://juicefs.com/en/blog/engineering/compare-distributed-file-system-architectures-gfs-tectonic-juicefs), 2023-10-20, Changjian Gao
- [JuiceFS vs. SeaweedFS](https://juicefs.com/docs/community/comparison/juicefs_vs_seaweedfs), 2023-09-31, Yifu Liu
- [GlusterFS vs. JuiceFS](https://juicefs.com/en/blog/engineering/glusterfs-vs-juicefs-distributed-storage), 2023-09-21, Sandy

## Contribution

If you want to add JuiceFS application cases to this list, you can do so through the following methods:

### GitHub contribution

Feel free to contribute by creating a branch in this repository on GitHub. Add the title and URL of your case page to the appropriate category, and then submit a pull request for review. Our team will review the submission and merge the branch if approved.

### Social media

You can join the official JuiceFS [Slack channel](https://go.juicefs.com/slack). There, you can get in touch with any staff member to discuss your contribution.


================================================
FILE: docs/en/community/integrations.md
================================================
---
sidebar_label: Integrations
sidebar_position: 2
slug: /integrations
---

# Community Integrations

## SDK

- [Megvii](https://en.megvii.com) team contributed [Python SDK](https://github.com/megvii-research/juicefs-python).

## AI

- [UniSound](https://www.unisound.com) team participated in the development of [Fluid](https://github.com/fluid-cloudnative/fluid) JuiceFSRuntime cache engine, please refer to [this document](https://github.com/fluid-cloudnative/fluid/blob/master/docs/en/samples/juicefs_runtime.md).
- [PaddlePaddle](https://github.com/paddlepaddle/paddle) team has integrated JuiceFS into [Paddle Operator](https://github.com/PaddleFlow/paddle-operator), please refer to [the document](https://github.com/PaddleFlow/paddle-operator/blob/sampleset/docs/en/ext-overview.md).
- Build a distributed [Milvus](https://milvus.io) cluster based on JuiceFS, the Milvus team wrote a [case sharing](https://zilliz.com/blog/building-a-milvus-cluster-based-on-juicefs) and [tutorial](https://tutorials.milvus.io/en-juicefs/index.html?index=..%2F..index#0).

## Big data

- [Apache Kylin 4.0](http://kylin.apache.org) that is a OLAP engine could deploy with the JuiceFS in dissaggregated storage and compute architecture on every public cloud platform, there is [the video sharing](https://www.bilibili.com/video/BV1c54y1W72S) (in Chinese) and [the post](https://juicefs.com/en/blog/optimize-kylin-on-juicefs) for this use case.
- [Apache Hudi](https://hudi.apache.org) supports JuiceFS since v0.10.0, you can refer to [official documentation](https://hudi.apache.org/docs/jfs_hoodie) to learn how to configure JuiceFS.

## DevOps

- [Terraform Provider for JuiceFS](https://github.com/toowoxx/terraform-provider-juicefs) by Toowoxx IT GmbH, an IT service company from Germany

## Alfred

JuiceFS documents offers an Alfred workflow to search documents of JuiceFS with instant results

![JuiceFS Alfred Workflow](../images/workflow-root.png)

Simply type your keyword into Alfred (default: jfs) and provide a query to see instant search results from JuiceFS documents.

### Install

Workflow of Alfred 5 version: [Latest Download](https://github.com/zwwhdls/juicefs-alfred-workflow/releases/download/v0.2.0/JuiceFS.Search.alfredworkflow)

### Usage

Search all documents of JuiceFS, including community, enterprise and CSI:

```
# JuiceFS community documents
jfs ce <search>
# JuiceFS enterprise documents
jfs ee <search>
# JuiceFS csi documents
jfs csi <search>
```

![JuiceFS Alfred Workflow demo](../images/workflow-demo.gif)

### Workflow Variables

- `API_KEY`: API key for algolia which JuiceFS documents uses. Default value is ok.
- `LANGUAGE`: Language of JuiceFS documents to search. Default is `en`.
- `HITS_PER_PAGE`: Hits of each search. Default is `10`.

![JuiceFS Alfred Workflow configuration](../images/configuration.png)


================================================
FILE: docs/en/community/usage_tracking.md
================================================
---
title: Usage Tracking
sidebar_position: 4
---

JuiceFS by default collects and reports **anonymous** usage data. It only collects core metrics (e.g. version number, file system size), no user or any sensitive data will be collected. You could review related code [here](https://github.com/juicedata/juicefs/blob/main/pkg/usage/usage.go).

These data help us understand how the community is using this project. You could disable reporting easily by command line option `--no-usage-report`:

```
juicefs mount --no-usage-report
```


================================================
FILE: docs/en/deployment/_share_via_nfs.md
================================================
---
title: Deploy JuiceFS with NFS
sidebar_position: 5
---


================================================
FILE: docs/en/deployment/_share_via_smb.md
================================================
---
title: Deploy JuiceFS with SMB
sidebar_position: 6
---


================================================
FILE: docs/en/deployment/automation.md
================================================
---
title: Automated Deployment
sidebar_position: 7
---

Automated deployment is recommended when JuiceFS Client is to be installed on a large number of hosts.

Below examples only demonstrate the mount process, you should [Create a file system](../getting-started/standalone.md#juicefs-format) before getting started.

## Ansible

Below is the [Ansible](https://ansible.com) example to install and mount JuiceFS in localhost:

```yaml
- hosts: localhost
  tasks:
    - set_fact:
        # Change accordingly
        meta_url: sqlite3:///tmp/myjfs.db
        jfs_path: /jfs
        jfs_pkg: /tmp/juicefs-ce.tar.gz
        jfs_bin_dir: /usr/local/bin

    - get_url:
        # Change download URL accordingly
        url: https://d.juicefs.com/juicefs/releases/download/v1.0.2/juicefs-1.0.2-linux-amd64.tar.gz
        dest: "{{jfs_pkg}}"

    - ansible.builtin.unarchive:
        src: "{{jfs_pkg}}"
        dest: "{{jfs_bin_dir}}"
        include:
          - juicefs

    - name: Create symbolic for fstab
      ansible.builtin.file:
        src: "{{jfs_bin_dir}}/juicefs"
        dest: "/sbin/mount.juicefs"
        state: link

    - name: Mount JuiceFS and create fstab entry
      mount:
        path: "{{jfs_path}}"
        src: "{{meta_url}}"
        fstype: juicefs
        opts: _netdev
        state: mounted
```


================================================
FILE: docs/en/deployment/hadoop_java_sdk.md
================================================
---
title: Use JuiceFS on Hadoop Ecosystem
sidebar_position: 3
slug: /hadoop_java_sdk
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

JuiceFS provides [Hadoop-compatible File System](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/filesystem/introduction.html) by Hadoop Java SDK. Various applications in the Hadoop ecosystem can smoothly use JuiceFS to store data without changing the code.

## Requirements

### 1. Hadoop and related components

JuiceFS Hadoop Java SDK is compatible with Hadoop 2.x and Hadoop 3.x. As well as variety of components in Hadoop ecosystem.

### 2. User permissions

JuiceFS uses local "User/UID" and "Group/GID" mappings by default, and when used in a distributed environment, to avoid permission issues, please refer to [documentation](../administration/sync_accounts_between_multiple_hosts.md) synchronizes the "User/UID" and "Group/GID" that needs to be used to all Hadoop nodes. It is also possible to define a global user and group file to make all nodes in the cluster share the permission configuration. Please see [here](#other-configurations) for related configurations.

### 3. File system

You should first create at least one JuiceFS file system to provide storage for components related to the Hadoop ecosystem through the JuiceFS Java SDK. When deploying the Java SDK, specify the metadata engine address of the created file system in the configuration file.

To create a file system, please refer to [our quick start](../getting-started/standalone.md).

:::note
If you want to use JuiceFS in a distributed environment, when creating a file system, please plan the object storage and database to be used reasonably to ensure that they can be accessed by each node in the cluster.
:::

### 4. Memory

Depending on the read and write load of computing tasks (such as Spark executor), JuiceFS Hadoop Java SDK may require an additional 4 * [`juicefs.memory-size`](#io-configurations) off-heap memory to speed up read and write performance. By default, it is recommended to configure at least 1.2GB of off-heap memory for compute tasks.

### 5. Java runtime version

JuiceFS Hadoop Java SDK is compiled with JDK 8 by default. If it needs to be used in a higher version of Java runtime (such as Java 17), the following options need to be added to the JVM parameters to allow the use of reflection API:

```shell
--add-exports=java.base/sun.nio.ch=ALL-UNNAMED
```

For more information on the above option, please refer to [official documentation](https://docs.oracle.com/en/java/javase/17/migrate/migrating-jdk-8-later-jdk-releases.html#GUID-7BB28E4D-99B3-4078-BDC4-FC24180CE82B).

## Install and compile the client

### Install the pre-compiled client

Please refer to the ["Installation"](../getting-started/installation.md#install-the-pre-compiled-client) document to learn how to download the precompiled JuiceFS Hadoop Java SDK.

### Compile the client manually

:::note
No matter which system environment the client is compiled for, the compiled JAR file has the same name and can only be deployed in the matching system environment. For example, when compiled in Linux, it can only be used in the Linux environment. In addition, since the compiled package depends on glibc, it is recommended to compile with a lower version system to ensure better compatibility.
:::

Compilation depends on the following tools:

- [Go](https://golang.org) 1.20+
- JDK 8+
- [Maven](https://maven.apache.org) 3.3+
- Git
- make
- GCC 5.4+

#### Linux and macOS

Clone the repository:

```shell
git clone https://github.com/juicedata/juicefs.git
```

Enter the directory and compile:

```shell
cd juicefs/sdk/java
make
```

:::note
If Ceph RADOS is used to store data, you need to install `librados-dev` first and [build `libjfs.so`]`.
:::

```shell
cd juicefs/sdk/java
make ceph
```

After the compilation, you can find the compiled `JAR` file in the `sdk/java/target` directory, including two versions:

- Contains third-party dependent packages: `juicefs-hadoop-X.Y.Z.jar`
- Does not include third-party dependent packages: `original-juicefs-hadoop-X.Y.Z.jar`

It is recommended to use a version that includes third-party dependencies.

#### Windows

The client used in the Windows environment needs to be obtained through cross-compilation on Linux or macOS. The compilation depends on [mingw-w64](https://www.mingw-w64.org), which needs to be installed first.

The steps are the same as compiling on Linux or macOS. For example, on the Ubuntu system, install the `mingw-w64` package first to solve the dependency problem:

```shell
sudo apt install mingw-w64
```

Clone and enter the JuiceFS source code directory, execute the following code to compile:

```shell
cd juicefs/sdk/java
```

```shell
make win
```

## Deploy the client

To enable each component of the Hadoop ecosystem to correctly identify JuiceFS, the following configurations are required:

1. Place the compiled JAR file and `$JAVA_HOME/lib/tools.jar` into the `classpath` of the component. The installation paths of common big data platforms and components are shown in the table below.
2. Put JuiceFS configurations into the configuration file of each Hadoop ecosystem component (usually `core-site.xml`), see [Client Configurations](#client-configurations) for details.

It is recommended to place the JAR file in a fixed location, and the other locations are called it through symbolic links.

### Big Data Platforms

| Name              | Installing Paths                                                                                                                                                                                                                                                                                                                               |
|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| CDH               | `/opt/cloudera/parcels/CDH/lib/hadoop/lib`<br></br>`/opt/cloudera/parcels/CDH/spark/jars`<br></br>`/var/lib/impala`                                                                                                                                                                                                                            |
| HDP               | `/usr/hdp/current/hadoop-client/lib`<br></br>`/usr/hdp/current/hive-client/auxlib`<br></br>`/usr/hdp/current/spark2-client/jars`                                                                                                                                                                                                               |
| Amazon EMR        | `/usr/lib/hadoop/lib`<br></br>`/usr/lib/spark/jars`<br></br>`/usr/lib/hive/auxlib`                                                                                                                                                                                                                                                             |
| Alibaba Cloud EMR | `/opt/apps/ecm/service/hadoop/*/package/hadoop*/share/hadoop/common/lib`<br></br>`/opt/apps/ecm/service/spark/*/package/spark*/jars`<br></br>`/opt/apps/ecm/service/presto/*/package/presto*/plugin/hive-hadoop2`<br></br>`/opt/apps/ecm/service/hive/*/package/apache-hive*/lib`<br></br>`/opt/apps/ecm/service/impala/*/package/impala*/lib` |
| Tencent Cloud EMR | `/usr/local/service/hadoop/share/hadoop/common/lib`<br></br>`/usr/local/service/presto/plugin/hive-hadoop2`<br></br>`/usr/local/service/spark/jars`<br></br>`/usr/local/service/hive/auxlib`                                                                                                                                                   |
| UCloud UHadoop    | `/home/hadoop/share/hadoop/common/lib`<br></br>`/home/hadoop/hive/auxlib`<br></br>`/home/hadoop/spark/jars`<br></br>`/home/hadoop/presto/plugin/hive-hadoop2`                                                                                                                                                                                  |
| Baidu Cloud EMR   | `/opt/bmr/hadoop/share/hadoop/common/lib`<br></br>`/opt/bmr/hive/auxlib`<br></br>`/opt/bmr/spark2/jars`                                                                                                                                                                                                                                        |

### Community Components

| Name      | Installing Paths                                                                        |
|-----------|-----------------------------------------------------------------------------------------|
| Hadoop    | `${HADOOP_HOME}/share/hadoop/common/lib/`, `${HADOOP_HOME}/share/hadoop/mapreduce/lib/` |
| Spark     | `${SPARK_HOME}/jars`                                                                    |
| Presto    | `${PRESTO_HOME}/plugin/hive-hadoop2`                                                    |
| Trino     | `${TRINO_HOME}/plugin/hive`                                                             |
| Flink     | `${FLINK_HOME}/lib`                                                                     |
| StarRocks | `${StarRocks_HOME}/fe/lib/`, `${StarRocks_HOME}/be/lib/hadoop/common/lib`               |

### Client Configurations

Please refer to the following table to set the relevant parameters of the JuiceFS file system and write it into the configuration file, which is generally `core-site.xml`.

#### Core Configurations

| Configuration                    | Default Value                | Description                                                                                                                                                                                                                                                                                  |
|----------------------------------|------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `fs.jfs.impl`                    | `io.juicefs.JuiceFileSystem` | Specify the storage implementation to be used. By default, `jfs://` scheme is used. If you want to use different scheme (e.g. `cfs://`), just modify it to `fs.cfs.impl`. No matter what scheme you use, it is always access the data in JuiceFS.                                             |
| `fs.AbstractFileSystem.jfs.impl` | `io.juicefs.JuiceFS`         | Specify the storage implementation to be used. By default, `jfs://` scheme is used. If you want to use different scheme (e.g. `cfs://`), just modify it to `fs.AbstractFileSystem.cfs.impl`. No matter what scheme you use, it is always access the data in JuiceFS.                          |
| `juicefs.meta`                   |                              | Specify the metadata engine address of the pre-created JuiceFS file system. You can configure multiple file systems for the client at the same time through the format of `juicefs.{vol_name}.meta`. Refer to ["Multiple file systems configuration"](#multiple-file-systems-configuration). |

#### Cache Configurations

| Configuration                | Default Value | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
|------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `juicefs.cache-dir`          | `memory`      | Directory paths of local cache. Use colon to separate multiple paths. Also support wildcard in path. **It's recommended create these directories manually and set `0777` permission so that different applications could share the cache data.** If not specified, default to process memory.                                                                                                                                                                                                               |
| `juicefs.cache-size`         | 100           | Maximum size of local cache in MiB. Default size is small because Hadoop SDK uses memory as default cache location. It's the total size when set multiple cache directories.                                                                                                                                                                                                                                                                                                                                |
| `juicefs.cache-full-block`   | `true`        | Whether cache every read blocks, `false` means only cache random/small read blocks.                                                                                                                                                                                                                                                                                                                                                                                                                         |
| `juicefs.free-space`         | 0.1           | Min free space ratio of cache directory                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
| `juicefs.open-cache`         | 0             | Open files cache timeout in seconds (0 means disable this feature)                                                                                                                                                                                                                                                                                                                                                                                                                                          |
| `juicefs.attr-cache`         | 0             | Expire of attributes cache in seconds                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
| `juicefs.entry-cache`        | 0             | Expire of file entry cache in seconds                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
| `juicefs.dir-entry-cache`    | 0             | Expire of directory entry cache in seconds                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
| `juicefs.discover-nodes-url` |               | Specify the node discovery API, the node list will be refreshed every 10 minutes. <br/><br/><ul><li>YARN: `yarn`</li><li>Spark Standalone: `http://spark-master:web-ui-port/json/`</li><li>Spark ThriftServer: `http://thrift-server:4040/api/v1/applications/`</li><li>Presto: `http://coordinator:discovery-uri-port/v1/service/presto/`</li><li>File system: `jfs://{VOLUME}/etc/nodes`, you need to create this file manually, and write the hostname of the node into this file line by line</li></ul> |

#### I/O Configurations

| Configuration            | Default Value | Description                                     |
|--------------------------|---------------|-------------------------------------------------|
| `juicefs.max-uploads`    | 20            | The max number of connections to upload         |
| `juicefs.max-downloads`  | 200           | The max number of connections to download       |
| `juicefs.max-deletes`    | 10            | The max number of connections to delete         |
| `juicefs.get-timeout`    | 5             | The max number of seconds to download an object |
| `juicefs.put-timeout`    | 60            | The max number of seconds to upload an object   |
| `juicefs.memory-size`    | 300           | Total read/write buffering in MiB               |
| `juicefs.prefetch`       | 1             | Prefetch N blocks in parallel                   |
| `juicefs.upload-limit`   | 0             | Bandwidth limit for upload in Mbps              |
| `juicefs.download-limit` | 0             | Bandwidth limit for download in Mbps            |
| `juicefs.io-retries`     | 10            | Number of retries after network failure         |
| `juicefs.writeback`      | `false`       | Upload objects in background                    |

#### Other Configurations

| Configuration           | Default Value | Description                                                                                                                                                                 |
|-------------------------|---------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `juicefs.bucket`        |               | Specify a different endpoint for object storage                                                                                                                             |
| `juicefs.debug`         | `false`       | Whether enable debug log                                                                                                                                                    |
| `juicefs.access-log`    |               | Access log path. Ensure Hadoop application has write permission, e.g. `/tmp/juicefs.access.log`. The log file will rotate  automatically to keep at most 7 files.           |
| `juicefs.superuser`     | `hdfs`        | The super user                                                                                                                                                              |
| `juicefs.supergroup`    | `supergroup`  | The super user group                                                                                                                                                        |
| `juicefs.users`         | `null`        | The path of username and UID list file, e.g. `jfs://name/etc/users`. The file format is `<username>:<UID>`, one user per line.                                              |
| `juicefs.groups`        | `null`        | The path of group name, GID and group members list file, e.g. `jfs://name/etc/groups`. The file format is `<group-name>:<GID>:<username1>,<username2>`, one group per line. |
| `juicefs.umask`         | `null`        | The umask used when creating files and directories (e.g. `0022`), default value is `fs.permissions.umask-mode`.                                                             |
| `juicefs.push-gateway`  |               | [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) address, format is `<host>:<port>`.                                                                     |
| `juicefs.push-auth`     |               | [Prometheus basic auth](https://prometheus.io/docs/guides/basic-auth) information, format is `<username>:<password>`.                                                       |
| `juicefs.push-graphite` |               | [Graphite](https://graphiteapp.org) address, format is `<host>:<port>`.                                                                                                     |
| `juicefs.push-remote-write` |           | [Prometheus remote write](https://prometheus.io/docs/specs/prw/remote_write_spec) endpoint, format is `http://<host>:<port>`. |
| `juicefs.push-remote-write-auth` |       | Authentication for remote write endpoint, format is `<username>:<password>`. |
| `juicefs.push-interval` | 10            | Metric push interval (in seconds)                                                                                                                                           |
| `juicefs.push-labels`   |               | Metric labels, format is `key1:value1;key2:value2`.                                                                                                                         |
| `juicefs.fast-resolve`  | `true`        | Whether enable faster metadata lookup using Redis Lua script                                                                                                                |
| `juicefs.no-usage-report` | `false`       | Whether disable usage reporting. JuiceFS only collects anonymous usage data (e.g. version number), no user or any sensitive data will be collected.                         |
| `juicefs.no-bgjob`      | `false`       | Disable background jobs (clean-up, backup, etc.)                                                                                                                            |
| `juicefs.backup-meta`   | 3600          | Interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup)                                                                       |
| `juicefs.backup-skip-trash` | `false`       | Skip files and directories in trash when backup metadata.                                                                                                                   |
| `juicefs.heartbeat`     | 12            | Heartbeat interval (in seconds) between client and metadata engine. It's recommended that all clients use the same value.                                                   |
| `juicefs.skip-dir-mtime`              | 100ms         | Minimal duration to modify parent dir mtime.                                                                                                                                |
| `juicefs.subdir`        |               | Allow access only to the subpaths of this directory. Multiple paths can be specified, separated by commas. All other paths, including the root or sibling directories, will be denied access.                                     |

#### Multiple file systems configuration

When multiple JuiceFS file systems need to be used at the same time, all the above configuration items can be specified for a specific file system. You only need to put the file system name in the middle of the configuration item, such as `jfs1` and `jfs2` in the following example:

```xml
<property>
  <name>juicefs.jfs1.meta</name>
  <value>redis://jfs1.host:port/1</value>
</property>
<property>
  <name>juicefs.jfs2.meta</name>
  <value>redis://jfs2.host:port/1</value>
</property>
```

#### Configuration Example

The following is a commonly used configuration example. Please replace the `{HOST}`, `{PORT}` and `{DB}` variables in the `juicefs.meta` configuration with actual values.

```xml
<property>
  <name>fs.jfs.impl</name>
  <value>io.juicefs.JuiceFileSystem</value>
</property>
<property>
  <name>fs.AbstractFileSystem.jfs.impl</name>
  <value>io.juicefs.JuiceFS</value>
</property>
<property>
  <name>juicefs.meta</name>
  <value>redis://{HOST}:{PORT}/{DB}</value>
</property>
<property>
  <name>juicefs.cache-dir</name>
  <value>/data*/jfs</value>
</property>
<property>
  <name>juicefs.cache-size</name>
  <value>1024</value>
</property>
<property>
  <name>juicefs.access-log</name>
  <value>/tmp/juicefs.access.log</value>
</property>
```

## Configuration in Hadoop

Please refer to the aforementioned configuration tables and add configuration parameters to the Hadoop configuration file `core-site.xml`.

### CDH6

If you are using CDH 6, in addition to modifying `core-site`, you also need to modify `mapreduce.application.classpath` through the YARN service interface, adding:

```shell
$HADOOP_COMMON_HOME/lib/juicefs-hadoop.jar
```

### HDP

In addition to modifying `core-site`, you also need to modify the configuration `mapreduce.application.classpath` through the MapReduce2 service interface and add it at the end (variables do not need to be replaced):

```shell
/usr/hdp/${hdp.version}/hadoop/lib/juicefs-hadoop.jar
```

### Flink

Add configuration parameters to `conf/flink-conf.yaml`. If you only use JuiceFS in Flink, you don't need to configure JuiceFS in the Hadoop environment, you only need to configure the Flink client.

### Hudi

:::note
Hudi supports JuiceFS since v0.10.0, please make sure you are using the correct version.
:::

Please refer to ["Hudi Official Documentation"](https://hudi.apache.org/docs/jfs_hoodie) to learn how to configure JuiceFS.

### Kafka Connect

It is possible to use Kafka Connect and HDFS Sink Connector（[HDFS 2](https://docs.confluent.io/kafka-connect-hdfs/current/overview.html) and [HDFS 3](https://docs.confluent.io/kafka-connect-hdfs3-sink/current/overview.html)）to store data on JuiceFS.

First you need to add JuiceFS SDK to `classpath` in Kafka Connect, e.g., `/usr/share/java/confluentinc-kafka-connect-hdfs/lib`.

While creating a Connect Sink task, configuration needs to be set up as follows:

- Specify `hadoop.conf.dir` as the directory that contains the configuration file `core-site.xml`. If it is not running in Hadoop environment, you can create a separate directory such as `/usr/local/juicefs/hadoop`, and then add the JuiceFS related configurations to `core-site.xml`.
- Specify `store.url` as a path starting with `jfs://`.

For example:

```ini
# Other configuration items are omitted.
hadoop.conf.dir=/path/to/hadoop-conf
store.url=jfs://path/to/store
```

### HBase

JuiceFS can be used by HBase for HFile, but is not fast (low latency) enough for Write Ahead Log (WAL), because it take much longer time to persist data into object storage than memory of DataNode.

It is recommended to deploy a small HDFS cluster to store WAL and HFile files to be stored on JuiceFS.

#### Create a new HBase cluster

Modify `hbase-site.xml`:

```xml title="hbase-site.xml"
<property>
  <name>hbase.rootdir</name>
  <value>jfs://{vol_name}/hbase</value>
</property>
<property>
  <name>hbase.wal.dir</name>
  <value>hdfs://{ns}/hbase-wal</value>
</property>
```

#### Modify existing HBase cluster

In addition to modifying the above configurations, since the HBase cluster has already stored some data in ZooKeeper, in order to avoid conflicts, there are two solutions:

1. Delete the old cluster

   Delete the znode (default `/hbase`) configured by `zookeeper.znode.parent` via the ZooKeeper client.

   :::note
   This operation will delete all data on this HBase cluster.
   :::

2. Use a new znode

   Keep the znode of the original HBase cluster so that it can be recovered later. Then configure a new value for `zookeeper.znode.parent`:

   ```xml title="hbase-site.xml"
   <property>
     <name>zookeeper.znode.parent</name>
     <value>/hbase-jfs</value>
   </property>
   ```

### Restart Services

When the following components need to access JuiceFS, they should be restarted.

:::note
Before restart, you need to confirm JuiceFS related configuration has been written to the configuration file of each component, usually you can find them in `core-site.xml` on the machine where the service of the component was deployed.
:::

| Components | Services                   |
| ---------- | -------------------------- |
| Hive       | HiveServer<br />Metastore  |
| Spark      | ThriftServer               |
| Presto     | Coordinator<br />Worker    |
| Impala     | Catalog Server<br />Daemon |
| HBase      | Master<br />RegionServer   |

HDFS, Hue, ZooKeeper and other services don't need to be restarted.

When `Class io.juicefs.JuiceFileSystem not found` or `No FilesSystem for scheme: jfs` exceptions was occurred after restart, reference [FAQ](#faq).

### Trash

JuiceFS Hadoop Java SDK also has the same trash function as HDFS, which needs to be enabled by setting `fs.trash.interval` and `fs.trash.checkpoint.interval`, please refer to [HDFS documentation](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html#File_Deletes_and_Undeletes) for more information.

## Environmental Verification

After the deployment of the JuiceFS Java SDK, the following methods can be used to verify the success of the deployment.

### Hadoop CLI

```bash
hadoop fs -ls jfs://{JFS_NAME}/
```

:::info
The `JFS_NAME` is the volume name when you format JuiceFS file system.
:::

### Hive

```sql
CREATE TABLE IF NOT EXISTS person
(
  name STRING,
  age INT
) LOCATION 'jfs://{JFS_NAME}/tmp/person';
```

### Java/Scala project

1. Add Maven or Gradle dependencies:

   <Tabs>
     <TabItem value="maven" label="Maven">

   ```xml
   <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
       <version>{HADOOP_VERSION}</version>
       <scope>provided</scope>
   </dependency>
   <dependency>
       <groupId>io.juicefs</groupId>
       <artifactId>juicefs-hadoop</artifactId>
       <version>{JUICEFS_HADOOP_VERSION}</version>
       <scope>provided</scope>
   </dependency>
   ```

     </TabItem>
     <TabItem value="gradle" label="Gradle">

   ```groovy
   dependencies {
     implementation 'org.apache.hadoop:hadoop-common:${hadoopVersion}'
     implementation 'io.juicefs:juicefs-hadoop:${juicefsHadoopVersion}'
   }
   ```

     </TabItem>
   </Tabs>

2. Use the following sample code to verify:

<!-- autocorrect: false -->
   ```java
   package demo;

   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.FileStatus;
   import org.apache.hadoop.fs.FileSystem;
   import org.apache.hadoop.fs.Path;

   public class JuiceFSDemo {
       public static void main(String[] args) throws Exception {
           Configuration conf = new Configuration();
           conf.set("fs.jfs.impl", "io.juicefs.JuiceFileSystem");
           conf.set("juicefs.meta", "redis://127.0.0.1:6379/0");  // JuiceFS metadata engine URL
           Path p = new Path("jfs://{JFS_NAME}/");  // Please replace "{JFS_NAME}" with the correct value
           FileSystem jfs = p.getFileSystem(conf);
           FileStatus[] fileStatuses = jfs.listStatus(p);
           // Traverse JuiceFS file system and print file paths
           for (FileStatus status : fileStatuses) {
               System.out.println(status.getPath());
           }
       }
   }
   ```
<!-- autocorrect: true -->

## Monitoring metrics collection

Please see the ["Monitoring"](../administration/monitoring.md) documentation to learn how to collect and display JuiceFS monitoring metrics.

## Benchmark

Here are a series of methods to use the built-in stress testing tool of the JuiceFS client to test the performance of the client environment that has been successfully deployed.

### 1. Local Benchmark

#### Metadata

- **create**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench create -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

  This command will create 10000 empty files

- **open**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench open -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

  This command will open 10000 files without reading data

- **rename**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench rename -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

- **delete**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench delete -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

- **For reference**

  | Operation | TPS  | Latency (ms) |
  | --------- | ---- | ------------ |
  | create    | 644  | 1.55         |
  | open      | 3467 | 0.29         |
  | rename    | 483  | 2.07         |
  | delete    | 506  | 1.97         |

#### I/O Performance

- **sequential write**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -write -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local
  ```

- **sequential read**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -read -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local
  ```

  When run the cmd for the second time, the result may be much better than the first run. It's because the data was cached in memory, just clean the local disk cache.

- **For reference**

  | Operation | Throughput (MB/s) |
  | --------- | ----------------- |
  | write     | 647               |
  | read      | 111               |

If the network bandwidth of the machine is relatively low, it can generally reach the network bandwidth bottleneck.

### 2. Distributed Benchmark

The following command will start the MapReduce distributed task to test the metadata and IO performance. During the test, it is necessary to ensure that the cluster has sufficient resources to start the required map tasks.

Computing resources used in this test:

- **Server**: 4 cores and 32 GB memory, burst bandwidth 5Gbit/s x 3
- **Database**: Alibaba Cloud Redis 5.0 Community 4G Master-Slave Edition

#### Metadata

- **create**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench create -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  10 map task, each has 10 threads, each thread create 1000 empty file. 100000 files in total

- **open**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench open -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  10 map task, each has 10 threads, each thread open 1000 file. 100000 files in total

- **rename**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench rename -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  10 map task, each has 10 threads, each thread rename 1000 file. 100000 files in total

- **delete**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench delete -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  10 map task, each has 10 threads, each thread delete 1000 file. 100000 files in total

- **For reference**

  - 10 threads

    | Operation | IOPS | Latency (ms) |
    | --------- | ---- | ------------ |
    | create    | 4178 | 2.2          |
    | open      | 9407 | 0.8          |
    | rename    | 3197 | 2.9          |
    | delete    | 3060 | 3.0          |

  - 100 threads

    | Operation | IOPS  | Latency (ms) |
    | --------- | ----  | ------------ |
    | create    | 11773 | 7.9          |
    | open      | 34083 | 2.4          |
    | rename    | 8995  | 10.8         |
    | delete    | 7191  | 13.6         |

#### I/O Performance

- **sequential write**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -write -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO
  ```

  10 map task, each task write 10000MB random data sequentially

- **sequential read**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -read -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO
  ```

  10 map task, each task read 10000MB random data sequentially

- **For reference**

  | Operation | Average throughput (MB/s) | Total Throughput (MB/s) |
  | --------- | ------------------------- | ----------------------- |
  | write     | 198                       | 1835                    |
  | read      | 124                       | 1234                    |

### 3. TPC-DS

The test dataset is 100GB in size, and both Parquet and ORC file formats are tested.

This test only tests the first 10 queries.

Spark Thrift JDBC/ODBC Server is used to start the Spark resident process and then submit the task via Beeline connection.

#### Test Hardware

| Node Category | Instance Type               | CPU | Memory | Disk                                                      | Number |
| ------------- | -------------               | --- | ------ | ----                                                      | ------ |
| Master        | Alibaba Cloud ecs.r6.xlarge | 4   | 32GiB  | System Disk: 100GiB                                       | 1      |
| Core          | Alibaba Cloud ecs.r6.xlarge | 4   | 32GiB  | System Disk: 100GiB<br />Data Disk: 500GiB Ultra Disk x 2 | 3      |

#### Software Configuration

##### Spark Thrift JDBC/ODBC Server

```shell
${SPARK_HOME}/sbin/start-thriftserver.sh \
  --master yarn \
  --driver-memory 8g \
  --executor-memory 10g \
  --executor-cores 3 \
  --num-executors 3 \
  --conf spark.locality.wait=100 \
  --conf spark.sql.crossJoin.enabled=true \
  --hiveconf hive.server2.thrift.port=10001
```

##### JuiceFS Cache Configurations

The 2 data disks of Core node are mounted in the `/data01` and `/data02` directories, and `core-site.xml` is configured as follows:

```xml
<property>
  <name>juicefs.cache-size</name>
  <value>200000</value>
</property>
<property>
  <name>juicefs.cache-dir</name>
  <value>/data*/jfscache</value>
</property>
<property>
  <name>juicefs.cache-full-block</name>
  <value>false</value>
</property>
<property>
  <name>juicefs.discover-nodes-url</name>
  <value>yarn</value>
</property>
<property>
  <name>juicefs.attr-cache</name>
  <value>3</value>
</property>
<property>
  <name>juicefs.entry-cache</name>
  <value>3</value>
</property>
<property>
  <name>juicefs.dir-entry-cache</name>
  <value>3</value>
</property>
```

#### Test

The task submission command is as follows:

```shell
${SPARK_HOME}/bin/beeline -u jdbc:hive2://localhost:10001/${DATABASE} \
  -n hadoop \
  -f query{i}.sql
```

#### Results

JuiceFS can use local disk as a cache to accelerate data access, the following data is the result (in seconds) after 4 runs using Redis and TiKV as the metadata engine of JuiceFS respectively.

##### ORC

| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS |
| ------- | --------------- | -------------- | ---- |
| q1      | 20              | 20             | 20   |
| q2      | 28              | 33             | 26   |
| q3      | 24              | 27             | 28   |
| q4      | 300             | 309            | 290  |
| q5      | 116             | 117            | 91   |
| q6      | 37              | 42             | 41   |
| q7      | 24              | 28             | 23   |
| q8      | 13              | 15             | 16   |
| q9      | 87              | 112            | 89   |
| q10     | 23              | 24             | 22   |

![orc](../images/spark_ql_orc.png)

##### Parquet

| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS |
| ------- | --------------- | -------------- | ---- |
| q1      | 33              | 35             | 39   |
| q2      | 28              | 32             | 31   |
| q3      | 23              | 25             | 24   |
| q4      | 273             | 284            | 266  |
| q5      | 96              | 107            | 94   |
| q6      | 36              | 35             | 42   |
| q7      | 28              | 30             | 24   |
| q8      | 11              | 12             | 14   |
| q9      | 85              | 97             | 77   |
| q10     | 24              | 28             | 38   |

![parquet](../images/spark_sql_parquet.png)

## Permission control by Apache Ranger(from v1.3)

JuiceFS currently supports path permission control by integrating with Apache Ranger's HDFS module. Only supported in Hadoop Java SDK.

### 1. Configurations

The config for Apache Ranger is sotred in the metadata database. You can enable Ranger permission control by the following methods:

```shell
# configure with format
juicefs format META-URL NAME --ranger-rest-url http://localhost:6080 --ranger-service jfs

# or configure with config
juicefs config META-URL --ranger-rest-url http://localhost:6080 --ranger-service jfs

# disable ranger
juicefs config META-URL --ranger-rest-url "" --ranger-service jfs ""
```

### 2. Dependencies

Considering the convenience of use, JuiceFS packages all Ranger dependencies into the JuiceFS SDK. If you encounter version conflicts with Apache Ranger, you may need to modify the version and recompile.

### 3. Tips

#### 3.1 Ranger version

The code is tested on `Ranger2.3` and `Ranger2.4`. As no other features are used except for `HDFS` module authentication, theoretically all other versions are applicable.

#### 3.2 Ranger Audit

Currently, only support authentication function, and the `Ranger Audit` is disabled.

#### 3.3 Ranger's other parameters

To improve usage efficiency, currently only support some **CORE** parameters of Ranger.

#### 3.4 Security tips

Due to the complete open source of the project, it is unavoidable for users to disrupt permission control by replacing parameters such as `ranger-rest-url`. If stricter control is required, it is recommended to compile the code independently and solve the problem by encrypting relevant security parameters.

## FAQ

### 1. `Class io.juicefs.JuiceFileSystem not found` exception

It means JAR file was not loaded, you can verify it by `lsof -p {pid} | grep juicefs`.

You should check whether the JAR file was located properly, or other users have the read permission.

Some Hadoop distribution also need to modify `mapred-site.xml` and put the JAR file location path to the end of the parameter `mapreduce.application.classpath`.

### 2. `No FilesSystem for scheme: jfs` exception

It means JuiceFS Hadoop Java SDK was not configured properly, you need to check whether there is JuiceFS related configuration in the `core-site.xml` of the component configuration.

### 3. What are the similarities and differences between user permission management in JuiceFS and HDFS?

JuiceFS also uses the "User/Group" method to manage file permissions, using local users and groups by default. In order to ensure the unified permissions of different nodes during distributed computing, you can configure global "User/UID" and "Group/GID" mappings through `juicefs.users` and `juicefs.groups` configurations.

### 4. After the data is deleted, it is directly stored in the `.trash` directory of JuiceFS. Although the files are all there, it is difficult to restore the data through the `mv` command as easily as HDFS. Is there any way to achieve a similar effect of HDFS trash?

In the Hadoop application scenario, the functions similar to the HDFS trash are still retained. It needs to be explicitly enabled by `fs.trash.interval` and `fs.trash.checkpoint.interval` configurations, please refer to [document](#trash) for more information.

### 5. What are the benefits of setting the `juicefs.discover-nodes-url` configuration?

In HDFS, each data block will have [`BlockLocation`](https://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/BlockLocation.html) information, which the computing engine uses to schedule the computing tasks as much as possible to the nodes where the data is stored. JuiceFS will calculate the corresponding `BlockLocation` for each data block through the consistent hashing algorithm, so that when the same data is read for the second time, the computing engine may schedule the computing task to the same node, and the data cached on the local disk during the first computing can be used to accelerate data access.

This algorithm needs to know all the computing node information in advance. The `juicefs.discover-nodes-url` configuration is used to obtain these computing node information.

### 6. Does the community version of JuiceFS currently support a Kerberos-authenticated CDH cluster?

Not supported. JuiceFS does not verify the validity of Kerberos users, but can use Kerberos-authenticated username.


================================================
FILE: docs/en/deployment/how_to_use_on_kubernetes.md
================================================
---
title: Use JuiceFS on Kubernetes
sidebar_position: 2
slug: /how_to_use_on_kubernetes
---

JuiceFS is an ideal storage layer for Kubernetes, read this chapter to learn how to use JuiceFS in Kubernetes.

## Use JuiceFS via `hostPath`

If you simply need to use JuiceFS inside Kubernetes pods, without any special requirements (e.g. isolation, permission control), then [`hostPath`](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) can be a good practice, which is also really easy to setup:

1. Install and mount JuiceFS on all Kubernetes worker nodes, [Automated Deployment](./automation.md) is recommended for this type of work.
1. Use `hostPath` volume inside pod definition, and mount a JuiceFS sub-directory to container:

   ```yaml {8-16}
   apiVersion: v1
   kind: Pod
   metadata:
     name: juicefs-app
   spec:
     containers:
       - ...
         volumeMounts:
           - name: jfs-data
             mountPath: /opt/app-data
     volumes:
       - name: jfs-data
         hostPath:
           # Assuming JuiceFS is mounted on /jfs
           path: "/jfs/myapp/"
           type: Directory
   ```

In comparison to using JuiceFS CSI Driver, `hostPath` is a much more simple practice, and easier to debug when things go wrong, but notice that:

* For ease of management, generally all pods use the same host mount point. Lack of isolation may lead to data security issues, and obviously, you won't be able to adjust JuiceFS mount parameters separately for each application. Please evaluate carefully.
* All worker nodes should mount JuiceFS in advance, so when adding a new node to the cluster, JuiceFS needs to be installed and mounted during the initialization process, otherwise the new node does not have a JuiceFS mount point, and the container will not be created.
* The system resources (such as CPU, memory, etc.) occupied by the JuiceFS mounting process on the host are not controlled by Kubernetes, and may occupy too many host resources. You can consider using [`system-reserved`](https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#system-reserved) to properly adjust the system resource reservation of Kubernetes, to reserve more resources for the JuiceFS mount process.
* If the JuiceFS mount process on the host exits unexpectedly, the application pod will not be able to access the mount point normally. In this case, the JuiceFS file system needs to be remounted and the application pod must be rebuilt. However, JuiceFS CSI Driver solves this problem well by providing the [Automatic Mount Point Recovery](https://juicefs.com/docs/csi/recover-failed-mountpoint) mechanism.
* If you're using Docker as Kubernetes container runtime, it's best to start JuiceFS mount prior to Docker in startup order, to avoid containers being created before JuiceFS is properly mounted. For systemd, you can use below unit file to manually control startup order:

  ```systemd title="/etc/systemd/system/docker.service.d/override.conf"
  [Unit]
  # Use below command to obtain JuiceFS mount service name
  # systemctl list-units | grep "\.mount"
  After=network-online.target firewalld.service containerd.service jfs.mount
  ```

## JuiceFS CSI Driver

To use JuiceFS in Kubernetes, refer to [JuiceFS CSI Driver Documentation](https://juicefs.com/docs/csi/introduction).

## Mount JuiceFS in the container

In some cases, you may need to mount JuiceFS volume directly in the container, which requires the use of the JuiceFS client in the container. You can refer to the following `Dockerfile` example to integrate the JuiceFS client into your application image:

```dockerfile title="Dockerfile"
FROM alpine:latest
LABEL maintainer="Juicedata <https://juicefs.com>"

# Install JuiceFS client
RUN apk add --no-cache curl && \
  JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \
  wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  install juicefs /usr/bin && \
  rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  rm -rf /var/cache/apk/* && \
  apk del curl

ENTRYPOINT ["/usr/bin/juicefs", "mount"]
```

Since JuiceFS needs to use the FUSE device to mount the file system, it is necessary to allow the container to run in privileged mode when creating a Pod:

```yaml {19-20}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-run
spec:
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: linuxserver/nginx
          ports:
            - containerPort: 80
          securityContext:
            privileged: true
```

:::caution
With the privileged mode being enabled by `privileged: true`, the container has access to all devices of the host, that is, it has full control of the host's kernel. Improper uses will bring serious safety hazards. Please conduct a thorough safety assessment before using it.
:::


================================================
FILE: docs/en/deployment/juicefs_on_docker.md
================================================
---
title: Using JuiceFS in Docker
sidebar_position: 6
slug: /juicefs_on_docker
description: Using JuiceFS in Docker in different ways, including volume mapping, volume plugin, and mounting in containers.
---

You can use the JuiceFS file system in Docker by running the client directly in the container or using a volume plugin.

## Using a volume plugin {#volume-plugin}

If you have specific requirements for mount management, such as managing mount points through Docker to facilitate different application containers using different JuiceFS file systems, you can use a [Docker volume plugin](https://github.com/juicedata/docker-volume-juicefs).

Docker plugins are usually provided in the form of images. The [JuiceFS volume plugin image](https://hub.docker.com/r/juicedata/juicefs) contains the [JuiceFS Community Edition](../introduction/README.md) and [JuiceFS Cloud Service](https://juicefs.com/docs/cloud) clients. After installation, you can run the volume plugin to create JuiceFS volumes in Docker.

Install the plugin using the following command and provide the necessary permissions for FUSE as prompted:

```shell
docker plugin install juicedata/juicefs
```

You can use the following commands to manage the volume plugin:

```shell
# Disable the plugin
docker plugin disable juicedata/juicefs

# Upgrade the plugin (must be disabled first)
docker plugin upgrade juicedata/juicefs
docker plugin enable juicedata/juicefs

# Remove the plugin
docker plugin rm juicedata/juicefs
```

### Create a storage volume {#create-volume}

Replace `<VOLUME_NAME>`, `<META_URL>`, `<STORAGE_TYPE>`, `<BUCKET_NAME>`, `<ACCESS_KEY>`, and `<SECRET_KEY>` in the following command with your own file system configuration:

```shell
docker volume create -d juicedata/juicefs \
  -o name=<VOLUME_NAME> \
  -o metaurl=<META_URL> \
  -o storage=<STORAGE_TYPE> \
  -o bucket=<BUCKET_NAME> \
  -o access-key=<ACCESS_KEY> \
  -o secret-key=<SECRET_KEY> \
  jfsvolume
```

For pre-created file systems, you only need to specify the file system name and database address when creating the volume plugin, for example:

```shell
docker volume create -d juicedata/juicefs \
  -o name=<VOLUME_NAME> \
  -o metaurl=<META_URL> \
  jfsvolume
```

If you need to pass additional environment variables when mounting the file system, such as in [Google Cloud](../reference/how_to_set_up_object_storage.md#google-cloud), you can append parameters similar to `-o env=FOO=bar,SPAM=egg` to the above command.

### Usage and management {#usage-and-management}

```shell
# Mount the volume when creating a container
docker run -it -v jfsvolume:/opt busybox ls /opt

# After unmounting, you can delete the storage volume. Note that this only deletes the corresponding resources in Docker and does not affect the data stored in JuiceFS.
docker volume rm jfsvolume
```

### Using the plugin in Docker Compose {#using-plugin-in-docker-compose}

Here is an example of using the JuiceFS volume plugin in `docker-compose`:

```yaml
version: '3'
services:
  busybox:
    image: busybox
    command: "ls /jfs"
    volumes:
      - jfsvolume:/jfs
      
volumes:
  jfsvolume:
    driver: juicedata/juicefs
    driver_opts:
      name: ${VOL_NAME}
      # SQLite creates the database file in the plugin container's local path,
      # and sqlite:// will fail when the service is restarted.
      # (See https://github.com/juicedata/docker-volume-juicefs/issues/37 for details)
      metaurl: ${META_URL}
      storage: ${STORAGE_TYPE}
      bucket: ${BUCKET}
      access-key: ${ACCESS_KEY}
      secret-key: ${SECRET_KEY}
      # If necessary, you can pass additional environment variables using env
      # env: FOO=bar,SPAM=egg
```

Usage and management:

```shell
# Start the service
docker-compose up

# Stop the service and unmount the JuiceFS file system from Docker
docker-compose down --volumes
```

### Troubleshooting the volume plugin {#troubleshooting}

If it is not working properly, it is recommended to first [upgrade the volume plugin](#volume-plugin), and then check the logs based on the problem.

* Collect JuiceFS client logs. The logs are located inside the Docker volume plugin container and need to be accessed by entering the container:

  ```shell
  # Confirm the docker plugins runtime directory, which may be different from the example below depending on the actual situation
  # The directory printed by ls is the container directory, and the name is the container ID
  ls /run/docker/plugins/runtime-root/plugins.moby

  # Print plugin container information
  # If the printed container list is empty, it means that the plugin container failed to be created
  # Read the plugin startup log below to continue troubleshooting
  runc --root /run/docker/plugins/runtime-root/plugins.moby list

  # Enter the container and print the log
  runc --root /run/docker/plugins/runtime-root/plugins.moby exec 452d2c0cf3fd45e73a93a2f2b00d03ed28dd2bc0c58669cca9d4039e8866f99f cat /var/log/juicefs.log
  ```

  If the container does not exist (`ls` finds an empty directory) or the `juicefs.log` does not exist in the final log printing stage, it is likely that the mount itself failed. Continue to check the plugin's own logs to find the cause.

* Collect plugin logs, using systemd as an example:

  ```shell
  journalctl -f -u docker | grep "plugin="
  ```

  If there is an error when the plugin calls `juicefs` or if the plugin itself reports an error, it will be reflected in the logs.

## Using the JuiceFS client in containers {#mount-juicefs-in-docker}

Compared to the volume plugin, using the JuiceFS client directly in the container is more flexible. You can directly mount the JuiceFS file system in the container or access it through S3 Gateway or WebDAV.

### Method 1: Build your own image

The JuiceFS client is a standalone binary program that provides versions for both AMD64 and ARM64 architectures. You can define the command to download and install the JuiceFS client in the Dockerfile, for example:

```Dockerfile
FROM ubuntu:22.04
...
# Use the official one-click installation script
RUN curl -sSL https://d.juicefs.com/install | sh - 
```

For more information, see [Customizing Container Images](https://juicefs.com/docs/csi/guide/custom-image).

### Method 2: Use the officially maintained image

The JuiceFS officially maintained image [`juicedata/mount`](https://hub.docker.com/r/juicedata/mount) is tagged to specify the desired version. **The community edition tags include `latest` and `ce`**, such as `ce-v1.1.2` and `ce-nightly`. The `latest` tag represents the latest community edition, and the `nightly` tag points to the latest development version. For details, see the [tags page](https://hub.docker.com/r/juicedata/mount/tags) on Docker Hub.

Before you start, you need to prepare [object storage](../reference/how_to_set_up_object_storage.md) and [metadata engine](../reference/how_to_set_up_metadata_engine.md).

#### Create a file system

Create a file system through a temporary container, for example:

```sh
docker run --rm \
    juicedata/mount:ce-v1.1.2 juicefs format \
    --storage s3 \
    --bucket https://xxx.your-s3-endpoint.com \
    --access-key=ACCESSKEY \
    --secret-key=SECRETKEY \
    rediss://user:password@xxx.your-redis-server.com:6379/1 myjfs
```

Replace `--storage`, `--bucket`, `--access-key`, `--secret-key`, and the metadata engine URL with your own configuration.

#### Mount the file system directly in the container

Create a container and mount the JuiceFS file system in the container, for example:

```sh
docker run --privileged --name myjfs \
    juicedata/mount:ce-v1.1.2 juicefs mount \
    rediss://user:password@xxx.your-redis-server.com:6379/1 /mnt
```

Replace the metadata engine URL with your own configuration. `/mnt` is the mount point and can be modified as needed. Since FUSE is used, `--privileged` permission is also required.

#### Mount the file system through Docker Compose

Here is an example using Docker Compose. Replace the metadata engine URL and mount point with your own configuration.

```yaml
version: "3"
services:
    busybox:
      image: busybox
      command: "ls /jfs"
      volumes:
        - ./mnt:/jfs
      depends_on:
        juicefs:
          condition: service_healthy

    juicefs:
      image: juicedata/mount:ce-v1.1.2
      container_name: myjfs
      volumes:
        - ./mnt:/mnt:rw,rshared
      cap_add:
        - SYS_ADMIN
      devices:
        - /dev/fuse
      security_opt: 
        - apparmor:unconfined
      command: ["juicefs", "mount", "rediss://user:password@xxx.your-redis-server.com:6379/1", "/mnt"]
      restart: unless-stopped
      healthcheck:
        test: ["CMD-SHELL", "cat /mnt/.control"]
        interval: 60s
        retries: 5
        start_period: 30s
        timeout: 10s
```

In the container, the JuiceFS file system is mounted to the `/mnt` directory, and the volumes section in the configuration file maps the `/mnt` in the container to the `./mnt` directory on the host, allowing direct access to the JuiceFS file system mounted in the container from the host. At the same time, by combining `depends_on` and `volumes`, the directory mapped to the host machine can be remounted into the container for use.

#### Access the file system through S3 Gateway

Here is an example of exposing JuiceFS for access through S3 Gateway. Replace `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD`, the metadata engine URL, and the address and port number to listen on with your own configuration.

```yaml
version: "3"
services:
    s3-gateway:
      image: juicedata/mount:ce-v1.1.2
      container_name: juicefs-s3-gateway
      environment:
        - MINIO_ROOT_USER=your-username
        - MINIO_ROOT_PASSWORD=your-password
      ports:
        - "9090:9090"
      command: ["juicefs", "gateway", "rediss://user:password@xxx.your-redis-server.com:6379/1", "0.0.0.0:9090"]
      restart: unless-stopped
```

Use port `9090` on the host to access the S3 Gateway console, and use the same address to read and write the JuiceFS file system through the S3 client or SDK.


================================================
FILE: docs/en/deployment/nfs.md
================================================
---
title: Create NFS Shares
sidebar_position: 9
description: Learn how to use the NFS protocol to share directories within the JuiceFS file system.
---

NFS (Network File System) is a network file-sharing protocol that allows different computers to share files and directories over a network. It was originally developed by Sun Microsystems and is a standard way of file sharing between Unix and Unix-like systems. The NFS protocol enables clients to access remote file systems as if they were local, achieving transparent remote file access.

When you need to share directories from the JuiceFS file system through NFS, you can simply use the `juicefs mount` command to mount the file system. Then, you can create NFS shares with the JuiceFS mount point or subdirectories.

:::note
`juicefs mount` mounts the file system as a local user-space file system through the FUSE interface, making it identical to the local file system in terms of appearance and usage. Hence, it can be directly used to create NFS shares.
:::

## Step 1. Install NFS

To configure NFS shares, you need to install the relevant software packages on both the server and client sides. Let's take Ubuntu/Debian systems as an example:

### 1. Server-side installation

Create a host for NFS sharing (with the JuiceFS file system also mounted on this server).

```shell
sudo apt install nfs-kernel-server
```

### 2. Client-side installation

All Linux hosts that need to access NFS shares should install the client software.

```shell
sudo apt install nfs-common
```

## Step 2. Create shares

Assuming the JuiceFS is mounted on the server system at the path `/mnt/myjfs`, if you want to set the `media` subdirectory as an NFS share, you can add the following configuration to the `/etc/exports` file on the server system:

```
"/mnt/myjfs/media" *(rw,sync,no_subtree_check,fsid=1)
```

The syntax for NFS share configuration is as follows:

```
<Share Path> <Allowed IPs>(options)
```

For example, if you want to restrict the mounting of this share to hosts in the `192.168.1.0/24` IP range and avoid squashing root privileges, you can modify it as follows:

```
"/mnt/myjfs/media" 192.168.1.0/24(rw,async,no_subtree_check,no_root_squash,fsid=1)
```

### Share option description

**Explanation of the share options:**

- `rw`: Represents read and write permissions. If read-only access is desired, use `ro`.
- `sync` and `async`: `sync` enables synchronous writes, meaning that when writing to the NFS share, the client waits for the server's confirmation of successful data write before proceeding with subsequent operations. `async`, on the other hand, allows asynchronous writes. In this mode, the client does not wait for the server's confirmation of successful write before proceeding with subsequent operations.
- `no_subtree_check`: Disables subtree checking, allowing clients to mount both the parent and child directories of the NFS share. This can reduce some security but improve NFS compatibility. Setting it to `subtree_check` enables subtree checking, allowing clients to only mount the NFS share and its subdirectories.
- `no_root_squash`: Controls the mapping behavior of the client's root user when accessing the NFS share. By default, when the client mounts the NFS share as root, the server maps it to a non-privileged user (usually nobody or nfsnobody), which is known as root squashing. Enabling this option cancels the root squashing, giving the client the same root user privileges as the server. This option comes with certain security risks and should be used with caution.
- `fsid`: A file system identifier used to identify different file systems on NFS. In NFSv4, the root directory of NFS is defined as fsid=0, and other file systems need to be numbered uniquely under it. Here, JuiceFS is an externally mounted FUSE file system, so it needs to be assigned a unique identifier.

### Choosing between async and sync modes

For NFS shares, the sync (synchronous writes) mode can improve data reliability but always requires waiting for the server's confirmation before proceeding with the next operation. This may result in lower write performance. For JuiceFS, which is a cloud-based distributed file system, network latency also needs to be considered. Using the sync mode can often lead to lower write performance due to network latency.

In most cases, when creating NFS shares with JuiceFS, it is recommended to set the write mode to async (asynchronous writes) to avoid sacrificing write performance. If data reliability must be prioritized and sync mode is necessary, it is recommended to configure JuiceFS with a high-performance SSD as a local cache with sufficient capacity and enable the writeback cache mode.


================================================
FILE: docs/en/deployment/production_deployment_recommendations.md
================================================
---
sidebar_position: 1
slug: /production_deployment_recommendations
description: This article is intended as a reference for users who are about to deploy JuiceFS to a production environment and provides a series of environment configuration recommendations.
---

# Production Deployment Recommendations

This document provides deployment recommendations for JuiceFS Community Edition in production environments. It focuses on monitoring metric collection, automatic metadata backup, trash configuration, background tasks of clients, client log rolling, and command-line auto-completion to ensure the stability and reliability of the file system.

## Metrics collection and visualization

It is necessary to collect monitoring metrics from JuiceFS clients and visualize them using Grafana. This allows for real-time monitoring of file system performance and health status. For detailed instructions, see this [document](../administration/monitoring.md).

## Automatic metadata backup

:::tip
Automatic metadata backup is a feature that has been added since JuiceFS v1.0.0.
:::

Metadata is critical to the JuiceFS file system, and any loss or corruption of metadata may affect a large number of files or even the entire file system. Therefore, metadata must be backed up regularly.

This feature is enabled by default and the backup interval is 1 hour. The backed-up metadata is compressed and stored in the corresponding object storage, separate from file system data. Backups are performed by JuiceFS clients, which may increase CPU and memory usage during the process. By default, one client is randomly selected for backup operations.

It is important to note that this feature is disabled when the number of files reaches **one million**. To re-enable it, set a larger backup interval (the `--backup-meta` option). The interval is configured independently for each client. You can use `--backup-meta 0` to disable automatic backup.

:::note
The time required for metadata backup depends on the specific metadata engine. Different metadata engines have different performance.
:::

For detailed information on automatic metadata backups, see this [document](../administration/metadata_dump_load.md#backup-automatically). Alternatively, you can back up metadata manually. In addition, follow the operational and maintenance recommendations of the metadata engine you are using to back up your data regularly.

## Trash

:::tip
The Trash feature has been available since JuiceFS v1.0.0.
:::

Trash is enabled by default. The retention time for deleted files defaults to 1 day to mitigate the risk of accidental data loss.

However, enabling Trash may have side effects. If the application needs to frequently delete files or overwrite them, it will cause the object storage usage to be much larger than the file system. This is because the JuiceFS client retain deleted files and overwritten blocks on the object storage for a certain period. Therefore, it is highly recommended to evaluate workload requirements before deploying JuiceFS in a production environment to configure Trash appropriately. You can configure the retention time as follows (`--trash-days 0` disables Trash):

- For new file systems: set via the `--trash-days <value>` option of `juicefs format`
- For existing file systems: modify with the `--trash-days <value>` option of `juicefs config`

For more information on Trash, see this [document](../security/trash.md).

## Client background tasks

The JuiceFS file system maintains background tasks through clients, which can automatically execute cleaning tasks such as deleting pending files and objects, purging expired files and fragments from Trash, and terminating long-stalled client sessions.

All clients of the same JuiceFS volume share a set of background tasks during runtime. Each task is executed at regular intervals, with the client chosen randomly. Background tasks include:

- Cleaning up files and objects to be deleted
- Clearing out-of-date files and fragments in Trash
- Cleaning up stale client sessions
- Automatic backup of metadata

Since these tasks take up some resources when executed, you can set the `--no-bgjob` option to disable them for clients with heavy workload.

:::note
Make sure that at least one JuiceFS client can execute background tasks.
:::

## Client log rotation

When running a JuiceFS mount point in the background, the client outputs logs to a local file by default. The path to the local log file is slightly different depending on the user running the process:

- For the root user, the path is `/var/log/juicefs.log`.
- For others, the path is `$HOME/.juicefs/juicefs.log`.

The local log file is not rotated by default and needs to be configured manually in production to prevent excessive disk space usage. The following is a configuration example for log rotation:

```text title="/etc/logrotate.d/juicefs"
/var/log/juicefs.log {
    daily
    rotate 7
    compress
    delaycompress
    missingok
    notifempty
    copytruncate
}
```

You can check the correctness of the configuration file with the `logrotate -d` command:

```shell
logrotate -d /etc/logrotate.d/juicefs
```

For details about the logrotate configuration, see this [link](https://linux.die.net/man/8/logrotate).

## Command line auto-completion

JuiceFS provides command line auto-completion scripts for Bash and Zsh to facilitate the use of `juicefs` commands. For details, see this [document](../reference/command_reference.mdx#auto-completion) for details.


================================================
FILE: docs/en/deployment/python_sdk.md
================================================
---
title: Python SDK
sidebar_position: 6
---

The JuiceFS Community Edition introduced the Python SDK in v1.3.0, making it suitable for containerized or virtualized environments where FUSE mounting is not available. The Python SDK also implements the `fsspec` interface, enabling easy integration with frameworks such as Ray.

## Compilation

You can compile the Python SDK directly in your current working environment or use a Docker container. Both methods require you to first clone the repository and navigate to the SDK directory.

```bash
# Clone JuiceFS repository
git clone https://github.com/juicedata/juicefs.git
# Enter JuiceFS directory
cd juicefs/sdk/python
```

### Direct Compilation

Direct compilation requires `go1.20+` and `python3` environments.

#### Step 1: Compile libjfs.so

```bash
go build -buildmode c-shared -ldflags="-s -w" -o juicefs/juicefs/libjfs.so ../java/libjfs
```

The compiled `libjfs.so` and `libjfs.h` files will be in the `sdk/python/juicefs/juicefs` directory.

#### Step 2: Compile Python SDK

```bash
cd juicefs && python3 -m build -w
```

The compiled Python SDK will be in the `juicefs/sdk/python/dist` directory, named `juicefs-1.3.0-py3-none-any.whl`.

### Docker Compilation

Using Docker containers for compilation requires `Docker`, `make`, and `go1.20+` installed on your system.

#### Step 1: Build Docker image

```bash
# For arm64
make arm-builder

# For amd64
make builder
```

#### Step 2: Compile Python SDK

```bash
make juicefs
```

The compiled Python SDK will be in the `juicefs/sdk/python/dist` directory, named `juicefs-1.3.0-py3-none-any.whl`.

### Compilation Error Handling

If you encounter an error like `sed: 1: "juicefs/setup.py": invalid command code j` during compilation, you can try commenting out the `sed`-related commands in the `Makefile`.

## Installation and Usage

### Installing the SDK

Copy the compiled `juicefs-1.3.0-py3-none-any.whl` file to the target machine and install it using `pip`:

```bash
pip install juicefs-1.3.0-py3-none-any.whl
```

### Preparing the File System

:::tip
JuiceFS Python SDK currently does not support formatting a file system, so please ensure you have already created a JuiceFS file system before use.
:::

Let's assume there is a pre-created file system named `myfs` with metadata engine URL `redis://192.168.1.8/0`.

### Using the Client

The `Client` class implementation is similar to Python's io module.

You can instantiate a JuiceFS client with the following code, where the `name` parameter is the file system name and the `meta` parameter is the URL of the metadata engine. The `name` parameter must exist but can be an empty string or `None`.

```python
from juicefs import Client

# Create JuiceFS client
jfs = Client(name='', meta='redis://192.168.1.8/0')

# List files in a directory
jfs.listdir('/')
```

### Using fsspec

JuiceFS Python SDK also supports the `fsspec` interface to operate the JuiceFS file system.

```bash
# Install fsspec
pip install fsspec
```

Using `fsspec` is similar to using the `Client` class, but you need to specify `jfs` or `juicefs` as the file system type.

```python
import fsspec
from juicefs.spec import JuiceFS

jfs = fsspec.filesystem('jfs', name='', meta='redis://192.168.1.8/0')

# List files in a directory
jfs.ls('/')
```

### Getting Help Information

You can use the `help()` function to get help information for classes and methods.

```python
import juicefs

help(juicefs.Client)
```

You can also use the `dir()` function to get a list of classes and methods.

```python
import juicefs

dir(juicefs.Client)
```


================================================
FILE: docs/en/deployment/samba.md
================================================
---
title: Create Samba Shares
sidebar_position: 8
description: Learn how to share directories in the JuiceFS file system through Samba.
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

Samba is an open-source software suite that implements the SMB/CIFS (Server Message Block / Common Internet File System) protocol, which is a commonly used file-sharing protocol in Windows systems. With Samba, you can create shared directories on Linux/Unix servers, allowing Windows computers to access and use these shared resources over the network.

To create a shared folder on a Linux system with Samba installed, you can edit the `smb.conf` configuration file. Once configured, Windows and macOS systems can access and read/write the shared folder using their file managers. Linux needs to install the Samba client for access.

When you need to share directories from the JuiceFS file system through Samba, you can simply use the `juicefs mount` command to mount the file system. Then, you can create Samba shares with the JuiceFS mount point or subdirectories.

:::note
`juicefs mount` mounts the file system as a local user-space file system through the FUSE interface, making it identical to the local file system in terms of appearance and usage. Hence, it can be directly used to create Samba shares.
:::

## Step 1: Install Samba

Most Linux distributions provide Samba through their package managers.

<Tabs>
<TabItem value="debian" label="Debian and derivatives">

```shell
sudo apt install samba
```

</TabItem>
    <TabItem value="redhat" label="RHEL and derivatives">

```shell
sudo dnf install samba
```

</TabItem>
</Tabs>

If you need to configure AD/DC (Active Directory / Domain Controller), additional software packages need to be installed. For more details, refer to the [Samba Official Installation Guide](https://wiki.samba.org/index.php/Distribution-specific_Package_Installation).

## Step 2: Enable JuiceFS extended attribute (xattr) support

According to the [Samba official documentation](https://wiki.samba.org/index.php/File_System_Support#File_systems_without_xattr_support), it is recommended to use file systems that support extended attributes (xattr). To enable extended attribute support for JuiceFS during the mount process, use the `--enable-xattr` option. For example:

```shell
sudo juicefs mount -d --enable-xattr sqlite3://myjfs.db /mnt/myjfs
```

For cases where you configure automatic mounting through `/etc/fstab`, you can add the `enable-xattr` option to the mount options section. For example:

```ini
# <metadata engine URL> <mount point> <file system type> <mount options>
redis://127.0.0.1:6379/0 /mnt/myjfs juicefs _netdev,max-uploads=50,writeback,cache-size=1024000,enable-xattr 0 0
```

### Knowledge extension: why Samba requires file system support for extended attributes

Samba is software designed for Linux/Unix systems, serving file sharing to Windows systems. In Windows systems, many files and directories have additional metadata, for example, file authors, keywords, and icon positions. This information is typically stored outside the POSIX file system and requires xattr format for storage in Windows. To ensure that these files can be correctly stored in Linux systems, Samba recommends using file systems that support extended attributes when creating shares.

## Step 3: Create a Samba share

Assuming the JuiceFS mount point is `/mnt/myjfs`, if you want to create a Samba share for the `media` directory within it, you can configure it as follows:

```ini
[Media]
    path = /mnt/myjfs/media
    guest ok = no
    read only = no
    browseable = yes
```

## Share for macOS

Apple macOS systems support direct access to Samba shares. Similar to Windows, macOS also has additional metadata (e.g., icon positions, Spotlight search) that needs to be saved using xattr. Samba version 4.9 and above have the support for macOS extended attributes enabled by default.

If your Samba version is lower than 4.9, you need to add the `ea support = yes` option to the [global] section of the Samba configuration to enable extended attribute support for macOS. Edit the configuration file `/etc/samba/smb.conf`, for example:

```ini
[global]
    workgroup = SAMBA
    security = user
    passdb backend = tdbsam
    ea support = yes
```

## User management in Samba

Samba has its own user database, independent of the operating system users. However, since Samba shares directories from the system, appropriate user permissions are required to read and write files.

### Create Samba users

When creating users for Samba, it is required that the user already exists in the system, as Samba will automatically map the Samba user to the same-named system user with corresponding permissions.

- If the user already exists in the system, assuming the system account is "herald," you can create a Samba account for it as follows:

    ```shell
    sudo smbpasswd -a herald
    ```

    Follow the on-screen prompts to set the password. The Samba account can have a different password than the system user.

- If you need to create a new user, taking the example of creating a user named "abc":

    1. Create a user:

        ```shell
        sudo adduser abc
        ```

    2. Create a corresponding Samba user with the same name:

        ```shell
        sudo smbpasswd -a abc
        ```

### View created Samba users

`pdbedit` is a built-in tool in Samba used to manage the Samba user database. You can use this tool to list all the created Samba users:

```shell
sudo pdbedit -L
```

It will display a list of all created Samba users, including their usernames, security identifiers (SIDs), group membership, and other related information.


================================================
FILE: docs/en/deployment/webdav.md
================================================
---
title: Deploy WebDAV Server
sidebar_position: 5
---

WebDAV is an extension of the HTTP protocol, a sharing protocol that facilitates collaborative editing and management of documents on a network between multiple users. WebDAV client support is built into many tools involved in file editing and synchronization, macOS Finder, and the file managers of some Linux distributions.

JuiceFS supports accessing through the WebDAV protocol, which is very convenient for macOS and other operating systems that do not have native FUSE support.

## Pre-requisites

Before you can configure a WebDAV server, you need to [create a JuiceFS file system](../getting-started/standalone.md#juicefs-format).

## Anonymous WebDAV

For security insensitive environments such as standalone or intranet, anonymous WebDAV without authentication can be configured with the following command format.

```shell
juicefs webdav META-URL LISTENING-ADDRESS:PORT
```

For example, enable the WebDAV access protocol for a JuiceFS file system:

```shell
sudo juicefs webdav sqlite3://myjfs.db 192.168.1.8:80
```

WebDAV server needs to be accessed through the set listening address and port, such as the above example uses the IP address `192.168.1.8` of the intranet, and the standard Web port number `80`, when accessing without specifying the port, directly access `http://192.168.1.8`.

If you use another port number, you need to specify it explicitly in the address, for example, if you listen to port `9007`, the access address should be `http://192.168.1.8:9007`.

:::tip
Do not use "Guest" identity when accessing anonymous WebDAV using macOS's Finder. Please use "Registered User" identity, user name can enter any character, password can be empty, and then connect directly.
:::

## WebDAV with authentication

:::info
JuiceFS v1.0.3 and previous versions do not support authentication features.
:::

The WebDAV authentication feature of JuiceFS requires setting the user name (`WEBDAV_USER`) and password (`WEBDAV_PASSWORD`) through environment variables, e.g.:

```shell
export WEBDAV_USER=user
export WEBDAV_PASSWORD=mypassword
sudo juicefs webdav sqlite3://myjfs.db 192.168.1.8:80
```

## Enable HTTPS support

JuiceFS supports configuring WebDAV server protected by the HTTPS protocol, specifying certificates and private keys through `--cert-file` and `--key-file` options, either using a certificate issued by a trusted digital certificate authority CA or using OpenSSL to create self-signed certificate.

### Self-signed certificate

To create a private key and certificate using OpenSSL.

1. Generate server private key

   ```shell
   openssl genrsa -out client.key 4096
   ```

2. Generate Certificate Signing Request (CSR)

   ```shell
   openssl req -new -key client.key -out client.csr
   ```

3. Issuing certificates using CSR

   ```shell
   openssl x509 -req -days 365 -in client.csr -signkey client.key -out client.crt
   ```

The above command will produce the following files in the current directory:

- `client.key`: Server private Key
- `client.csr`: Certificate Signing Request file
- `client.crt`: Self-signed certificate

To create a WebDAV server you need to use `client.key` and `client.crt`, e.g.

```shell
sudo juicefs webdav \
   --cert-file ./client.crt \
   --key-file ./client.key \
   sqlite3://myjfs.db 192.168.1.8:443
```

With HTTPS support enabled, the listening port number can be changed to the standard HTTPS port number `443`, and then the `https://` protocol is used instead, so that the port number does not need to be specified when accessing, for example: `https://192.168.1.8`.

Likewise, if a non-HTTPS standard port number is set, it should be explicitly specified in the access address, e.g., if you set a port to listen on `9999`, the access address should be `https://192.168.1.8:9999`.


================================================
FILE: docs/en/development/contributing_guide.md
================================================
---
title: Contributing Guide
sidebar_position: 1
description: JuiceFS is open source software and the code is contributed and maintained by developers worldwide. Learn how to participate in this article.
---

## Guidelines

- Before starting work on a feature or bug fix, search GitHub or reach out to us via GitHub or Slack, make sure no one else is already working on it and we'll ask you to open a GitHub issue if necessary.
- Before contributing, use the GitHub issue to discuss the feature and reach an agreement with the core developers.
- For major feature updates, write a design document to help the community understand your motivation and solution.
- Find issues with the label ["kind/good-first-issue"](https://github.com/juicedata/juicefs/labels/kind%2Fgood-first-issue) or ["kind/help-wanted"](https://github.com/juicedata/juicefs/labels/kind%2Fhelp-wanted).

Read [internals](./internals.md) for important data structure references.

## Coding style

- We're following ["Effective Go"](https://go.dev/doc/effective_go) and ["Go Code Review Comments"](https://github.com/golang/go/wiki/CodeReviewComments).
- Use `go fmt` to format your code before committing. You can find information in editor support for Go tools in ["IDEs and Plugins for Go"](https://github.com/golang/go/wiki/IDEsAndTextEditorPlugins).
- Every new source file must begin with a license header.
- Install [pre-commit](https://pre-commit.com) and use it to set up a pre-commit hook for static analysis. Just run `pre-commit install` in the root of the repo.

## Sign the CLA

Before you can contribute to JuiceFS, you will need to sign the [Contributor License Agreement](https://cla-assistant.io/juicedata/juicefs). There're a CLA assistant to guide you when you first time submit a pull request.

## What is a good PR

- Presence of unit tests
- Adherence to the coding style
- Adequate in-line comments
- Explanatory commit message

## Contribution flow

1. Create a topic branch from where to base the contribution. This is usually `main`.
1. Make commits of logical units.
1. Make sure commit messages are in the proper format.
1. Push changes in a topic branch to a personal fork of the repository.
1. Submit a pull request to [`juicedata/juicefs`](https://github.com/juicedata/juicefs/compare). The PR should link to one issue which either created by you or others.
1. The PR must receive approval from at least one maintainer before it be merged.


================================================
FILE: docs/en/development/internals.md
================================================
---
title: Internals
sidebar_position: 4
slug: /internals
---

This article introduces implementation details of JuiceFS, use this as a reference if you'd like to contribute. The content below is based on JuiceFS v1.0.0, metadata version v1.

Before digging into source code, you should read [Data Processing Workflow](../introduction/io_processing.md).

## Keyword Definition

High level concepts:

- File system: i.e. JuiceFS Volume, represents a separate namespace. Files can be moved freely within the same file system, while data copies are required between different file systems.
- Metadata engine: A supported database instance of your choice, that stores and manages file system metadata. There are three categories of metadata engines currently supported by JuiceFS.
  - Redis: Redis and various protocol-compatible services
  - SQL: MySQL, PostgreSQL, SQLite, etc.
  - TKV: TiKV, BadgerDB, etc.
- Datastore: Object storage service that stores and manages file system data, such as Amazon S3, Aliyun OSS, etc. It can also be served by other storage systems that are compatible with object storage semantics, such as local file systems, Ceph RADOS, TiKV, etc.
- Client: can be in various forms, such as mount process, S3 gateway, WebDAV server, Java SDK, etc.
- File: refers to all types of files in general in this documentation, including regular files, directory files, link files, device files, etc.
- Directory: is a special kind of file used to organize the tree structure, and its contents are an index to a set of other files.

Low level concepts (learn more at [Data Processing Workflow](../introduction/io_processing.md)):

- Chunk: Logical concept, file is split into 64MiB chunks, allowing fast lookups during file reads;
- Slice: Logical concept, basic unit for file writes. Block's purpose is to improve read speed, and slice exists to improve file edits and random writes. All file writes are assigned a new or existing slice, and when file is read, what application sees is the consolidated view of all slices.
- Block: A chunk contains one or more blocks (4MiB by default), block is the basic storage unit in object storage. JuiceFS Client reads multiple blocks concurrently which greatly improves read performance. Apart from this, block is also the basic storage unit on disk cache, so this design improves cache eviction efficiency. Apart from this, block is immutable, all file edits is achieved through new blocks: after file edit, new blocks are uploaded to object storage, and new slices are appended to the slice list in the corresponding file metadata;

## Learn source code  {#source-code-structure}

Assuming you're already familiar with Go, as well as [JuiceFS architecture](https://juicefs.com/docs/community/architecture), this is the overall code structure:

* [`cmd`](https://github.com/juicedata/juicefs/tree/main/cmd) is the top-level entrance, all JuiceFS functionalities is rooted here, e.g. the `juicefs format` command resides in `cmd/format.go`；
* [`pkg`](https://github.com/juicedata/juicefs/tree/main/pkg) is actual implementation:
  * `pkg/fuse/fuse.go` provides abstract FUSE API;
  * `pkg/vfs` contains actual FUSE implementation, Metadata requests are handled in `pkg/meta`, read requests are handled in `pkg/vfs/reader.go` and write requests are handled by `pkg/vfs/writer.go`;
  * `pkg/meta` directory is the implementation of all metadata engines, where:
    * `pkg/meta/interface.go` is the interface definition for all types of metadata engines
    * `pkg/meta/redis.go` is the interface implementation of Redis database
    * `pkg/meta/sql.go` is the interface definition and general interface implementation of relational database, and the implementation of specific databases is in a separate file (for example, the implementation of MySQL is in `pkg/meta/sql_mysql.go`)
    * `pkg/meta/tkv.go` is the interface definition and general interface implementation of the KV database, and the implementation of a specific database is in a separate file (for example, the implementation of TiKV is in `pkg/meta/tkv_tikv.go`)
  * `pkg/object` contains all object storage integration code;
* [`sdk/java`](https://github.com/juicedata/juicefs/tree/main/sdk/java) is the Hadoop Java SDK, it uses `sdk/java/libjfs` through JNI.

## FUSE interface implementation {#fuse-interface-implementation}

JuiceFS implements a userspace file system based on [FUSE](https://en.wikipedia.org/wiki/Filesystem_in_Userspace) (Filesystem in Userspace), and the implementation library [`libfuse`](https://github.com/libfuse/libfuse) provides two APIs: high-level API and low-level API, where the high-level API is based on file name and path, and the low-level API is based on inode.

JuiceFS is implemented based on low-level API (in fact JuiceFS does not depend on `libfuse`, but [`go-fuse`](https://github.com/hanwen/go-fuse)), because this is the same set of APIs used by kernel VFS when interacting with FUSE. If JuiceFS were to use high level API, it'll have to implement the VFS tree within `libfuse`, and then expose path based API. This method works better for systems that already expose path based APIs (e.g. HDFS, S3). If metadata itself implements file / directory tree based on inode, the inode → path → inode conversions will have an impact on performance (this is the reason why FUSE API for HDFS doesn't perform well). JuiceFS Metadata directly implements file tree and API based on inode, so naturally it uses FUSE low level API.

## Metadata Structure

File systems are usually organized in a tree structure, where nodes represent files and edges represent directory containment relationships. There are more than ten metadata structures in JuiceFS. Most of them are used to maintain the organization of file tree and properties of individual nodes, while the rest are used to manage system configuration, client sessions, asynchronous tasks, etc. All metadata structures are described below.

### General Structure

#### Setting

It is created when the `juicefs format` command is executed, and some of its fields can be modified later by the `juicefs config` command. The structure is specified as follows.

```go
type Format struct {
    Name             string
    UUID             string
    Storage          string
    Bucket           string
    AccessKey        string `json:",omitempty"`
    SecretKey        string `json:",omitempty"`
    SessionToken     string `json:",omitempty"`
    BlockSize        int
    Compression      string `json:",omitempty"`
    Shards           int    `json:",omitempty"`
    HashPrefix       bool   `json:",omitempty"`
    Capacity         uint64 `json:",omitempty"`
    Inodes           uint64 `json:",omitempty"`
    EncryptKey       string `json:",omitempty"`
    KeyEncrypted     bool   `json:",omitempty"`
    TrashDays        int    `json:",omitempty"`
    MetaVersion      int    `json:",omitempty"`
    MinClientVersion string `json:",omitempty"`
    MaxClientVersion string `json:",omitempty"`
    EnableACL        bool
}
```

- Name: name of the file system, specified by the user when formatting
- UUID: unique ID of the file system, automatically generated by the system when formatting
- Storage: short name of the object storage used to store data, such as `s3`, `oss`, etc.
- Bucket: the bucket path of the object storage
- AccessKey: access key used to access the object storage
- SecretKey: secret key used to access the object storage
- SessionToken: session token used to access the object storage, as some object storage supports the use of temporary token to obtain permission for a limited time
- BlockSize: size of the data block when splitting the file (the default is 4 MiB)
- Compression: compression algorithm that is executed before uploading data blocks to the object storage (the default is no compression)
- Shards: number of buckets in the object storage, only one bucket by default; when Shards > 1, data objects will be randomly hashed into Shards buckets
- HashPrefix: whether to set a hash prefix for the object name, false by default
- Capacity: quota limit for the total capacity of the file system
- Inodes: quota limit for the total number of files in the file system
- EncryptKey: the encrypted private key of the data object, which can be used only if the data encryption function is enabled
- KeyEncrypted: whether the saved key is encrypted or not, by default the SecretKey, EncryptKey and SessionToken will be encrypted
- TrashDays: number of days the deleted files are kept in trash, the default is 1 day
- MetaVersion: the version of the metadata structure, currently V1 (V0 and V1 are the same)
- MinClientVersion: the minimum client version allowed to connect, clients earlier than this version will be denied
- MaxClientVersion: the maximum client version allowed to connect
- EnableACL: enable ACL or not

This structure is serialized into JSON format and stored in the metadata engine.

#### Counter

Maintains the value of each counter in the system and the start timestamps of some background tasks, specifically

- usedSpace: used capacity of the file system
- totalInodes: number of used files in the file system
- nextInode: the next available inode number (in Redis, the maximum inode number currently in use)
- nextChunk: the next available sliceId (in Redis, the largest sliceId currently in use)
- nextSession: the maximum SID (sessionID) currently in use
- nextTrash: the maximum trash inode number currently in use
- nextCleanupSlices: timestamp of the last check on the cleanup of residual slices
- lastCleanupSessions: timestamp of the last check on the cleanup of residual stale sessions
- lastCleanupFiles: timestamp of the last check on the cleanup of residual files
- lastCleanupTrash: timestamp of the last check on the cleanup of trash

#### Session

Records the session IDs of clients connected to this file system and their timeouts. Each client sends a heartbeat message to update the timeout, and those who have not updated for a long time will be automatically cleaned up by other clients.

:::tip
Read-only clients cannot write to the metadata engine, so their sessions **will not** be recorded.
:::

#### SessionInfo

Records specific metadata of the client session so that it can be viewed with the `juicefs status` command. This is specified as

```go
type SessionInfo struct {
    Version    string // JuiceFS version
    HostName   string // Host name
    MountPoint string // path to mount point. S3 gateway and WebDAV server are "s3gateway" and "webdav" respectively
    ProcessID  int    // Process ID
}
```

This structure is serialized into JSON format and stored in the metadata engine.

#### Node

Records attribute information of each file, as follows

```go
type Attr struct {
    Flags     uint8  // reserved flags
    Typ       uint8  // type of a node
    Mode      uint16 // permission mode
    Uid       uint32 // owner id
    Gid       uint32 // group id of owner
    Rdev      uint32 // device number
    Atime     int64  // last access time
    Mtime     int64  // last modified time
    Ctime     int64  // last change time for meta
    Atimensec uint32 // nanosecond part of atime
    Mtimensec uint32 // nanosecond part of mtime
    Ctimensec uint32 // nanosecond part of ctime
    Nlink     uint32 // number of links (sub-directories or hardlinks)
    Length    uint64 // length of regular file

    Parent    Ino  // inode of parent; 0 means tracked by parentKey (for hardlinks)
    Full      bool // the attributes are completed or not
    KeepCache bool // whether to keep the cached page or not

    AccessACL  uint32 // access ACL id (identical ACL rules share the same access ACL ID.)
    DefaultACL uint32 // default ACL id (default ACL and the access ACL share the same cache and store)
}
```

There are a few fields that need clarification.

- Atime/Atimensec: See [`--atime-mode`](../reference/command_reference.mdx#mount-metadata-options)
- Nlink
  - Directory file: initial value is 2 ('.' and '..'), add 1 for each subdirectory
  - Other files: initial value is 1, add 1 for each hard link created
- Length
  - Directory file: fixed at 4096
  - Soft link (symbolic link) file: the string length of the path to which the link points
  - Other files: the length of the actual content of the file

This structure is usually encoded in binary format and stored in the metadata engine.

#### Edges

Records information on each edge in the file tree, as follows

```
parentInode, name -> type, inode
```

where parentInode is the inode number of the parent directory, and the others are the name, type, and inode number of the child files, respectively.

#### LinkParent

Records the parent directory of some files. The parent directory of most files is recorded in the Parent field of the attribute; however, for files that have been created with hard links, there may be more than one parent directory, so the Parent field is set to 0, and all parent inodes are recorded independently, as follows

```
inode -> parentInode, links
```

where links is the count of the parentInode, because multiple hard links can be created in the same directory, and these hard links share one inode.

#### Chunk

Records information on each Chunk, as follows

```
inode, index -> []Slices
```

where inode is the inode number of the file to which the Chunk belongs, and index is the number of all Chunks in the file, starting from 0. The Chunk value is an array of Slices. Each Slice represents a piece of data written by the client, and is appended to this array in the order of writing time. When there is an overlap between different Slices, the later Slice is used.

```go
type Slice struct {
    Pos  uint32 // offset of the Slice in the Chunk
    ID   uint64 // ID of the Slice, globally unique
    Size uint32 // size of the Slice
    Off  uint32 // offset of valid data in this Slice
    Len  uint32 // size of valid data in this Slice
}
```

This structure is encoded and saved in binary format, taking up 24 bytes.

#### SliceRef {#sliceref}

Records the reference count of a Slice, as follows

```
sliceId, size -> refs
```

Since the reference count of most Slices is 1, to reduce the number of related entries in the database, the actual value minus 1 is used as the stored count value in Redis and TKV. In this way, most of the Slices have a refs value of 0, and there is no need to create related entries in the database.

#### Symlink

Records the location of the softlink file, as follows

```
inode -> target
```

#### Xattr

Records extended attributes (Key-Value pairs) of a file, as follows

```
inode, key -> value
```

#### Flock

Records BSD locks (flock) of a file, specifically.

```
inode, sid, owner -> ltype
```

where `sid` is the client session ID, `owner` is a string of numbers, usually associated with a process, and `ltype` is the lock type, which can be 'R' or 'W'.

#### Plock

Record POSIX record locks (fcntl) of a file, specifically

```
inode, sid, owner -> []plockRecord
```

Here plock is a more fine-grained lock that can only lock a certain segment of the file.

```go
type plockRecord struct {
    ltype uint32 // lock type
    pid   uint32 // process ID
    start uint64 // start position of the lock
    end   uint64 // end position of the lock
}
```

This structure is encoded and stored in binary format, taking up 24 bytes.

#### DelFiles

Records the list of files to be cleaned. It is needed as data cleanup of files is an asynchronous and potentially time-consuming operation that can be interrupted by other factors.

```
inode, length -> expire
```

where length is the length of the file and expire is the time when the file was deleted.

#### DelSlices

Records delayed deleted Slices. When the Trash feature is enabled, old Slices deleted by the Slice Compaction will be kept for the same amount of time as the Trash configuration, to be available for data recovery if necessary.

```
sliceId, deleted -> []slice
```

where sliceId is the ID of the new slice after compaction, deleted is the timestamp of the compaction, and the mapped value is the list of all old slices that were compacted. Each slice only encodes its ID and size.

```go
type slice struct {
    ID   uint64
    Size uint32
}
```

This structure is encoded and stored in binary format, taking up 12 bytes.

#### Sustained

Records the list of files that need to be kept temporarily during the session. If a file is still open when it is deleted, the data cannot be cleaned up immediately, but needs to be held temporarily until the file is closed.

```
sid -> []inode
```

where `sid` is the session ID and the mapped value is the list of temporarily undeleted file inodes.

### Redis

The common format of keys in Redis is `${prefix}${JFSKey}`, where

- In standalone mode the prefix is an empty string, while in cluster mode it is a database number enclosed in curly braces, e.g. "{10}"
- JFSKey is the Key of different data structures in JuiceFS, which are listed in the subsequent subsections

In Redis Keys, integers (including inode numbers) are represented as decimal strings if not otherwise specified.

#### Setting {#redis-setting}

- Key: `setting`
- Value Type: String
- Value: file system formatting information in JSON format

#### Counter

- Key: counter name
- Value Type: String
- Value: value of the counter, which is actually an integer

#### Session

- Key: `allSessions`
- Value Type: Sorted Set
- Value: all non-read-only sessions connected to this file system. In Set,
  - Member: session ID
  - Score: timeout point of this session

#### SessionInfo

- Key: `sessionInfos`
- Value Type: Hash
- Value: basic meta-information on all non-read-only sessions. In Hash,
  - Key: session ID
  - Value: session information in JSON format

#### Node {#redis-node}

- Key: `i${inode}`
- Value Type: String
- Value: binary encoded file attribute

#### Edge {#redis-edge}

- Key: `d${inode}`
- Value Type: Hash
- Value: all directory entries in this directory. In Hash,
  - Key: file name
  - Value: binary encoded file type and inode number

#### LinkParent

- Key: `p${inode}`
- Value Type: Hash
- Value: all parent inodes of this file. in Hash.
  - Key: parent inode
  - Value: count of this parent inode

#### Chunk {#redis-chunk}

- Key: `c${inode}_${index}`
- Value Type: List
- Value: list of Slices, each Slice is binary encoded with 24 bytes

#### SliceRef

- Key: `sliceRef`
- Value Type: Hash
- Value: the count value of all Slices to be recorded. In Hash,
  - Key: `k${sliceId}_${size}`
  - Value: reference count of this Slice minus 1 (if the reference count is 1, the corresponding entry is generally not created)

#### Symlink

- Key: `s${inode}`
- Value Type: String
- Value: path that the symbolic link points to

#### Xattr

- Key: `x${inode}`
- Value Type: Hash
- Value: all extended attributes of this file. In Hash,
  - Key: name of the extended attribute
  - Value: value of the extended attribute

#### Flock

- Key: `lockf${inode}`
- Value Type: Hash
- Value: all flocks of this file. In Hash,
  - Key: `${sid}_${owner}`, owner in hexadecimal
  - Value: lock type, can be 'R' or 'W'

#### Plock {#redis-plock}

- Key: `lockp${inode}`
- Value Type: Hash
- Value: all plocks of this file. In Hash,
  - Key: `${sid}_${owner}`, owner in hexadecimal
  - Value: array of bytes, where every 24 bytes corresponds to a [plockRecord](#plock)

#### DelFiles

- Key：`delfiles`
- Value Type: Sorted Set
- Value: list of all files to be cleaned. In Set,
  - Member: `${inode}:${length}`
  - Score: the timestamp when this file was added to the set

#### DelSlices {#redis-delslices}

- Key: `delSlices`
- Value Type: Hash
- Value: all Slices to be cleaned. In Hash,
  - Key: `${sliceId}_${deleted}`
  - Value: array of bytes, where every 12 bytes corresponds to a [slice](#delslices)

#### Sustained

- Key: `session${sid}`
- Value Type: List
- Value: list of files temporarily reserved in this session. In List,
  - Member: inode number of the file

### SQL

Metadata is stored in different tables by type, and each table is named with `jfs_` followed by its specific structure name to form the table name, e.g. `jfs_node`. Some tables use `Id` with the `bigserial` type as primary keys to ensure that each table has a primary key, and the `Id` columns do not contain actual information.

#### Setting {#sql-setting}

```go
type setting struct {
    Name  string `xorm:"pk"`
    Value string `xorm:"varchar(4096) notnull"`
}
```

There is only one entry in this table with "format" as Name and file system formatting information in JSON as Value.

#### Counter

```go
type counter struct {
    Name  string `xorm:"pk"`
    Value int64  `xorm:"notnull"`
}
```

#### Session

```go
type session2 struct {
    Sid    uint64 `xorm:"pk"`
    Expire int64  `xorm:"notnull"`
    Info   []byte `xorm:"blob"`
}
```

#### SessionInfo

There is no separate table for this, but it is recorded in the `Info` column of `session2`.

#### Node {#sql-node}

```go
type node struct {
    Inode  Ino    `xorm:"pk"`
    Type   uint8  `xorm:"notnull"`
    Flags  uint8  `xorm:"notnull"`
    Mode   uint16 `xorm:"notnull"`
    Uid    uint32 `xorm:"notnull"`
    Gid    uint32 `xorm:"notnull"`
    Atime  int64  `xorm:"notnull"`
    Mtime  int64  `xorm:"notnull"`
    Ctime  int64  `xorm:"notnull"`
    Nlink  uint32 `xorm:"notnull"`
    Length uint64 `xorm:"notnull"`
    Rdev   uint32
    Parent Ino
    AccessACLId  uint32 `xorm:"'access_acl_id'"`
    DefaultACLId uint32 `xorm:"'default_acl_id'"`
}
```

Most of the fields are the same as [Attr](#node), but the timestamp precision is lower, i.e., Atime/Mtime/Ctime are in microseconds.

#### Edge {#sql-edge}

```go
type edge struct {
    Id     int64  `xorm:"pk bigserial"`
    Parent Ino    `xorm:"unique(edge) notnull"`
    Name   []byte `xorm:"unique(edge) varbinary(255) notnull"`
    Inode  Ino    `xorm:"index notnull"`
    Type   uint8  `xorm:"notnull"`
}
```

#### LinkParent

There is no separate table for this. All `Parent`s are found based on the `Inode` index in `edge`.

#### Chunk {#sql-chunk}

```go
type chunk struct {
    Id     int64  `xorm:"pk bigserial"`
    Inode  Ino    `xorm:"unique(chunk) notnull"`
    Indx   uint32 `xorm:"unique(chunk) notnull"`
    Slices []byte `xorm:"blob notnull"`
}
```

Slices are an array of bytes, and each [Slice](#chunk) corresponds to 24 bytes.

#### SliceRef

```go
type sliceRef struct {
    Id   uint64 `xorm:"pk chunkid"`
    Size uint32 `xorm:"notnull"`
    Refs int    `xorm:"notnull"`
}
```

#### Symlink

```go
type symlink struct {
    Inode  Ino    `xorm:"pk"`
    Target []byte `xorm:"varbinary(4096) notnull"`
}
```

#### Xattr

```go
type xattr struct {
    Id    int64  `xorm:"pk bigserial"`
    Inode Ino    `xorm:"unique(name) notnull"`
    Name  string `xorm:"unique(name) notnull"`
    Value []byte `xorm:"blob notnull"`
}
```

#### Flock

```go
type flock struct {
    Id    int64  `xorm:"pk bigserial"`
    Inode Ino    `xorm:"notnull unique(flock)"`
    Sid   uint64 `xorm:"notnull unique(flock)"`
    Owner int64  `xorm:"notnull unique(flock)"`
    Ltype byte   `xorm:"notnull"`
}
```

#### Plock {#sql-plock}

```go
type plock struct {
    Id      int64  `xorm:"pk bigserial"`
    Inode   Ino    `xorm:"notnull unique(plock)"`
    Sid     uint64 `xorm:"notnull unique(plock)"`
    Owner   int64  `xorm:"notnull unique(plock)"`
    Records []byte `xorm:"blob notnull"`
}
```

Records is an array of bytes, and each [plockRecord](#plock) corresponds to 24 bytes.

#### DelFiles

```go
type delfile struct {
    Inode  Ino    `xorm:"pk notnull"`
    Length uint64 `xorm:"notnull"`
    Expire int64  `xorm:"notnull"`
}
```

#### DelSlices {#sql-delslices}

```go
type delslices struct {
    Id      uint64 `xorm:"pk chunkid"`
    Deleted int64  `xorm:"notnull"`
    Slices  []byte `xorm:"blob notnull"`
}
```

Slices is an array of bytes, and each [slice](#delslices) corresponds to 12 bytes.

#### Sustained

```go
type sustained struct {
    Id    int64  `xorm:"pk bigserial"`
    Sid   uint64 `xorm:"unique(sustained) notnull"`
    Inode Ino    `xorm:"unique(sustained) notnull"`
}
```

### TKV

The common format of keys in TKV (Transactional Key-Value Database) is `${prefix}${JFSKey}`, where

- prefix is used to distinguish between different file systems, usually `${VolumeName}0xFD`, where `0xFD` is used as a special byte to handle cases when there is an inclusion relationship between different file system names. In addition, for databases that are not shareable (e.g. BadgerDB), the empty string is used as prefix.
- JFSKey is the JuiceFS Key for different data types, which is listed in the following subsections.

In TKV's Keys, all integers are stored in encoded binary form.

- inode and counter value occupy 8 bytes and are encoded with **small endian**.
- SID, sliceId and timestamp occupy 8 bytes and are encoded with **big endian**.

#### Setting {#tkv-setting}

```
setting -> file system formatting information in JSON format
```

#### Counter

```
C${name} -> counter value
```

#### Session

```
SE${sid} -> timestamp
```

#### SessionInfo

```
SI${sid} -> session information in JSON format
```

#### Node {#tkv-node}

```
A${inode}I -> encoded Attr
```

#### Edge {#tkv-edge}

```
A${inode}D${name} -> encoded {type, inode}
```

#### LinkParent

```
A${inode}P${parentInode} -> counter value
```

#### Chunk {#tkv-chunk}

```
A${inode}C${index} -> Slices
```

where index takes up 4 bytes and is encoded with **big endian**. Slices is an array of bytes, one [Slice](#chunk) per 24 bytes.

#### SliceRef

```
K${sliceId}${size} -> counter value
```

where size takes up 4 bytes and is encoded with **big endian**.

#### Symlink

```
A${inode}S -> target
```

#### Xattr

```
A${inode}X${name} -> xattr value
```

#### Flock

```
F${inode} -> flocks
```

where flocks is an array of bytes, one flock per 17 bytes.

```go
type flock struct {
    sid   uint64
    owner uint64
    ltype uint8
}
```

#### Plock {#tkv-plock}

```
P${inode} -> plocks
```

where plocks is an array of bytes and the corresponding plock is variable-length.

```go
type plock struct {
    sid     uint64
    owner     uint64
    size     uint32
    records []byte
}
```

where size is the length of the records array and every 24 bytes in records corresponds to one [plockRecord](#plock).

#### DelFiles

```
D${inode}${length} -> timestamp
```

where length takes up 8 bytes and is encoded with **big endian**.

#### DelSlices {#tkv-delslices}

```
L${timestamp}${sliceId} -> slices
```

where slices is an array of bytes, and one [slice](#delslices) corresponds to 12 bytes.

#### Sustained

```
SS${sid}${inode} -> 1
```

Here the Value value is only used as a placeholder.

## File Data Format

### Finding files by path

According to the design of [Edge](#edges), only the direct children of each directory are recorded in the metadata engine. When an application provides a path to access a file, JuiceFS needs to look it up level by level. Now suppose the application wants to open the file `/dir1/dir2/testfile`, then it needs to

1. search for the entry with name "dir1" in the Edge structure of the root directory (inode number is fixed to 1) and get its inode number N1
2. search for the entry with the name "dir2" in the Edge structure of N1 and get its inode number N2
3. search for the entry with the name "testfile" in the Edge structure of N2, and get its inode number N3
4. search for the [Node](#node) structure corresponding to N3 to get the attributes of the file

Failure in any of the above steps will result in the file pointed to by that path not being found.

### File data splitting

From the previous section, we know how to find the file based on its path and get its attributes. The metadata related to the contents of the file can be found based on the inode and size fields in the file properties. Now suppose a file has an inode of 100 and a size of 160 MiB, then the file has `(size-1) / 64 MiB + 1 = 3` Chunks, as follows.

```
 File: |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|_ _ _ _ _ _ _ _|
Chunk: |<---        Chunk 0        --->|<---        Chunk 1        --->|<-- Chunk 2 -->|
```

In standalone Redis, this means that there are 3 [Chunk Keys](#chunk), i.e.,`c100_0`, `c100_1` and `c100_2`, each corresponding to a list of Slices. These Slices are mainly generated when the data is written and may overwrite each other or may not fill the Chunk completely, so you need to traverse this list of Slices sequentially and reconstruct the latest version of the data distribution before using it, so that

1. the part covered by more than one Slice is based on the last added Slice
2. the part that is not covered by Slice is automatically zeroed, and is represented by sliceId = 0
3. truncate Chunk according to file size

Now suppose there are 3 Slices in Chunk 0

```go
Slice{pos: 10M, id: 10, size: 30M, off: 0, len: 30M}
Slice{pos: 20M, id: 11, size: 16M, off: 0, len: 16M}
Slice{pos: 16M, id: 12, size: 10M, off: 0, len: 10M}
```

It can be illustrated as follows (each '_' denotes 2 MiB)

```
   Chunk: |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
Slice 10:           |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
Slice 11:                     |_ _ _ _ _ _ _ _|
Slice 12:                 |_ _ _ _ _|

New List: |_ _ _ _ _|_ _ _|_ _ _ _ _|_ _ _ _ _|_ _|_ _ _ _ _ _ _ _ _ _ _ _|
               0      10      12         11    10             0
```

The reconstructed new list contains and only contains the latest data distribution for this Chunk as follows

```go
Slice{pos:   0, id:  0, size: 10M, off:   0, len: 10M}
Slice{pos: 10M, id: 10, size: 30M, off:   0, len:  6M}
Slice{pos: 16M, id: 12, size: 10M, off:   0, len: 10M}
Slice{pos: 26M, id: 11, size: 16M, off:  6M, len: 10M}
Slice{pos: 36M, id: 10, size: 30M, off: 26M, len:  4M}
Slice{pos: 40M, id:  0, size: 24M, off:   0, len: 24M} // can be omitted
```

### Data objects

#### Object naming {#object-storage-naming-format}

Block is the basic unit for JuiceFS to manage data. Its size is 4 MiB by default, and can be changed only when formatting a file system, within the interval [64 KiB, 16 MiB]. Each Block is an object in the object storage after upload, and is named in the format `${fsname}/chunks/${hash}/${basename}`, where

- fsname is the file system name
- "chunks" is a fixed string representing the data object of JuiceFS
- hash is the hash value calculated from basename, which plays a role in isolation management
- basename is the valid name of the object in the format of `${sliceId}_${index}_${size}`, where
  - sliceId is the ID of the Slice to which the object belongs, and each Slice in JuiceFS has a globally unique ID
  - index is the index of the object in the Slice it belongs to, by default a Slice can be split into at most 16 Blocks, so its value range is [0, 16)
  - size is the size of the Block, and by default it takes the value of (0, 4 MiB]

Currently there are two hash algorithms, and both use the sliceId in basename as the parameter. Which algorithm will be chosen to use follows the [HashPrefix](#setting) of the file system.

```go
func hash(sliceId int) string {
    if HashPrefix {
        return fmt.Sprintf("%02X/%d", sliceId%256, sliceId/1000/1000)
    }
    return fmt.Sprintf("%d/%d", sliceId/1000/1000, sliceId/1000)
}
```

Suppose a file system named `jfstest` is written with a continuous 10 MiB of data and internally given a SliceID of 1 with HashPrefix disabled, then the following three objects will be generated in the object storage.

```
jfstest/chunks/0/0/1_0_4194304
jfstest/chunks/0/0/1_1_4194304
jfstest/chunks/0/0/1_2_2097152
```

Similarly, now taking the 64 MiB chunk in the previous section as an example, its actual data distribution is as follows

```
 0 ~ 10M: Zero
10 ~ 16M: 10_0_4194304, 10_1_4194304(0 ~ 2M)
16 ~ 26M: 12_0_4194304, 12_1_4194304, 12_2_2097152
26 ~ 36M: 11_1_4194304(2 ~ 4M), 11_2_4194304, 11_3_4194304
36 ~ 40M: 10_6_4194304(2 ~ 4M), 10_7_2097152
40 ~ 64M: Zero
```

According to this, the client can quickly find the data needed for the application. For example, reading 8 MiB data at offset 10 MiB location will involve 3 objects, as follows

- Read the entire object from `10_0_4194304`, corresponding to 0 to 4 MiB of the read data
- Read 0 to 2 MiB from `10_1_4194304`, corresponding to 4 to 6 MiB of the read data
- Read 0 to 2 MiB from `12_0_4194304`, corresponding to 6 to 8 MiB of the read data

To facilitate obtaining the list of objects of a certain file, JuiceFS provides the `info` command, e.g. `juicefs info /mnt/jfs/test.tmp`.

```bash
objects:
+------------+---------------------------------+----------+---------+----------+
| chunkIndex |            objectName           |   size   |  offset |  length  |
+------------+---------------------------------+----------+---------+----------+
|          0 |                                 | 10485760 |       0 | 10485760 |
|          0 | jfstest/chunks/0/0/10_0_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/10_1_4194304 |  4194304 |       0 |  2097152 |
|          0 | jfstest/chunks/0/0/12_0_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/12_1_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/12_2_2097152 |  2097152 |       0 |  2097152 |
|          0 | jfstest/chunks/0/0/11_1_4194304 |  4194304 | 2097152 |  2097152 |
|          0 | jfstest/chunks/0/0/11_2_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/11_3_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/10_6_4194304 |  4194304 | 2097152 |  2097152 |
|          0 | jfstest/chunks/0/0/10_7_2097152 |  2097152 |       0 |  2097152 |
|        ... |                             ... |      ... |     ... |      ... |
+------------+---------------------------------+----------+---------+----------+
```

The empty objectName in the table means a file hole and is read as 0. As you can see, the output is consistent with the previous analysis.

It is worth mentioning that the 'size' here is size of the original data in the Block, rather than that of the actual object in object storage. The original data is written directly to object storage by default, so the 'size' is equal to object size. However, when data compression or data encryption is enabled, the size of the actual object will change and may no longer be the same as the 'size'.

#### Data compression

You can configure the compression algorithm (supporting `lz4` and `zstd`) with the `--compress <value>` parameter when formatting a file system, so that all data blocks of this file system will be compressed before uploading to object storage. The object name remains the same as default, and the content is the result of the compression algorithm, without any other meta information. Therefore, the compression algorithm in the [file system formatting Information](#setting) is not allowed to be modified, otherwise it will cause the failure of reading existing data.

#### Data encryption

The RSA private key can be configured to enable [static data encryption](../security/encryption.md) when formatting a file system with the `--encrypt-rsa-key <value>` parameter, which allows all data blocks of this file system to be encrypted before uploading to the object storage. The object name is still the same as default, while its content becomes a header plus the result of the data encryption algorithm. The header contains a random seed and the symmetric key used for decryption, and the symmetric key itself is encrypted with the RSA private key. Therefore, it is not allowed to modify the RSA private key in the [file system formatting Information](#setting), otherwise reading existing data will fail.

:::note
If both compression and encryption are enabled, the original data will be compressed and then encrypted before uploading to the object storage.
:::


================================================
FILE: docs/en/faq.md
================================================
---
title: FAQ
slug: /faq
---

## My question is not answered in the documentation

If you can't find an answer in the documentation, please try using the "Ask AI" feature (in the bottom right corner). If the AI assistant's answer helps you or provides a wrong answer, feel free to leave feedback on the response. Alternatively, use the document search feature (in the top right corner) and try searching with different keywords.

If these methods still do not resolve your question, you can join the [JuiceFS Community](https://juicefs.com/en/community) for further assistance.

## General Questions

### What's the difference between JuiceFS and XXX?

See ["Comparison with Others"](introduction/comparison/juicefs_vs_alluxio.md) for more information.

### How to upgrade JuiceFS client?

First unmount JuiceFS volume, then re-mount the volume with newer version client.

### Where is the JuiceFS log?

Different types of JuiceFS clients have different ways to obtain logs. For details, please refer to ["Client log"](administration/fault_diagnosis_and_analysis.md#client-log) document.

### Can JuiceFS directly read files that already exist in object storage?

JuiceFS cannot directly read files that already exist in object storage. Although JuiceFS typically uses object storage as the data storage layer, it is not a tool for accessing object storage in the traditional sense. You can refer to the [technical architecture](introduction/architecture.md) documentation for more details.

If you want to migrate existing data in an object storage bucket to JuiceFS, you can use [`JuiceFS Sync`](guide/sync.md).

### How can I combine multiple servers into a single JuiceFS file system for use?

No, while JuiceFS supports using local disks or SFTP as the underlying storage, it does not interfere with the logical structure management of the underlying storage. If you wish to consolidate storage space from multiple servers, you may consider using MinIO or Ceph to create an object storage cluster, and then create a JuiceFS file system on top of it.

## Metadata Related Questions

### Does support Redis in Sentinel or Cluster-mode as the metadata engine for JuiceFS?

Yes, There is also a [best practice document](administration/metadata/redis_best_practices.md) for Redis as the JuiceFS metadata engine for reference.

## Object Storage Related Questions

### Why doesn't JuiceFS support XXX object storage?

JuiceFS already supported many object storage, please check [the list](reference/how_to_set_up_object_storage.md#supported-object-storage) first. If this object storage is compatible with S3, you could treat it as S3. Otherwise, try reporting issue.

### Why do I delete files at the mount point, but there is no change or very little change in object storage footprint?

The first reason is that you may have enabled the trash feature. In order to ensure data security, the trash is enabled by default. The deleted files are actually placed in the trash and are not actually deleted, so the size of the object storage will not change. trash retention time can be specified with `juicefs format` or modified with `juicefs config`. Please refer to the ["Trash"](security/trash.md) documentation for more information.

The second reason is that JuiceFS deletes the data in the object storage asynchronously, so the space change of the object storage will be slower. If you need to immediately clean up the data in the object store that needs to be deleted, you can try running the [`juicefs gc`](reference/command_reference.mdx#gc) command.

### How Does JuiceFS Asynchronous Deletion Work?

* ​**When trash is disabled:**
  - The system checks whether the file is being opened by other processes:
    * If the file is in use, it is marked as ​**"deferred deletion (`sustained`)"** and will be processed after the program closes the file
    * If the file is not in use, it is marked as ​**pending deletion (`delfile`)** and attempts to place it into the ​**deletion queue (`maxDeleting`)**
  
* ​**When trash is enabled:**
  - The system creates subdirectories in the trash based on ​**current time (accurate to the hour)** (e.g., `2024-01-15-14`)
  - Files pending deletion are moved to the corresponding time-stamped directory:
    * ​**All chunks and slices of data remain intact**
    * Only the ​**parent directory pointer** in metadata changes
    * Filenames are ​**re-encoded** to prevent conflicts
  - A background task cleans expired files based on retention period:
    * Starts cleaning from the ​**oldest directory**
    * Method: Marked as ​**pending deletion (`delfile`)**, placed into the ​**deletion queue (`maxDeleting`)**
  
* ​**Deletion queue processing (asynchronous cleanup):**
  1. ​**Find all chunks corresponding to the file and delete them**
  2. Deleting chunks will ​**decrement the reference count of their slices**
  3. When a slice's reference count drops to zero, it becomes ​**`Pending Deleted Slices`**
  4. The background task cleans these data slices from object storage

![JuiceFS-delete-file](./images/juicefs-delete-file-English.svg)

* The deletion queue has a capacity limit. If too many files are deleted simultaneously, deletion requests will return immediately when the queue is full. Then a background cleanup task that runs hourly continues the cleanup. It finds all files marked as ​**pending deletion (`delfile`)** and cleans them using the same method as files in the deletion queue.
* If NoBGJob is configured, the hourly scheduled background cleanup task and trash cleanup task are disabled. After deleting files, manual cleanup is required in the trash.
* In a special scenario, when you manually delete files directly from the trash, it ensures synchronous insertion into the deletion queue, enabling relatively fast reclamation of object storage space. However, subsequent chunk cleanup remains asynchronous.
* Regarding slice reference count: Deleting chunks and compaction (`compact`) will decrease the reference count of related slices, while `clone` and `copyFileRange` will increase the reference count of related slices.

### Why is file system data size different from object storage usage? {#size-inconsistency}

* ["Random write in JuiceFS"](#random-write) produces data fragments, causing higher storage usage for object storage, especially after a large number of overwrites in a short period of time, many fragments will be generated. These fragments continue to occupy space in object storage until they are compacted and released. You shouldn't worry about this because JuiceFS checks for file compaction with every read/write, and cleans up in the client background job. Alternatively, you can manually trigger merges and garbage collection with [`juicefs gc --compact --delete`](./reference/command_reference.mdx#gc).
* If [Trash](./security/trash.md) is enabled, deleted files will be kept for a specified period of time, and then be garbage collected (all carried out in client background job).
* After data fragments are compacted, stale slices will be kept inside Trash as well (not visible to user), following the same expiration settings. To delete this type of data, read [Trash and stale slices](./security/trash.md#gc).
* If compression is enabled (the `--compress` parameter in the [`format`](./reference/command_reference.mdx#format) command, disabled by default), object storage usage may be smaller than the actual file size (depending on the compression ratio of different types of files).
* Different [storage class](reference/how_to_set_up_object_storage.md#storage-class) of the object storage may calculate storage usage differently. The cloud service provider may set the minimum billable size for some storage classes. For example, the [minimum billable size](https://www.alibabacloud.com/help/en/object-storage-service/latest/storage-fees) for Alibaba Cloud OSS IA storage is 64KB. If a file is smaller than 64KB, it will be calculated as 64KB.
* For self-hosted object storage services, for example MinIO, actual data usage is affected by [storage class settings](https://github.com/minio/minio/blob/master/docs/erasure/storage-class/README.md).

### Does JuiceFS support using a directory in object storage as the value of the `--bucket` option?

As of the release of JuiceFS 1.0, this feature is not supported.

### Does JuiceFS support accessing data that already exists in object storage?

As of the release of JuiceFS 1.0, this feature is not supported.

### Is it possible to bind multiple different object storages to a single file system (e.g. one file system with Amazon S3, GCS and OSS at the same time)?

No. However, you can set up multiple buckets associated with the same object storage service when creating a file system, thus solving the problem of limiting the number of individual bucket objects, for example, multiple S3 Buckets can be associated with a single file system. Please refer to [`--shards`](./reference/command_reference.mdx#format) option for details.

## Performance Related Questions

### How is the performance of JuiceFS?

JuiceFS is a distributed file system, the latency of metadata is determined by 1 (reading) or 2 (writing) round trip(s) between client and metadata service (usually 1-3 ms). The latency of first byte is determined by the performance of underlying object storage (usually 20-100 ms). Throughput of sequential read/write could be 50MB/s - 2800MiB/s (see [fio benchmark](benchmark/fio.md) for more information), depends on network bandwidth and how the data could be compressed.

JuiceFS is built with multiple layers of caching (invalidated automatically), once the caching is warmed up, the latency and throughput of JuiceFS could be close to local file system (having the overhead of FUSE).

### Does JuiceFS support random read/write? How? {#random-write}

Yes, including those issued using mmap. Currently JuiceFS is optimized for sequential reading/writing, and optimized for random reading/writing is work in progress. If you want better random read performance, it's best to turn off compression ([`--compress none`](reference/command_reference.mdx#format)).

JuiceFS does not store the original file in the object storage, but splits it into data blocks using a fixed size (4MiB by default), then uploads it to the object storage, and stores the ID of the data block in the metadata engine. When random write happens, the original metadata is marked stale, and then JuiceFS Client uploads the **new data block** to the object storage, then update the metadata accordingly.

When reading the data of the overwritten part, according to the **latest metadata**, it can be read from the **new data block** uploaded during random writing, and the **old data block** may be deleted by the background garbage collection tasks automatically clean up. This shifts complexity from random writes to reads.

Read [JuiceFS Internals](development/internals.md) and [Data Processing Flow](introduction/io_processing.md) to learn more.

### How to copy a large number of small files into JuiceFS quickly?

Use the [`--writeback` option](reference/command_reference.mdx#mount-data-cache-options) to write data to the local cache and then asynchronously upload it to the object storage backend. This is significantly faster than writing directly to object storage. See ["Write Cache in Client"](guide/cache.md#client-write-cache) for more information.

### Does JuiceFS support distributed cache?

[Distributed cache](https://juicefs.com/docs/cloud/guide/distributed-cache) is supported in our enterprise edition.

## Mount Related Questions

### Can I mount JuiceFS without `root` privileges?

Yes, JuiceFS could be mounted using `juicefs` without root privileges. The default directory for caching is `$HOME/.juicefs/cache` (macOS) or `/var/jfsCache` (Linux), you should change that to a directory which you have write permission.

See ["Read Cache in Client"](guide/cache.md#client-read-cache) for more information.

## Access Related Questions

### What other ways does JuiceFS offer to access data?

In addition to mounting, the following methods are also supported:

- Kubernetes CSI Driver: Use JuiceFS as the storage layer of Kubernetes cluster through the Kubernetes CSI Driver. For details, please refer to ["Use JuiceFS on Kubernetes"](deployment/how_to_use_on_kubernetes.md).
- Hadoop Java SDK: It is convenient to use a Java client compatible with the HDFS interface to access JuiceFS in the Hadoop ecosystem. For details, please refer to ["Use JuiceFS on Hadoop Ecosystem"](deployment/hadoop_java_sdk.md).
- S3 Gateway: Access JuiceFS through the S3 protocol. For details, please refer to ["Deploy JuiceFS S3 Gateway"](./guide/gateway.md).
- Docker Volume Plugin: A convenient way to use JuiceFS in Docker, please refer to ["Use JuiceFS on Docker"](deployment/juicefs_on_docker.md).
- WebDAV Gateway: Access JuiceFS via WebDAV protocol

### Why does the same username have different permissions on different hosts when accessing JuiceFS files?

Although a user has the same username on both hosts (for example, `alice` on host X and host Y), their user ID (UID) or group ID (GID) differs between them. File permissions in JuiceFS are based on these numeric IDs, not the username.

To confirm this, run the `id` command on each host and compare the output:

```bash
$ id alice
uid=1201(alice) gid=500(staff) groups=500(staff)
```

See [Sync Accounts between Multiple Hosts](administration/sync_accounts_between_multiple_hosts.md) to resolve this issue.

### Does JuiceFS S3 Gateway support advanced features such as multi-user management?

The built-in `gateway` subcommand does not support functions including as multi-user management, and provides only basic S3 gateway functions. If you need to use these advanced features, please refer to the [documentation](guide/gateway.md).

### Is there an SDK available for JuiceFS?

As of the release of JuiceFS 1.0, the community has two SDKs, one is the [Java SDK](deployment/hadoop_java_sdk.md) that is highly compatible with the HDFS interface officially maintained by Juicedata, and the other is the [Python SDK](https://github.com/megvii-research/juicefs-python) maintained by community users.


================================================
FILE: docs/en/getting-started/for_distributed.md
================================================
---
sidebar_position: 3
description: This article will guide you through building a distributed, shared-access JuiceFS file system using cloud-based object storage and databases.
---

# Distributed Mode

[The previous document](./standalone.md) introduces how to create a file system that can be mounted on any host using an *object storage* and an *SQLite* database. Since object storage is accessible by any computer with privileges on the network, you can also access the same JuiceFS file system on different computers by simply copying the SQLite database file to any computer that needs to access the storage.

However, this approach does not guarantee real-time file availability when the file system is shared. Since SQLite is a single file database that cannot be accessed by multiple computers at the same time, a database that supports network access is needed, such as Redis, PostgreSQL, or MySQL. This allows a file system to be mounted and read by multiple computers in a distributed environment.

In this document, a multi-user *cloud database* will be used to replace the single-user *SQLite* database used in the previous document. This aims to implement a distributed file system that can be mounted on any computer on the network for reading and writing.

## Network databases

A *network database* is one that allows multiple users to access it simultaneously through a network. From this perspective, databases can generally be classified as:

- **Standalone databases**: Single-file databases usually only accessed locally, such as SQLite and Microsoft Access.
- **Network databases**: Databases usually with complex multi-file structures, providing network-based access interfaces and supporting simultaneous access by multiple users, such as Redis and PostgreSQL.

JuiceFS currently supports the following network-based databases.

- **Key-value databases**: Redis, TiKV, etcd, and FoundationDB
- **Relational databases**: PostgreSQL, MySQL, and MariaDB

Different databases have different performance and stability. For example, Redis is an in-memory key-value database with excellent performance but relatively weak reliability, while PostgreSQL is a more reliable relational database with lower performance than in-memory databases.

A detailed guide on database selection will be available soon.

## Cloud databases

Cloud computing platforms usually offer a wide variety of cloud databases, such as Amazon RDS for various relational database versions and Amazon ElastiCache for Redis-compatible in-memory database products. These services allow to create a multi-copy and highly available database cluster by a simple initial setup.

Alternatively, you can also build your own database on the server.

For simplicity, we'll use Amazon ElastiCache for Redis as an example. The most basic information of a network database include:

- **Database address**: The database's access address, with different links for internal and external networks.
- **Username and password**: Authentication information used to access the database.

## Hands-on practice

### 1. Install the client

Install the JuiceFS client on all computers that need to mount the file system. Refer to the [Installation](installation.md) guide for details.

### 2. Prepare object storage

Here is a pseudo sample with Amazon S3 as an example. You can also use other object storage services. Refer to [JuiceFS Supported Storage](../reference/how_to_set_up_object_storage.md#supported-object-storage) for details.

- **Bucket Endpoint**: `https://myjfs.s3.us-west-1.amazonaws.com`
- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA`

### 3. Prepare the database

Here is a pseudo sample with Amazon ElastiCache for Redis as an example. You can also use other types of databases. Refer to [JuiceFS Supported Databases](../reference/how_to_set_up_metadata_engine.md) for details.

- **Database address**: `myjfs-sh-abc.apse1.cache.amazonaws.com:6379`
- **Database username**: `tom`
- **Database password**: `mypassword`

The format for using a Redis database in JuiceFS is as follows:

```
redis://<username>:<password>@<Database-IP-or-URL>:6379/1
```

:::tip
Redis versions lower than 6.0 do not have a username, so omit the `<username>` part in the URL. For example: `redis://:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1` (note that the colon before the password is a separator and must be included).
:::

### 4. Create a file system

To create a file system that supports cross-network, multi-server simultaneous mounts with shared read/write access using object storage and a Redis database, run:

```shell
juicefs format \
    --storage s3 \
    --bucket https://myjfs.s3.us-west-1.amazonaws.com \
    --access-key ABCDEFGHIJKLMNopqXYZ \
    --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \
    redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 \
    myjfs
```

Once the file system is created, you'll see an output similar to:

```shell
2021/12/16 16:37:14.264445 juicefs[22290] <INFO>: Meta address: redis://@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1
2021/12/16 16:37:14.277632 juicefs[22290] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/12/16 16:37:14.281432 juicefs[22290] <INFO>: Ping redis: 3.609453ms
2021/12/16 16:37:14.527879 juicefs[22290] <INFO>: Data uses s3://myjfs/myjfs/
2021/12/16 16:37:14.593450 juicefs[22290] <INFO>: Volume is formatted as {Name:myjfs UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:s3 Bucket:https://myjfs AccessKey:ABCDEFGHIJKLMNopqXYZ SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

:::info
Once the file system is created, all relevant information, including its name, object storage details, and access keys, are stored in the database. In this example, the file system information is stored in Redis, so any computer with the database address, username, and password information can mount and read the file system.
:::

### 5. Mount the file system

Since the file system's *data* and *metadata* are stored in cloud services, it can be mounted on any computer with a JuiceFS client installed for shared reads and writes at the same time. For example:

```shell
juicefs mount redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 ~/jfs
```

#### Strong data consistency guarantee

JuiceFS ensures *close-to-open* consistency. When multiple clients are reading and writing to the same file, changes made by client A may not be immediately visible to client B. Once client A closes the file, any other client, no matter whether the file is on the same node with A, is guaranteed to see the latest data upon reopening the file.

#### Increase cache size to improve performance

Since object storage is a network-based service, access latency is inevitable. To mitigate this, JuiceFS offers a caching mechanism, enabled by default. This allocates a portion of local storage as a buffer layer between your data and the object storage, asynchronously caching data to local storage when files are accessed. For more details, refer to [Cache](../guide/cache.md).

JuiceFS sets 100GiB cache in the `$HOME/.juicefs/cache` or `/var/jfsCache` directory by default. Setting a larger cache space on a faster SSD can effectively improve read and write performance of JuiceFS.

You can use `--cache-dir` to adjust the location of the cache directory and `--cache-size` to adjust the size of the cache space. For example:

```shell
juicefs mount
    --background \
    --cache-dir /mycache \
    --cache-size 512000 \
    redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 \
    ~/jfs
```

:::note
The JuiceFS process needs read and write permissions for the `--cache-dir` directory.
:::

The above command sets the cache directory in the `/mycache` directory and specifies the cache space as 500GiB.

#### Auto-mount on boot

In a Linux environment, you can set up automatic mounting when mounting a file system via the `--update-fstab` option, which adds the necessary options to mount JuiceFS to `/etc/fstab`. For example:

:::note
This feature requires JuiceFS version 1.1.0 or above.
:::

```bash
$ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 <MOUNTPOINT>
$ grep <MOUNTPOINT> /etc/fstab
redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 <MOUNTPOINT> juicefs _netdev,max-uploads=50,writeback,cache-size=204800 0 0
$ ls -l /sbin/mount.juicefs
lrwxrwxrwx 1 root root 29 Aug 11 16:43 /sbin/mount.juicefs -> /usr/local/bin/juicefs
```

Refer to [Mount JuiceFS at Boot Time](../administration/mount_at_boot.md) for more details.

### 6. Verify the file system

After the file system is mounted, you can use the `juicefs bench` command to perform basic performance tests and functional verification of the file system to ensure that the JuiceFS file system can be accessed normally and its performance meets expectations.

:::info
The `juicefs bench` command can only complete basic performance tests. If you need a more comprehensive evaluation of JuiceFS, refer to [JuiceFS Performance Evaluation Guide](../benchmark/performance_evaluation_guide.md).
:::

```shell
juicefs bench ~/jfs
```

This command writes and reads a specified number of large (1 by default) and small (100 by default) files to and from the JuiceFS file system according to the specified concurrency (1 by default). The command then measures the throughput and latency of read and write operations, as well as the latency of metadata engine access.

If you encounter any problems during the verification of the file system, refer to the [Fault Diagnosis and Analysis](../administration/fault_diagnosis_and_analysis.md) document for troubleshooting.

### 7. Unmount the file system

You can unmount the JuiceFS file system (assuming the mount point path is `~/jfs`) by the command `juicefs umount`.

```shell
juicefs umount ~/jfs
```

If the command fails to unmount the file system after execution, it will prompt `Device or resource busy`.

```shell
2021-05-09 22:42:55.757097 I | fusermount: failed to unmount ~/jfs: Device or resource busy
exit status 1
```

This failure happens probably because some programs are reading or writing files in the file system when executing the `unmount` command. To avoid data loss,first determine which processes are accessing files in the file system (for example, via the `lsof` command) and try to release the files before re-executing the `unmount` command.

:::caution
The following command may result in file corruption and loss. Proceed with caution!
:::

You can force unmounting by adding the `--force` or `-f` option if you're sure of the consequences:

```shell
juicefs umount --force ~/jfs
```


================================================
FILE: docs/en/getting-started/installation.md
================================================
---
title: Installation
sidebar_position: 1
description: Learn how to install JuiceFS on Linux, macOS, and Windows, including one-click installation, pre-compiled installation, and containerized deployment methods.
---

JuiceFS has good cross-platform capability and supports various operating systems across almost all major architectures, including but not limited to Linux, macOS, and Windows.

The JuiceFS client has only one binary file. You can either download the pre-compiled version to unzip it and use it directly, or manually compile it from the provided source code.

## One-click installation {#one-click-installation}

The one-click installation script is available for Linux and macOS systems. It automatically downloads and installs the latest version of the JuiceFS client based on your hardware architecture.

**Option 1 (Recommended):** Install to the default location `/usr/local/bin`:

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

**Option 2:** If you need to install to a custom location, for example `/tmp` directory:

```shell
curl -sSL https://d.juicefs.com/install | sh -s /tmp
```

:::tip
Most users should choose **Option 1** for the default installation. Only use **Option 2** if you have specific requirements for the installation directory.
:::

## Install the pre-compiled client {#install-the-pre-compiled-client}

You can download the latest version of the client at [GitHub](https://github.com/juicedata/juicefs/releases). Pre-compiled versions for different CPU architectures and operating systems are available in the download list of each client version. Please select the version that best suits your application. For example:

| File Name                            | Description                                                                                  |
|--------------------------------------|----------------------------------------------------------------------------------------------|
| `juicefs-x.y.z-darwin-amd64.tar.gz`  | For macOS systems with Intel chips                                                           |
| `juicefs-x.y.z-darwin-arm64.tar.gz`  | For macOS systems with M1 series chips                                                       |
| `juicefs-x.y.z-linux-amd64.tar.gz`   | For Linux distributions on x86 architecture                                                  |
| `juicefs-x.y.z-linux-arm64.tar.gz`   | For Linux distributions on ARM architecture                                                  |
| `juicefs-x.y.z-windows-amd64.tar.gz` | For Windows on x86 architecture                                                              |
| `juicefs-hadoop-x.y.z.jar`           | Hadoop Java SDK on x86 and ARM architectures (supports Linux, macOS, and Windows systems) |

### Linux {#linux}

For Linux systems with x86 architecture, download the file with the file name `linux-amd64` and execute the following commands in the terminal.

1. Get the latest version number:

   ```shell
   JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
   ```

2. Download the client to the current directory:

   ```shell
   wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
   ```

3. Unzip the installation package:

   ```shell
   tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
   ```

4. Install the client:

   ```shell
   sudo install juicefs /usr/local/bin
   ```

After completing the above 4 steps, execute the `juicefs` command in the terminal. If the client installation is successful, a help message will be returned.

:::info
If the terminal prompts `command not found`, it is probably because `/usr/local/bin` is not in your system's `PATH` environment variable. You can execute `echo $PATH` to see which executable paths are set in your system. Based on the returned result, select an appropriate path, adjust, and re-execute the installation command in step 4.
:::

#### Ubuntu PPA

JuiceFS also provides a [PPA](https://launchpad.net/~juicefs) repository, which makes it easy to install the latest version of the client on Ubuntu systems. Choose the corresponding PPA repository based on your CPU architecture:

- **x86 architecture**：`ppa:juicefs/ppa`
- **ARM architecture**：`ppa:juicefs/arm64`

For example, on a Ubuntu 22.04 system with x86 architecture, execute the following commands:

1. Add the PPA repository:

   ```shell
   sudo add-apt-repository ppa:juicefs/ppa
   ```

2. Update the package list:

   ```shell
   sudo apt-get update
   ```

3. Install the JuiceFS client:

   ```shell
   sudo apt-get install juicefs
   ```

#### Fedora Copr

JuiceFS also provides a [Copr](https://copr.fedorainfracloud.org/coprs/juicedata/juicefs) repository, which allows for easy installation of the latest version of the client on Red Hat and its derivatives. The supported systems currently include:

- **Amazonlinux 2023**
- **CentOS 8, 9**
- **Fedora 37, 38, 39, rawhide**
- **RHEL 7, 8, 9**

Taking Fedora 38 as an example, execute the following commands to install the client:

Enable the Copr repository:

```shell
sudo dnf copr enable -y juicedata/juicefs
```

Install the client:

```shell
sudo dnf install juicefs
```

#### Snapcraft

We have also packaged and released the [Snap version of the JuiceFS client](https://github.com/juicedata/juicefs-snapcraft) on the [Canonical Snapcraft](https://snapcraft.io) platform. For Ubuntu 16.04 and above and other operating systems that support Snap, you can install it using the following commands:

```shell
sudo snap install juicefs
```

Since Snap is a closed sandbox environment, it may affect the client's FUSE mount. You can remove the restriction by executing the following command. If you only need to use WebDAV and Gateway, there is no need to execute:

```shell
sudo ln -s -f /snap/juicefs/current/juicefs /snap/bin/juicefs
```

When there is a new version, execute the following command to update the client:

```shell
sudo snap refresh juicefs
```

#### AUR (Arch User Repository) {#aur}

JuiceFS also provides an [AUR](https://aur.archlinux.org/packages/juicefs) repository, which makes it convenient to install the latest version of the client on Arch Linux and its derivatives.

For systems using the Yay package manager, execute the following command to install the client:

```shell
yay -S juicefs
```

:::info
There are multiple JuiceFS client packages available on AUR. The following are versions officially maintained by JuiceFS:

- [`aur/juicefs`](https://aur.archlinux.org/packages/juicefs): A stable compiled version that fetches the latest stable source code and compiles it during installation.
- [`aur/juicefs-bin`](https://aur.archlinux.org/packages/juicefs-bin): A stable pre-compiled version that directly downloads and installs the latest stable pre-compiled program.
- [`aur/juicefs-git`](https://aur.archlinux.org/packages/juicefs-git): A development version that fetches the latest development source code and compiles it during installation.

:::

Additionally, you can manually compile and install using `makepkg`, as shown for an Arch Linux system:

Install dependencies:

```shell
sudo pacman -S base-devel git go
```

Clone the AUR repository to be packaged:

```shell
git clone https://aur.archlinux.org/juicefs.git
```

Navigate to the repository directory:

```shell
cd juicefs
```

Compile and install:

```shell
makepkg -si
```

### Windows {#windows}

Since Windows does not natively support the FUSE interface, you need to download and install [WinFsp](https://winfsp.dev) first in order to implement FUSE support.

   :::tip
   **[WinFsp](https://github.com/winfsp/winfsp)** is an open source Windows file system agent. It provides a FUSE emulation layer that allows JuiceFS clients to mount file systems on Windows systems for use.
   :::

There are three ways to use JuiceFS on Windows systems.

- [Using the pre-compiled Windows client](#using-the-pre-compiled-windows-client)
- [Using Scoop](#scoop)
- [Using the Linux client in WSL](#using-the-linux-client-in-wsl)

#### Using the pre-compiled Windows client

The Windows client of JuiceFS is also a standalone binary. After you download and extract it, you can run it right away.

Take the Windows 10 system as an example, download the file with the file name `windows-amd64`, unzip it, and get `juicefs.exe` which is the JuiceFS client binary.

For convenience, you can move `juicefs.exe` to `C:\Windows\System32`, so you can run the `juicefs` command directly from any directory in the command line.

If you prefer more flexible management of the JuiceFS client, you can create a `juicefs` folder under the `C:\` drive, place `juicefs.exe` inside it, and add `C:\juicefs` to your system's PATH environment variable. After restarting your system, you can use the `juicefs` command directly in terminals such as Command Prompt or PowerShell.

![Windows ENV path](../images/windows-path-en.png)

#### Using Scoop {#scoop}

If you have [Scoop](https://scoop.sh) installed in your Windows system, you can use the following command to install the latest version of the JuiceFS client:

```shell
scoop install juicefs
```

#### Using the Linux client in WSL

[WSL](https://docs.microsoft.com/en-us/windows/wsl/about) is short for Windows Subsystem for Linux, which is supported from Windows 10 version 2004 onwards or Windows 11. It allows you to run most of the command-line tools, utilities, and applications of GNU/Linux natively on a Windows system without incurring the overhead of a traditional virtual machine or dual-boot setup.

For details, see [Using JuiceFS on WSL](../tutorials/juicefs_on_wsl.md).

### macOS {#macos}

Since macOS does not support the FUSE interface by default, you need to install [macFUSE](https://osxfuse.github.io) to enable FUSE mounting. If FUSE mounting is not your primary use case, installing macFUSE is not required. You can also conveniently read and write data using JuiceFS through [WebDAV](../deployment/webdav.md), [Gateway](../guide/gateway.md), or the [Python SDK](../deployment/python_sdk.md).

:::tip
[macFUSE](https://github.com/osxfuse/osxfuse) is an open source file system enhancement tool that allows macOS to mount third-party file systems. It enables JuiceFS clients to mount file systems on macOS systems.
:::

#### Homebrew

If you have the [Homebrew](https://brew.sh) package manager installed on your system, you can install the JuiceFS client by executing the following command:

```shell
brew install juicefs
```

*For more information about this command, please refer to [Homebrew Formulae](https://formulae.brew.sh/formula/juicefs#default) page.*

#### Pre-compiled binary

You can also download the binary with the file name `darwin-amd64`. After downloading, unzip the file and install the program to any executable path on your system using the `install` command, for example:

```shell
sudo install juicefs /usr/local/bin
```

### Docker {#docker}

For those interested in using JuiceFS in a Docker container, a `Dockerfile` for building a JuiceFS client image is provided below. It can be used as a base to build a JuiceFS client image alone or packaged together with other applications.

```dockerfile
FROM ubuntu:20.04

RUN apt update && apt install -y curl fuse && \
    apt-get autoremove && \
    apt-get clean && \
    rm -rf \
    /tmp/* \
    /var/lib/apt/lists/* \
    /var/tmp/*

RUN set -x && \
    mkdir /juicefs && \
    cd /juicefs && \
    JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \
    curl -s -L "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" \
    | tar -zx && \
    install juicefs /usr/bin && \
    cd .. && \
    rm -rf /juicefs

CMD [ "juicefs" ]
```

## Manually compiling {#manually-compiling}

If there is no pre-compiled client versions that are suitable for your operating system, such as FreeBSD, you can manually compile the JuiceFS client.

One advantage of manual compilation is that you have priority access to various new features in JuiceFS development, but it requires some basic knowledge of software compilation.

:::tip
For users in China, in order to speed up the acquisition of Go modules, it is recommended to set the `GOPROXY` environment variable to the domestic mirror server by executing `go env -w GOPROXY=https://goproxy.cn,direct`. For details, see [Goproxy China](https://github.com/goproxy/goproxy.cn).
:::

### Unix-like client

Compiling clients for Linux, macOS, BSD and other Unix-like systems requires the following dependencies:

- [Go](https://golang.org) 1.20+
- GCC 5.4+

1. Clone the source code:

   ```shell
   git clone https://github.com/juicedata/juicefs.git
   ```

2. Enter the source code directory:

   ```shell
   cd juicefs
   ```

3. Switch to the desired branch, such as release v1.0.0:

   The source code uses the `main` branch by default. You can switch to any official release, for example, to the release `v1.0.0`:

   ```shell
   git checkout v1.0.0
   ```

   :::caution
   The development branch often involves large changes, so do not use the clients compiled in the "development branch" for the production environment.
   :::

4. Compile:

   ```shell
   make
   ```

   The compiled `juicefs` binary is located in the current directory.

### Compiling on Windows

To compile the JuiceFS client on Windows systems, you need to install the following dependencies:

- [WinFsp](https://github.com/winfsp/winfsp)
- [Go](https://golang.org) 1.20+
- GCC 5.4+

Among them, WinFsp and Go can be downloaded and installed directly. GCC needs to use a version provided by a third party, which can use [MinGW-w64](https://www.mingw-w64.org) or [Cygwin](https://www.cygwin.com). Here we take MinGW-w64 as an example.

On the [MinGW-w64 download page](https://www.mingw-w64.org/downloads), select a precompiled version for Windows, such as [mingw-builds-binaries](https://github.com/niXman/mingw-builds-binaries/releases). After downloading, extract it to the root directory of the `C` drive, then find PATH in the system environment variable settings and add the `C:\mingw64\bin` directory. After restarting the system, execute the `gcc -v` command in the command prompt or PowerShell. If you can see version information, it means that MingGW-w64 is successfully installed, and you can start compiling.

1. Clone and enter the project directory:

   ```shell
   git clone https://github.com/juicedata/juicefs.git && cd juicefs
   ```

2. Copy WinFsp headers:

   ```shell
   mkdir "C:\WinFsp\inc\fuse"
   ```

   ```shell
   copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\
   ```

   ```shell
   dir "C:\WinFsp\inc\fuse"
   ```

   ```shell
   set CGO_CFLAGS=-IC:/WinFsp/inc/fuse
   ```

   ```shell
   go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse
   ```

3. Compile the client:

   ```shell
   go build -ldflags="-s -w" -o juicefs.exe .
   ```

The compiled `juicefs.exe` binary program is located in the current directory. For convenience, it can be moved to the `C:\Windows\System32` directory, so that the `juicefs.exe` command can be used directly anywhere.

### Cross-compiling Windows clients on Linux

Compiling a specific version of the client for Windows is essentially the same as [Unix-like Client](#unix-like-client) and can be done directly on a Linux system. However, in addition to `go` and `gcc`, you also need to install [MinGW-w64](https://www.mingw-w64.org/downloads).

The latest version can be installed from software repositories on many Linux distributions. For example, on Ubuntu 20.04+, you can install `mingw-w64` with the following command:

```shell
sudo apt install mingw-w64
```

Compile the Windows client:

```shell
make juicefs.exe
```

The compiled client is a binary file named `juicefs.exe`, located in the current directory.

### Cross-compiling Linux clients on macOS

1. Clone and enter the project directory:

   ```shell
   git clone https://github.com/juicedata/juicefs.git && cd juicefs
   ```

2. Install dependencies:

   ```shell
   brew install FiloSottile/musl-cross/musl-cross
   ```

3. Compile the client:

   ```shell
   make juicefs.linux
   ```

## Uninstall {#uninstall}

The JuiceFS client has only one binary file, so it can be easily deleted once you find the location of the program. For example, to uninstall the client that is installed on the Linux system as described above, you only need to execute the following command:

```shell
sudo rm /usr/local/bin/juicefs
```

You can also check where the program is located by using the `which` command:

```shell
which juicefs
```

The path returned by the command is the location where the JuiceFS client is installed on your system. The uninstallation of the JuiceFS client on other operating systems follows the same way.


================================================
FILE: docs/en/getting-started/standalone.md
================================================
---
sidebar_position: 2
pagination_next: getting-started/for_distributed
description: Learn how to use JuiceFS in standalone mode, combining object storage and databases for efficient file system management.
---

# Standalone Mode

The JuiceFS file system is driven by both ["Object Storage"](../reference/how_to_set_up_object_storage.md) and ["Database"](../reference/how_to_set_up_metadata_engine.md). In addition to object storage, it also supports using local disks, WebDAV, and HDFS as underlying storage options. Therefore, you can create a standalone file system using local disks and SQLite database to get a quick overview of how JuiceFS works.

## Install the client

For Linux distributions and macOS users, you can quickly install the JuiceFS client using a one-click installation script:

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

For other operating systems and installation methods, refer to [Installation](installation.md).

Once installed successfully, executing the `juicefs` command in the terminal will return a help message regardless of the operating system.

## Create a file system {#juicefs-format}

### Basic concepts

The JuiceFS client provides the [`format`](../reference/command_reference.mdx#format) command to create a file system as follows:

```shell
juicefs format [command options] META-URL NAME
```

To format a file system, three types of information are required:

- **[command options]**: Specifies the storage medium for the file system. By default, the **local disk** is used with the path set to `"$HOME/.juicefs/local"`(on darwin/macOS), `"/var/jfs"`(on Linux), or `"C:/jfs/local"`(on Windows).
- **META-URL**: Defines the metadata engine, typically a URL or the file path of a database.
- **NAME**: The name of the file system.

:::tip
JuiceFS supports a wide range of storage media and metadata storage engines. For more details, see [JuiceFS supported storage media](../reference/how_to_set_up_object_storage.md) and [JuiceFS supported metadata storage engines](../reference/how_to_set_up_metadata_engine.md).
:::

### Hands-on practice

For example, on a Linux system, you can create a file system named `myjfs` with the following command:

```shell
juicefs format sqlite3://myjfs.db myjfs
```

After executing the command, you will receive an output similar to the following:

```shell {1,4}
2021/12/14 18:26:37.666618 juicefs[40362] <INFO>: Meta address: sqlite3://myjfs.db
[xorm] [info]  2021/12/14 18:26:37.667504 PING DATABASE sqlite3
2021/12/14 18:26:37.674147 juicefs[40362] <WARNING>: The latency to database is too high: 7.257333ms
2021/12/14 18:26:37.675713 juicefs[40362] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/
2021/12/14 18:26:37.689683 juicefs[40362] <INFO>: Volume is formatted as {Name:myjfs UUID:d5bdf7ea-472c-4640-98a6-6f56aea13982 Storage:file Bucket:/Users/herald/.juicefs/local/ AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

This output shows that SQLite is being used as the metadata storage engine. The database file named `myjfs.db` is located in the current directory. It creates a table to store all the metadata for the `myjfs` file system.

![SQLite-info](../images/sqlite-info.png)

Since no storage-related options were specified, the file system uses the local disk by default, with the storage path set to `file:///Users/herald/.juicefs/local/myjfs/`.

## Mount the file system

### Basic concepts

The JuiceFS client provides the [`mount`](../reference/command_reference.mdx#mount) command to mount file systems in the following format:

```shell
juicefs mount [command options] META-URL MOUNTPOINT
```

Similar to the command of creating a file system, the following information is also required to mount a file system:

- `[command options]`: Specifies file system-related options. For example, `-d` enables background mounting.
- `META-URL`: Defines the metadata storage, typically a URL or file path of a database.
- `MOUNTPOINT`: Specifies a mount point of the file system.

:::tip
The mount point (`MOUNTPOINT`) on Windows systems should be an unused drive letter, such as `Z:` or `Y:`.
:::

### Hands-on practice

:::note
As SQLite is a single-file database, please pay attention to the path of the database file when mounting it. JuiceFS supports both relative and absolute paths.
:::

To mount the `myjfs` file system to the `~/jfs` folder, use the following command:

```shell
juicefs mount sqlite3://myjfs.db ~/jfs
```

![SQLite-mount-local](../images/sqlite-mount-local.png)

The client mounts the file system in the foreground by default. As you can see in the above image, the program keeps running in the current terminal. To unmount the file system, press <kbd>Ctrl</kbd> + <kbd>C</kbd> or close the terminal window.

To keep the file system mounted in the background, specify the `-d` or `--background` option when mounting. This allows you to mount the file system in the daemon:

```shell
juicefs mount sqlite3://myjfs.db ~/jfs -d
```

Next, any files stored in the `~/jfs` mount point will be split into specific blocks according to [How JuiceFS Stores Files](../introduction/architecture.md#how-juicefs-store-files) and stored in the `$HOME/.juicefs/local/myjfs` directory; the corresponding metadata will be stored in the `myjfs.db` database.

To unmount `~/jfs`, execute the following command:

```shell
juicefs umount ~/jfs
```

## Further exploration

The example above offers a quick local experience with JuiceFS and a basic understanding of its operation. For a more practical use case, you can use SQLite for metadata storage while replacing local storage with "object storage."

### Object storage

Object storage is a web storage service based on the HTTP protocol that offers simple APIs for access. It has a flat structure and is easy to scale and cost-effective, particularly suitable for storing large amounts of unstructured data. Almost all mainstream cloud computing platforms provide object storage services, such as Amazon S3, Alibaba Cloud OSS, and Backblaze B2.

JuiceFS supports almost all object storage services, see [JuiceFS supported storage media](../reference/how_to_set_up_object_storage.md).

To set up object storage:

1. Create a **Bucket** and get the Endpoint address.
2. Create the **Access Key ID** and **Access Key Secret**, which serve as the access keys for the Object Storage API.

Taking AWS S3 as an example, the created resources would look like the following:

- **Bucket Endpoint**: `https://myjfs.s3.us-west-1.amazonaws.com`
- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA`

:::note
The process of creating object storage may vary slightly from platform to platform. It is recommended to check the help manual of the corresponding cloud platform. In addition, some platforms may provide different Endpoint addresses for internal and external networks. Please choose the external network access for your application. This document illustrates accessing object storage from a local environment.
:::

### Hands-on practice

To create a JuiceFS file system using SQLite and Amazon S3 object storage:

:::note
If the `myjfs.db` file already exists, delete it first and then execute the following command.
:::

```shell
# Replace relevant options with the actual object storage being used
juicefs format --storage s3 \
    --bucket https://myjfs.s3.us-west-1.amazonaws.com \
    --access-key ABCDEFGHIJKLMNopqXYZ \
    --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \
    sqlite3://myjfs.db myjfs
```

The command above creates a file system using the same database name and file system name with the object storage options provided.

- `--storage`: Specifies the storage type, such as `oss` or `s3`.
- `--bucket`: Specifies the Endpoint address of the object storage.
- `--access-key`: Specifies the Object Storage Access Key ID.
- `--secret-key`: Specifies the Object Storage Access Key Secret.

Once created, you can mount the file system:

```shell
juicefs mount sqlite3://myjfs.db ~/jfs
```

The mount command is exactly the same as using the local storage because JuiceFS has already written the metadata of the object storage to the `myjfs.db` database. Therefore, you do not need to provide it again when mounting.

Compared with using local disks, the combination of SQLite and object storage is more practical. From an application perspective, this approach is equivalent to plugging an object storage with almost unlimited capacity into your local computer, allowing you to use cloud storage as a local disk.

Further, all the data of the file system is stored in the cloud-based object storage. Therefore, the `myjfs.db` database can be copied to other computers where JuiceFS clients are installed for mounting, reading, and writing. That is, any computer that can read the metadata database can mount and read/write the file system.

Obviously, it is difficult for a single file database like SQLite to be accessed by multiple computers at the same time. If SQLite is replaced by databases like Redis, PostgreSQL, and MySQL, which can be accessed by multiple computers at the same time through the network, it is possible to achieve distributed reads and writes on the JuiceFS file system.


================================================
FILE: docs/en/grafana_template.json
================================================
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "datasource",
          "uid": "grafana"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": 20794,
  "graphTooltip": 0,
  "id": 9,
  "links": [],
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${datasource}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "type": "auto"
            },
            "inspect": false
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "uptime"
            },
            "properties": [
              {
                "id": "custom.cellOptions",
                "value": {
                  "mode": "gradient",
                  "type": "color-background"
                }
              },
              {
                "id": "color",
                "value": {
                  "mode": "continuous-YlBl"
                }
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 7,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 31,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "showHeader": true
      },
      "pluginVersion": "10.2.3",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "P3DC81DD2E812B130"
          },
          "editorMode": "code",
          "exemplar": false,
          "expr": "sort(max(juicefs_uptime{vol_name=\"$name\"}) by (${node_label}, juicefs_version))",
          "format": "table",
          "instant": true,
          "legendFormat": "__auto",
          "range": false,
          "refId": "A"
        }
      ],
      "title": "Uptime",
      "transformations": [
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time": true
            },
            "includeByName": {},
            "indexByName": {
              "Time": 0,
              "Value": 3,
              "${node_label}": 1,
              "juicefs_version": 2
            },
            "renameByName": {
              "Value": "uptime",
              "${node_label}": "",
              "juicefs_version": "version"
            }
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 7
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "avg(juicefs_used_space{vol_name=\"$name\"})",
          "format": "time_series",
          "instant": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Data Size",
          "refId": "A"
        }
      ],
      "title": "Data Size",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 7
      },
      "id": 4,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "avg(juicefs_used_inodes{vol_name=\"$name\"})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Files",
          "refId": "A"
        }
      ],
      "title": "Files",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 7
      },
      "id": 5,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "editorMode": "code",
          "expr": "count by (juicefs_version) (juicefs_uptime{vol_name=\"$name\"})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{juicefs_version}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Client Sessions",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 13
      },
      "id": 8,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval]) < 5000000000) by (${node_label})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Ops {{${node_label}}}",
          "refId": "A"
        }
      ],
      "title": "Operations",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "binBps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 13
      },
      "id": 7,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_fuse_written_size_bytes_sum{vol_name=\"$name\"}[$__rate_interval]) < 5000000000) by (${node_label})",
          "format": "time_series",
          "instant": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Write {{${node_label}}}",
          "refId": "A"
        },
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_fuse_read_size_bytes_sum{vol_name=\"$name\"}[$__rate_interval]) < 5000000000) by (${node_label})",
          "format": "time_series",
          "hide": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Read {{${node_label}}}",
          "refId": "B"
        }
      ],
      "title": "IO Throughput",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "µs"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 13
      },
      "id": 18,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_fuse_ops_durations_histogram_seconds_sum{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},mp) * 1000000 / sum(rate(juicefs_fuse_ops_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},mp)",
          "format": "time_series",
          "hide": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "IO Latency",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 19
      },
      "id": 13,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}",
          "refId": "A"
        }
      ],
      "title": "Transactions",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "µs"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 19
      },
      "id": 14,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_transaction_durations_histogram_seconds_sum{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},mp) * 1000000 / sum(rate(juicefs_transaction_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Transaction Latency",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 19
      },
      "id": 20,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_transaction_restart{vol_name=~\"$name\"}[$__rate_interval])) by (${node_label})",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "Restarts {{${node_label}}}",
          "refId": "A"
        }
      ],
      "title": "Transaction Restarts",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 25
      },
      "id": 15,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by  (method)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{method}}",
          "refId": "A"
        },
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_object_request_errors{vol_name=\"$name\"}[$__rate_interval])) ",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "errors",
          "refId": "B"
        }
      ],
      "title": "Objects Requests",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "Bps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 25
      },
      "id": 17,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"PUT\",vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},method)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{method}} {{${node_label}}}",
          "refId": "A"
        },
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[$__rate_interval])) by  (${node_label},method)",
          "format": "time_series",
          "hide": false,
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{method}} {{${node_label}}}",
          "refId": "B"
        },
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_object_request_data_bytes{method=\"GET\",vol_name=\"$name\"}[$__rate_interval]))",
          "hide": false,
          "interval": "",
          "legendFormat": "Total",
          "refId": "C"
        }
      ],
      "title": "Objects Throughput",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "µs"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 25
      },
      "id": 16,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(rate(juicefs_object_request_durations_histogram_seconds_sum{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label}) * 1000000 / sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by  (${node_label})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}",
          "refId": "A"
        }
      ],
      "title": "Objects Latency",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 31
      },
      "id": 10,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_cpu_usage{vol_name=\"$name\"}[$__rate_interval])*100 < 1000) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Client CPU Usage",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 31
      },
      "id": 11,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "expr": "sum(juicefs_memory{vol_name=\"$name\"}) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Client Memory Usage",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 31
      },
      "id": 21,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "editorMode": "code",
          "expr": "sum(juicefs_go_goroutines{vol_name=\"$name\"}) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Go threads",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 37
      },
      "id": 22,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(juicefs_blockcache_bytes{vol_name=\"$name\"}) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Block Cache Size",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 37
      },
      "id": 23,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(juicefs_blockcache_blocks{vol_name=\"$name\"}) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Block Cache Count",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 37
      },
      "id": 24,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) *100 / (sum(rate(juicefs_blockcache_hits{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) + sum(rate(juicefs_blockcache_miss{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp))",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "Hits {{${node_label}}}:{{mp}}",
          "refId": "A"
        },
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) *100 / (sum(rate(juicefs_blockcache_hit_bytes{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) + sum(rate(juicefs_blockcache_miss_bytes{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp))",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "HitBytes {{${node_label}}}:{{mp}}",
          "refId": "B"
        }
      ],
      "title": "Block Cache Hit Ratio",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 43
      },
      "id": 25,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_compact_size_histogram_bytes_count{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Compaction",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "binBps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 43
      },
      "id": 26,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(rate(juicefs_compact_size_histogram_bytes_sum{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Compacted Data",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 43
      },
      "id": 27,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "exemplar": true,
          "expr": "sum(juicefs_fuse_open_handlers{vol_name=\"$name\"}) by (${node_label},mp)",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "refId": "A"
        }
      ],
      "title": "Open File Handlers",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "normal"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "short"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 0,
        "y": 49
      },
      "id": 28,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "editorMode": "code",
          "expr": "juicefs_staging_blocks{vol_name=\"$name\"}",
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Juicefs Staging Blocks",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "normal"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bytes"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 8,
        "y": 49
      },
      "id": 29,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "editorMode": "code",
          "exemplar": false,
          "expr": "juicefs_staging_block_bytes{vol_name=\"$name\"}",
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Juicefs Staging Block Usage",
      "type": "timeseries"
    },
    {
      "datasource": {
        "uid": "$datasource"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "insertNulls": false,
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 6,
        "w": 8,
        "x": 16,
        "y": 49
      },
      "id": 30,
      "options": {
        "legend": {
          "calcs": [
            "lastNotNull"
          ],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "10.4.0",
      "targets": [
        {
          "datasource": {
            "uid": "$datasource"
          },
          "editorMode": "code",
          "exemplar": true,
          "expr": "sum(rate(juicefs_staging_block_delay_seconds{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) / sum(rate(juicefs_object_request_durations_histogram_seconds_count{vol_name=\"$name\"}[$__rate_interval])) by (${node_label},mp) ",
          "hide": false,
          "legendFormat": "{{${node_label}}}:{{mp}}",
          "range": true,
          "refId": "A"
        }
      ],
      "title": "Juicefs Staging Block Delay",
      "type": "timeseries"
    }
  ],
  "refresh": "",
  "schemaVersion": 39,
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {
          "selected": false,
          "text": "prometheus",
          "value": "ef7836b8-b451-4d56-ad1e-1838d429b738"
        },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "datasource",
        "options": [],
        "query": "prometheus",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {
          "selected": false,
          "text": "instance",
          "value": "instance"
        },
        "description": "Select the node label type based on deployment environment",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "label": "node label",
        "name": "node_label",
        "options": [
          {
            "selected": true,
            "text": "instance",
            "value": "instance"
          },
          {
            "selected": false,
            "text": "node",
            "value": "node"
          }
        ],
        "query": "instance,node",
        "queryValue": "",
        "skipUrlSync": false,
        "type": "custom"
      },
      {
        "current": {
          "selected": true,
          "text": "test1",
          "value": "test1"
        },
        "datasource": {
          "uid": "${datasource}"
        },
        "definition": "label_values(juicefs_uptime, vol_name)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "name",
        "options": [],
        "query": {
          "query": "label_values(juicefs_uptime, vol_name)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 2,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-5m",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ],
    "time_options": [
      "5m",
      "15m",
      "1h",
      "6h",
      "12h",
      "24h",
      "2d",
      "7d",
      "30d"
    ]
  },
  "timezone": "",
  "title": "JuiceFS Dashboard",
  "uid": "-hm07csGk",
  "version": 2,
  "weekStart": ""
}


================================================
FILE: docs/en/guide/cache.md
================================================
---
title: Cache
sidebar_position: 3
---

For a file system driven by a combination of object storage and database, cache is an important medium for interacting efficiently between the local client and the remote service. Read and write data can be loaded into the cache in advance or asynchronously, and then the client uploads to or prefetches from the remote service in the background. The use of caching technology can significantly reduce the latency of storage operations and increase data throughput compared to interacting with remote services directly.

JuiceFS provides various caching mechanisms including metadata caching, data read/write caching, etc.

:::tip Does my application really need cache?
Data caching will effectively improve the performance of random reads. For applications that require high random read performance (e.g. Elasticsearch, ClickHouse), it is recommended to use faster storage medium and allocate more space for cache.

Meanwhile, cache improve performance only when application needs to repeatedly read files, so if you know for sure you're in a scenario where data is only accessed once (e.g. data cleansing during ETL), you can safely turn off cache to prevent overhead.
:::

## Consistency {#consistency}

Distributed systems often need to make tradeoffs between cache and consistency. Due to the decoupled architecture of JuiceFS, think consistency problems in terms of metadata, file data (in object storage), and file data local cache.

For [metadata](#metadata-cache), the default configuration offers a "close-to-open" consistency guarantee, i.e. after a client modified and closed a file, other clients will see the latest state when they open this file again. Also, default mount option uses 1s of kernel metadata cache which provides decent performance for the usual cases. If your scenario demands higher level of cache performance, learn how to tune cache settings in below sections. In particular, the client (mount point) initiating file modifications will enjoy a stronger consistency, read [consistency exceptions](#consistency-exceptions) for more.

As for object storage, JuiceFS clients split files into data blocks (default 4MiB), each is assigned an unique ID and uploaded to object storage. Subsequent modifications on the file are carried out on new data blocks, and the original blocks remain unchanged. This guarantees consistency of the object storage data, because once the file is modified, clients will then read from the new data blocks, while the stale ones which will be deleted through [Trash](../security/trash.md) or compaction.

[Local file data cache](#client-read-cache) is object storage blocks downloaded into local disks. So consistency depends on the reliability of the disks, if data are tempered, clients will read bad data. To resolve this concern, choose an appropriate [`--verify-cache-checksum`](../reference/command_reference.mdx#mount-data-cache-options) strategy to ensure data integrity.

## Metadata cache {#metadata-cache}

As a userspace filesystem, JuiceFS metadata cache is both managed as kernel cache (via FUSE API), and maintained in client memory space.

### Metadata cache in kernel {#kernel-metadata-cache}

JuiceFS Client controls these kinds of metadata as kernel cache: attribute (file name, size, permission, mtime, etc.), entry (inode, name, and type. The word "entry" and "dir-entry" is used in parameter names, to further distinguish between file and directory). Use the following parameters to control TTL through FUSE:

```
# File attribute cache TTL in seconds, default to 1, improves getattr performance
--attr-cache=1

# File entry cache TTL in seconds, default to 1, improves lookup performance
--entry-cache=1

# Directory entry cache TTL in seconds, default to 1, improves lookup performance
--dir-entry-cache=1

# Negative lookup (return ENOENT) cache TTL in seconds，default to 0，improves lookup performance for non-existent files  or directories
--negative-entry-cache=1
```

Caching these metadata in kernel for 1 second really speeds up `lookup` and `getattr` calls.

Do note that `entry` cache is gradually built upon file access and may not contain a complete file list under directory, so `readdir` calls or `ls` command cannot utilize this cache, that's why `entry` cache only improves `lookup` performance. The meaning of `direntry` here is different from [kernel directory entry](https://www.kernel.org/doc/html/latest/filesystems/ext4/directory.html), `direntry` does not tell you the files under a directory, but rather, it's the same concept as `entry`, just distinguished based on whether it's a directory.

Real world scenarios scarcely require setting different values for `--entry-cache` and `--dir-entry-cache`, these options exist for theoretical possibilities like when directories seldom change while files change a lot, in that situation, you can use a higher `--dir-entry-cache` than `--entry-cache`.

### Metadata cache in client memory {#client-memory-metadata-cache}

When JuiceFS Client `open` a file, its file attributes are cached in client memory, this attribute cache includes not only the kernel cached file attributes like size, mtime, but also information specific to JuiceFS like [the relationship between file and chunks and slices](../introduction/architecture.md#how-juicefs-store-files).

To maintain the default close-to-open consistency, `open` calls will always query metadata service, bypassing local cache, modifications done by client A isn't guaranteed available immediately for client B, but once A closes file, all other clients (across different nodes) will see the latest state. File attribute cache isn't necessarily obtained through `open`, for example `tail -f` will periodically query attributes, in this case, latest state is fetched without reopening the file.

To utilize the memory metadata cache, use [`--open-cache`](../reference/command_reference.mdx#mount-metadata-cache-options) to specify its TTL, so that before cache expiration, `getattr` and `open` calls directly uses the slice information in client memory. These cached information avoids the overhead of querying metadata service on every call.

With `--open-cache` enabled, JuiceFS no longer operates under close-to-open consistency, but similar to kernel metadata cache, the client initiating the modifications can also actively invalidate client memory metadata cache, while other clients can only wait for expiration. That's why in order to maintain semantics, `--open-cache` is disabled by default. For read intensive (or read-only) scenarios, such as AI model training, it is recommended to set `--open-cache` according to the situation to further improve the read performance.

In comparison, JuiceFS Enterprise Edition provides richer functionalities around memory metadata cache (supports active invalidation). Read [Enterprise Edition documentation](https://juicefs.com/docs/cloud/guide/cache/#client-memory-metadata-cache) for more.

### Consistency exceptions {#consistency-exceptions}

The metadata cache in discussed above really only pertain to multi-client situations, which can be deemed as a "minimum consistency guarantee". But for the client initiating file changes, it's not hard to imagine that due to changes happening "locally", the client initiating changes will enjoy a higher level of consistency:

* For the mount point initiating changes, kernel cache is automatically invalidated upon modification. But when different mount points access and modify the same file, active kernel cache invalidation is only effective on the client initiating the modifications, other clients can only wait for expiration.
* When concurrent operations are performed on multiple mount points, if one client deletes and then recreates a file with the same name, other clients may still use the old file’s inode due to the kernel’s entry cache. As a result, they may fail to find the file or may read old content (if the trash feature is enabled), and must wait for the entry cache to expire. This situation does not fall under traditional “close-to-open” consistency semantics, because the file is no longer the same object.
* When a `write` call completes, the mount point itself will immediately see the resulting file length change (e.g. use `ls -al` to verify that file size is growing)——but this doesn't mean the changes have been committed, before `flush` finishes, these modifications will not be reflected onto the object storage, and other mount points cannot see the latest writes. Using methods like `fsync, fdatasync, close` will all result in `flush` calls that will persist the file changes and make them visible to other clients.
* As an extreme case of the previous situation, if you `write` successfully and have observed file size change in the current mount point, but the eventual `flush` fails for some reason, for example file system usage exceeds global quota, then the previously growing file size will suddenly be reduced, for example, dropped from 10M to 0, this often leads to misunderstanding that JuiceFS just emptied your files, while the reality is that the files haven't been successfully written from the beginning, the file size change that's only available to the current mount point is simply a preview of things, not an actual committed state.
* The mount point initiating changes have access to file change events, and can use tools like [`fswatch`](https://emcrisostomo.github.io/fswatch/) or [`Watchdog`](https://python-watchdog.readthedocs.io/en/stable). But the scope is obviously limited to the files changed within the mount point itself, i.e. files modified by A cannot be monitored by mount point B.
* Due to the fact that FUSE doesn't yet support inotify API, if you'd like to monitor file change events using libraries like [Watchdog](https://python-watchdog.readthedocs.io/en/stable), you can only achieve this via polling (e.g. [`PollingObserver`](https://python-watchdog.readthedocs.io/en/stable/_modules/watchdog/observers/polling.html#PollingObserver)).

## Read/Write buffer {#buffer-size}

The Read/Write buffer is a memory space allocated to the JuiceFS Client, size controlled by [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options) which defaults to 300 (in MiB). Read/Write data all pass through this buffer, making it crucial for all I/O operations, that's why under large scale scenarios, increase buffer size is often used as a first step of optimization.

### Readahead and prefetch {#readahead-prefetch}

:::tip
To accurately describe the internal mechanism of JuiceFS Client, we use the term "readahead" and "prefetch" to refer to the two different behaviors that both download data ahead of time to increase read performance.
:::

When a file is sequentially read, JuiceFS Client performs what's called "readahead", which involves downloading data ahead of the current read offset. In fact, the similar behavior is already being built into the [Linux Kernel](https://www.halolinux.us/kernel-architecture/page-cache-readahead.html): when reading files, kernel dynamically settles on a readahead window based on the actual read behavior, and load file into the page cache. With JuiceFS being a network file system, the classic kernel readahead mechanism is simply not enough to bring the desired performance increase, that's why JuiceFS performs its own readahead on top of kernel readahead, using algorithm to "guess" the size of the readahead window (more aggressive than kernel's), and then download the object storage data in advance. The maximum readahead window size can be controlled by the `--max-readahead` parameter. In random read scenarios, you may consider setting it to 0 to disable readahead.

![readahead](../images/buffer-readahead.svg)

Apparently readahead is only good for sequential reads, that's why there's another similar mechanism called "prefetch": when a block is randomly read by a small offset range, the whole block is scheduled for download asynchronously.

![prefetch](../images/buffer-prefetch.svg)

This mechanism assumes that if a file is randomly read at a given range, then its adjacent content is also more likely to get read momentarily. This isn't necessarily true for various different types of applications, for example, if an application decides to read read a huge file in a very sparse fashion, i.e. read offsets are far from each other. In such case, prefetch isn't really useful and can cause serious read amplification, so if you are already familiar with the file system access pattern of your application, and concluded that prefetch isn't really needed, you can disable by using [`--prefetch=0`](../reference/command_reference.mdx#mount-data-cache-options).

Readahead and prefetch effectively increase sequential read and random read performance, but it also comes with read amplification, read ["Read amplification"](../administration/troubleshooting.md#read-amplification) for more information.

### Write {#buffer-write}

A successful `write` does not mean data is persisted: that's actually `flush`'s job. This is true for both local file systems, and JuiceFS file systems. In JuiceFS, `write` only commits changes to the buffer, from the writing mount point's POV, you may notice that file size is changing, but do not mistake this for persistence (this behavior is also covered in detail in [consistency exceptions](#consistency-exceptions)). To sum up, before `flush` actually finishes, changes are only kept inside the client buffer. Applications may explicitly invoke `flush`, but even without this, `flush` is automatically triggered when a pending slice's size exceed its chunk border, or have waited in the buffer for a certain amount of time.

Together with the previously introduced readahead mechanism, buffer function can be described in below diagram:

![read write buffer](../images/buffer-read-write.svg)

Buffer is shared by both read & write, obviously write is treated with higher priority, this implies the possibility of write getting in the way of read. For instance, if object storage bandwidth isn't enough to support write load, there'll be congestion:

![buffer congestion](../images/buffer-congestion.svg)

As illustrated above, a high write load puts too much pending slices inside the buffer, leaving little buffer space for readahead, file read will hence slow down. Due to a low upload speed, write may also fail due to `flush` timeouts.

### Observation and optimization {#buffer-observation}

Buffer is crucial to both read & write, as is already introduced in above sections, making `--buffer-size` the first optimization target when faced with large scale scenarios. But simply increasing buffer size is not enough and might cause other problems (like buffer congestion, illustrated in the above section). The size of the buffer should be smartly decided along with other performance options.

Before making any adjustments, we recommend running a [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) command to check the current buffer usage, and read below content to guide your tuning.

If you wish to improve sequential read speed, use larger `--max-readahead` and `--buffer-size` to expand the readahead window, all data blocks within the window will be concurrently fetched from object storage. Also keep in mind that, reading a single large file will never consume the full buffer, the space reserved for readahead is between 1/4 to 1/2 of the total buffer size. So if you noticed that `juicefs stats` indicates `buf` is already half full, while performing sequential read on a single large file, then it's time to increase `--buffer-size` to set a larger readahead window.

If you wish to improve write speed, and have already increased [`--max-uploads`](../reference/command_reference.mdx#mount-data-storage-options) for more upload concurrency, with no noticeable increase in upload traffic, consider also increasing `--buffer-size` so that concurrent threads may easier allocate memory for data uploads. This also works in the opposite direction: if tuning up `--buffer-size` didn't bring out an increase in upload traffic, you should probably increase `--max-uploads` as well.

The `--buffer-size` also controls the data upload size for each `flush` operation, this means for clients working in a low bandwidth environment, you may need to use a lower `--buffer-size` to avoid `flush` timeouts. Refer to ["Connection problems with object storage"](../administration/troubleshooting.md#io-error-object-storage) for troubleshooting under low internet speed.

## Data cache {#data-cache}

To improve performance, JuiceFS also provides various caching mechanisms for data, including page cache in the kernel, local file system cache in client host, and read/write buffer in client process itself. Read requests will try the kernel page cache, the client process buffer, and the local disk cache in turn. If the data requested is not found in any level of the cache, it will be read from the object storage, and also be written into every level of the cache asynchronously to improve the performance of the next access.

![JuiceFS-cache](../images/juicefs-cache.png)

### Kernel page cache {#kernel-data-cache}

Kernel will build page cache for opened files. If this file is not updated (i.e. `mtime` doesn't change) afterwards, it will be read directly from the page cache to achieve the best performance.

JuiceFS Client tracks a list of recently opened files. If file is opened again, client will check if file has been modified to decide whether the kernel page cache is valid, if file is already modified, all relevant page cache is invalidated on the next open, this ensures that the client can always read the latest data.

Repeated reads of the same file in JuiceFS can be extremely fast, with latencies as low as a few microseconds and throughput up to several GiBs per second.

### Kernel writeback-cache mode {#fuse-writeback-cache}

Starting from Linux kernel 3.15, FUSE supports [writeback-cache](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt) mode, the kernel will consolidate high-frequency random small (10-100 bytes) write requests to significantly improve its performance, but this comes with a side effect: sequential writes are also turned into random writes, hence sequential write performance is hindered, so only use it on intensive random write scenarios.

To enable writeback-cache mode, use the [`-o writeback_cache`](../reference/fuse_mount_options.md#writeback_cache) option when you [mount JuiceFS](../reference/command_reference.mdx#mount). Note that writeback-cache mode is not the same as [Client write data cache](#client-write-cache), the former is a kernel implementation while the latter happens inside the JuiceFS Client, read the corresponding section to learn their intended scenarios.

### Read cache in client {#client-read-cache}

The client will perform prefetch and cache automatically to improve sequence read performance according to the read mode in the application. Data will be cached in local file system, which can be any local storage device like HDD, SSD or even memory.

Data downloaded from object storage, as well as small data (smaller than a single block) uploaded to object storage will be cached by JuiceFS Client, without compression or encryption. To achieve better performance on application's first read, use [`juicefs warmup`](../reference/command_reference.mdx#warmup) to cache data in advance.

When '--writeback' is not enabled, if the file system where the cache directory is located is not working properly, the JuiceFS client can return an error and downgrade to direct access to object storage. In the case of enable '--writeback', if the file system where the cache directory is located is abnormal and the read operation is stuck (such as some kernel-mode network file system), then JuiceFS will also get stuck together. This requires you to tune the underlying file system behavior of the cache directory to fail fast.

Below are some important options for cache configuration (see [`juicefs mount`](../reference/command_reference.mdx#mount) for complete reference):

* `--prefetch`

  Concurrent prefetch of N (1 by default) blocks. Prefetching refers to randomly reading a segment of a file's block, and the client asynchronously downloads the entire object storage block. Prefetching can often improve the performance of random reads. However, if the file access pattern in your scenario cannot effectively utilize the prefetched data (for example, reading large files randomly and sparsely), prefetching may lead to noticeable read amplification. In such cases, you can set it to 0 to disable the prefetch feature.

  JuiceFS is equipped with another internal similar mechanism called "readahead": when doing sequential reads, client will download nearby blocks in advance, improving sequential performance. The concurrency of readahead is affected by the size of ["Read/Write Buffer"](#buffer-size), the larger the read-write buffer, the higher the concurrency.

* `--cache-dir`

  Cache directory, default to `/var/jfsCache` or `$HOME/.juicefs/cache`. Please read ["Cache directory"](#cache-dir) for more information.

  If you are in urgent need to free up disk space, you can manually delete data under the cache directory, which is `<cache-dir>/<UUID>/raw/`.

* `--cache-size` and `--free-space-ratio`

  Cache size (in MiB, default to 102400) and minimum ratio of free size (default 0.1). Both parameters is able to control cache size, if any of the two criteria is met, JuiceFS Client will expire cache usage using an algorithm similar to LRU, i.e. remove older and less used blocks.

  Actual cache size may exceed configured value, because it is difficult to calculate the exact disk space taken by cache. Currently, JuiceFS takes the sum of all cached objects sizes using a minimum 4 KiB size, which is often different from the result of `du`.

* `--cache-partial-only`

  Only cache small files and random small reads, do not cache whole block. This applies to conditions where object storage throughput is higher than the local cache device. Default value is false.

  There are two main read patterns, sequential read and random read. Sequential read usually demands higher throughput while random reads needs lower latency. When local disk throughput is lower than object storage, consider enable `--cache-partial-only` so that sequential reads do not cache the whole block, but rather, only small reads (like footer of Parquet / ORC file) are cached. This allows JuiceFS to take advantage of low latency provided by local disk, and high throughput provided by object storage, at the same time.

### Client write data cache {#client-write-cache}

Enabling client write cache can improve performance when writing large amount of small files. Read this section to learn about client write cache.

Client write cache is disabled by default, data writes will be held in the [read/write buffer](#buffer-size) (in memory), and is uploaded to object storage when a chunk is filled full, or forced by application with `close()`/`fsync()` calls. To ensure data security, client will not commit file writes to the Metadata Service until data is uploaded to object storage.

You can see how the default "upload first, then commit" write process will not perform well when writing large amount of small files. After the client write cache is enabled, the write process becomes "commit first, then upload asynchronously", file writes will not be blocked by data uploads, instead it will be written to the local cache directory and committed to the metadata service, and then returned immediately. The file data in the cache directory will be asynchronously uploaded to the object storage.

If you need to use JuiceFS as a temporary storage, which doesn't require persistence and distributed access, use [`--upload-delay`](../reference/command_reference.mdx#mount-data-cache-options) to delay data upload, this saves the upload process if files are deleted during the delay. Meanwhile, compared with a local disk, JuiceFS uploads files automatically when the cache directory is running out of space, which keeps the applications away from unexpected failures.

Add `--writeback` to the mount command to enable client write cache, but this mode comes with some risks and caveats:

* Disk reliability is crucial to data integrity, if write cache data suffers loss before upload is complete, file data is lost forever. Use with caution when data reliability is critical.
* Write cache data by default is stored in `/var/jfsCache/<UUID>/rawstaging/`, do not delete files under this directory or data will be lost.
* Write cache size is controlled by [`--free-space-ratio`](#client-read-cache). By default, if the write cache is not enabled, the JuiceFS client uses up to 90% of the disk space of the cache directory (the calculation rule is `(1 - <free-space-ratio>) * 100`). After the write cache is enabled, a certain percentage of disk space will be overused. The calculation rule is `(1 - (<free-space-ratio> / 2)) * 100`, that is, by default, up to 95% of the disk space of the cache directory will be used.
* Write cache and read cache share cache disk space, so they affect each other. For example, if the write cache takes up too much disk space, the size of the read cache will be limited, and vice versa.
* If local disk write speed is lower than object storage upload speed, enabling `--writeback` will only result in worse write performance.
* If the file system of the cache directory raises error, client will fallback and write synchronously to object storage, which is the same behavior as [Read Cache in Client](#client-read-cache).
* If object storage upload speed is too slow (low bandwidth), local write cache can take forever to upload, meanwhile reads from other nodes will result in timeout error (I/O error). See [Connection problems with object storage](../administration/troubleshooting.md#io-error-object-storage).

Improper usage of client write cache can easily cause problems, that's why only recommend to temporarily enable this when writing large number of small files (e.g. extracting a compressed file containing a large number of small files).

When `--writeback` is enabled, apart from checking `/var/jfsCache/<UUID>/rawstaging/` directly, you can also view upload progress using:

```shell
# Assuming mount point is /jfs
$ cd /jfs
$ cat .stats | grep "staging"
juicefs_staging_block_bytes 1621127168  # The size of the data blocks to be uploaded
juicefs_staging_block_delay_seconds 46116860185.95535
juicefs_staging_blocks 394  # The number of data blocks to be uploaded
```

### Cache directory {#cache-dir}

Depending on the operating system, the default cache path for JuiceFS is as follows:

- **Linux**: `/var/jfsCache`
- **macOS**: `$HOME/.juicefs/cache`
- **Windows**: `%USERPROFILE%\.juicefs\cache`

For Linux, note that the default cache path requires administrator privileges and that normal users need to be granted to use `sudo` to set it up, e.g.:

```shell
sudo juicefs mount redis://127.0.0.1:6379/1 /mnt/myjfs
```

Alternatively, the `--cache-dir` option can be set to any storage path accessible to the current system when mounting the filesystem. For normal users who do not have permission to access the `/var` directory, the cache can be set in the user's `HOME` directory, e.g.:

```shell
juicefs mount --cache-dir ~/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

:::tip
It is recommended to use a high performance dedicated disk as the cache directory, avoid using the system disk, and do not share it with other applications. Sharing not only affects the performance of each other, but may also cause errors in other applications (such as insufficient disk space left). If it is unavoidable to share, you must estimate the disk capacity required by other applications, limit the size of the cache space (see below for details), and avoid JuiceFS's read cache or write cache takes up too much space.
:::

#### RAM disk

If a higher file read performance is required, you can set up the cache into the RAM disk. For Linux systems, check the `tmpfs` file system with the `df` command.

```shell
$ df -Th | grep tmpfs
tmpfs          tmpfs     362M  2.0M  360M    1% /run
tmpfs          tmpfs     3.8G     0  3.8G    0% /dev/shm
tmpfs          tmpfs     5.0M  4.0K  5.0M    1% /run/lock
```

Where `/dev/shm` is a typical memory disk that can be used as a cache path for JuiceFS, it is typically half the capacity of memory and can be manually adjusted as needed, for example, to 32GB.

```shell
sudo mount -o size=32000M -o remount /dev/shm
```

Then, using that path as a cache, mount the filesystem.

```shell
juicefs mount --cache-dir /dev/shm/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

Another way to use memory for cache is set `--cache-dir` option to `memory`, this puts cache directly in client process memory, which is simpler compared to `/dev/shm`, but obviously cache will be lost after process restart, use this for tests and evaluations.

#### Shared folders

Shared directories created via SMB or NFS can also be used as cache for JuiceFS. For the case where multiple devices on the LAN mount the same JuiceFS file system, using shared directories on the LAN as cache paths can effectively relieve the bandwidth pressure of duplicate caches for multiple devices.

But special attention needs to be paid. Usually, when the file system where the cache directory is located fails to work properly, the JuiceFS client can immediately return an error and downgrade to direct access to object storage. If the abnormality of the shared directory shows that the read operation is stuck (such as some network file system in kernel mode), then JuiceFS will also be stuck together. This requires you to tune the underlying file system behavior of the shared directory to achieve rapid failure.

Using SMB/CIFS as an example, mount the shared directories on the LAN by using the tools provided by the `cifs-utils` package.

```shell
sudo mount.cifs //192.168.1.18/public /mnt/jfscache
```

Using shared directories as JuiceFS caches:

```shell
sudo juicefs mount --cache-dir /mnt/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

#### Multiple cache directories

JuiceFS supports setting multiple cache directories at the same time, thus avoiding the problem of insufficient cache space by separating multiple paths using `:` (Linux, macOS) or `;` (Windows), e.g.:

```shell
sudo juicefs mount --cache-dir ~/jfscache:/mnt/jfscache:/dev/shm/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

When multiple cache directories are set, or multiple devices are used as cache disks, the `--cache-size` option represents the total size of data in all cache directories. The client will use the hash strategy to evenly write data to each cache path, and cannot perform special tuning for multiple cache disks with different capacities or performances.

Therefore, it is recommended that the available space of different cache directories/cache disks be consistent, otherwise it may cause the situation that the space of a certain cache directory cannot be fully utilized. For example, `--cache-dir` is `/data1:/data2`, where `/data1` has a free space of 1GiB, `/data2` has a free space of 2GiB, `--cache-size` is 3GiB, `--free-space-ratio` is 0.1. Because the cache write strategy is to write evenly, the maximum space allocated to each cache directory is `3GiB / 2 = 1.5GiB`, resulting in a maximum of 1.5GiB cache space in the `/data2` directory instead of `2GiB * 0.9 = 1.8GiB`.


================================================
FILE: docs/en/guide/clone.md
================================================
---
title: Clone Files or Directories
sidebar_position: 6
description: Learn how to use the juicefs clone command to efficiently clone files or directories by creating a metadata-only copy. 
---

Cloning specific data does not involve copying the actual object storage data but only copies metadata. Therefore, cloning is very fast regardless of the size of the file or directory. For JuiceFS, this command is a better alternative to `cp`. Moreover, for Linux clients using kernels with [`copy_file_range`](https://man7.org/linux/man-pages/man2/copy_file_range.2.html) support, using `cp` effectively performs the same metadata copy and is very fast.

![clone](../images/juicefs-clone.svg)

The clone result is a metadata copy only, where all the files still reference the same underlying object storage blocks. That is why a clone behaves the same in every way as its originals. Upon any file data modification, new data is written to new object storage blocks, with its associating metadata redirected to the new blocks as well (ROW, Redirect-on-Write), while the unchanged part of the files remains the same, still referencing the original blocks. And just like native JuiceFS files, random write on a clone can result in file fragmentations, you can merge them with `juicefs compact` to process the slice of the file,in order to improve read performance.

Note that system tools like disk-free or disk-usage (`df`, `du`) show the space used by the cloned data, but the underlying object storage space remains unchanged since the blocks are not duplicated. Cloning replicates metadata, so it will occupy the same metadata engine storage space as the original.

**Cloning impacts file system storage space, inodes, and metadata engine storage space.** Be cautious when cloning large directories.

```shell
juicefs clone SRC DST

# Clone a file
juicefs clone /mnt/jfs/file1 /mnt/jfs/file2

# Clone a directory
juicefs clone /mnt/jfs/dir1 /mnt/jfs/dir2
```

## Consistency {#consistency}

In terms of transaction consistency, cloning behaves as follows:

- The destination file is not visible until the `clone` command completes.
- For files: The `clone` command ensures atomicity, meaning that the cloned file will always be in a correct and consistent state.
- For directories: The `clone` command does not guarantee atomicity for directories. In other words, if the source directory changes during the cloning process, the target directory may be different from the source directory.
- Only one `clone` operation can succeed from the same source at the same time. Any failed clones will clean up the temporarily created directory tree.

The cloning operation is performed by the mount process. It will be interrupted, if the `clone` command is terminated. If a cloning operation fails or is interrupted, the `mount` process will clean up any created inodes. If this cleanup fails, it may lead to metadata leaks and potential object storage leaks, because the dangling tree continues to reference the underlying data blocks. They could be cleaned up by the [`juicefs gc --delete`](../reference/command_reference.mdx#gc) command.


================================================
FILE: docs/en/guide/dir-stats.md
================================================
---
title: Directory Statistics
sidebar_position: 5
description: Learn how to enable, check, and troubleshoot directory statistics in JuiceFS version 1.1.0 and later. 
---

From JuiceFS v1.1.0, the directory statistics feature is enabled by default when formatting a new volume. For existing volumes, this feature is disabled by default and must be enabled manually. The directory statistics feature accelerates the `quota`, `info` and `summary` subcommands, but it comes with a minor performance cost.

:::tip
Directory statistics rely on the mount process. Ensure all writable mount processes are upgraded to v1.1.0 before enabling this feature.
:::

## Enable directory statistics {#enable-directory-stats}

Run `juicefs config $URL --dir-stats` to enable directory statistics. After that, run `juicefs config $URL` to confirm the change:

```shell
$ juicefs config redis://localhost
2023/05/31 15:56:39.721188 juicefs[30626] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/05/31 15:56:39.723284 juicefs[30626] <INFO>: Ping redis latency: 159.226µs [redis.go:3566]
{
  "Name": "myjfs",
  "UUID": "82db28de-bf5f-43bf-bba3-eb3535a86c48",
  "Storage": "file",
  "Bucket": "/root/.juicefs/local/",
  "BlockSize": 4096,
  "Compression": "none",
  "EncryptAlgo": "aes256gcm-rsa",
  "TrashDays": 1,
  "MetaVersion": 1,
  "DirStats": true
}
```

If `"DirStats": true` appears, the directory statistics feature is successfully enabled. To disable it:

```shell
$ juicefs config redis://localhost --dir-stats=false
2023/05/31 15:59:39.046134 juicefs[30752] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/05/31 15:59:39.048301 juicefs[30752] <INFO>: Ping redis latency: 171.308µs [redis.go:3566]
 dir-stats: true -> false
```

:::tip
The [directory quota](./quota.md#directory-quota) functionality depends on directory statistics, so setting a quota automatically enables directory statistics. To disable directory statistics for such volumes, you need to remove all quotas.
:::

## Check directory statistics {#check-directory-stats}

Use `juicefs info $PATH` to check statistics for a single directory:

```shell
$ juicefs info /mnt/jfs/pjdfstest/
/mnt/jfs/pjdfstest/ :
  inode: 2
  files: 10
   dirs: 4
 length: 43.74 KiB (44794 Bytes)
   size: 92.00 KiB (94208 Bytes)
   path: /pjdfstest
```

Run `juicefs info -r $PATH` to recursively sum up:

```shell
/mnt/jfs/pjdfstest/: 278                       921.0/s
/mnt/jfs/pjdfstest/: 1.6 MiB (1642496 Bytes)   5.2 MiB/s
/mnt/jfs/pjdfstest/ :
  inode: 2
  files: 278
   dirs: 37
 length: 592.42 KiB (606638 Bytes)
   size: 1.57 MiB (1642496 Bytes)
   path: /pjdfstest
```

You can also use `juicefs summary $PATH` to list all directory statistics:

```shell
$ ./juicefs summary /mnt/jfs/pjdfstest/
/mnt/jfs/pjdfstest/: 315                       1044.4/s
/mnt/jfs/pjdfstest/: 1.6 MiB (1642496 Bytes)   5.2 MiB/s
+------------------+---------+------+-------+
|       PATH       |   SIZE  | DIRS | FILES |
+------------------+---------+------+-------+
| /                | 1.6 MiB |   37 |   278 |
| tests/           | 1.1 MiB |   18 |   240 |
| tests/open/      | 112 KiB |    1 |    26 |
| tests/...        | 328 KiB |    7 |    71 |
| .git/            | 432 KiB |   17 |    26 |
| .git/objects/    | 252 KiB |    3 |     2 |
| ...              |  12 KiB |    0 |     3 |
+------------------+---------+------+-------+
```

:::note
Directory statistics only track usage for individual directories. To get a recursive sum, use `juicefs info -r`. This could be a costly operation for large directories. If you need to frequently get the total statistics for particular directories, consider [setting an empty quota](./quota.md#limit-capacity-and-inodes-of-directory) for such directories to achieve recursive statistics.

Unlike the Community Edition, JuiceFS Enterprise Edition provides a [recursive sum](/docs/cloud/guide/quota#file-directory-size) in directory statistics. You can directly view the total usage by running `ls -lh`.
:::

## Troubleshoot {#troubleshooting}

Directory statistics are calculated asynchronously and can potentially produce inaccurate results when clients run into problems. `juicefs info`, `juicefs summary`, and `juicefs quota` all provide a `--strict` option to run in strict mode. This bypasses directory statistics, unlike the default fast mode.

When strict mode and fast mode produce different results, use `juicefs fsck` to diagnose:

```shell
$ juicefs info -r /jfs/d
/jfs/d: 1                             3.3/s
/jfs/d: 448.0 MiB (469766144 Bytes)   1.4 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 448.00 MiB (469762048 Bytes)
   size: 448.00 MiB (469766144 Bytes)
   path: /d

$ juicefs info -r --strict /jfs/d
/jfs/d: 1                            3.3/s
/jfs/d: 1.0 GiB (1073745920 Bytes)   3.3 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 1.00 GiB (1073741824 Bytes)
   size: 1.00 GiB (1073745920 Bytes)
   path: /d

# Check directory statistics for /d
$ juicefs fsck sqlite3://test.db --path /d --sync-dir-stat
2023/05/31 17:14:34.700239 juicefs[32667] <INFO>: Meta address: sqlite3://test.db [interface.go:494]
[xorm] [info]  2023/05/31 17:14:34.700291 PING DATABASE sqlite3
2023/05/31 17:14:34.701553 juicefs[32667] <WARNING>: usage stat of /d should be &{1073741824 1073741824 1}, but got &{469762048 469762048 1} [base.go:2010]
2023/05/31 17:14:34.701577 juicefs[32667] <WARNING>: Stat of path /d (inode 2) should be synced, please re-run with '--path /d --repair --sync-dir-stat' to fix it [base.go:2025]
2023/05/31 17:14:34.701615 juicefs[32667] <FATAL>: some errors occurred, please check the log of fsck [main.go:31]

# Fix directory statistics for /d
$ juicefs fsck -v sqlite3://test.db --path /d --sync-dir-stat --repair
2023/05/31 17:14:43.445153 juicefs[32721] <DEBUG>: maxprocs: Leaving GOMAXPROCS=8: CPU quota undefined [maxprocs.go:47]
2023/05/31 17:14:43.445289 juicefs[32721] <INFO>: Meta address: sqlite3://test.db [interface.go:494]
[xorm] [info]  2023/05/31 17:14:43.445350 PING DATABASE sqlite3
2023/05/31 17:14:43.462374 juicefs[32721] <DEBUG>: Stat of path /d (inode 2) is successfully synced [base.go:2018]

# Verify that statistics have been fixed
$ juicefs info -r /jfs/d
/jfs/d: 1                            3.3/s
/jfs/d: 1.0 GiB (1073745920 Bytes)   3.3 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 1.00 GiB (1073741824 Bytes)
   size: 1.00 GiB (1073745920 Bytes)
   path: /d
```


================================================
FILE: docs/en/guide/gateway.md
================================================
---
title: JuiceFS S3 Gateway
sidebar_position: 5
description: JuiceFS S3 Gateway allows the JuiceFS file system to be accessed externally using the S3 protocol. This enables applications to access files stored on JuiceFS through Amazon S3 SDKs.
---

JuiceFS S3 Gateway is one of the various access methods supported by JuiceFS. It allows the JuiceFS file system to be accessed externally using the S3 protocol. This enables applications to access files stored on JuiceFS using Amazon S3 SDKs.

## Architecture and principles

In JuiceFS, [files are stored as objects and distributed in chunks within the underlying object storage](../introduction/architecture.md#how-juicefs-store-files). JuiceFS provides multiple access methods, including the FUSE POSIX, WebDAV, S3 Gateway, and CSI Driver. Among these options, S3 Gateway is particularly popular. Below is the S3 Gateway architecture:

![JuiceFS S3 Gateway architecture](../images/juicefs-s3-gateway-arch.png)

JuiceFS S3 Gateway implements its functionality through [MinIO S3 Gateway](https://github.com/minio/minio/tree/ea1803417f80a743fc6c7bb261d864c38628cf8d/docs/gateway). Leveraging MinIO's [`object` interface](https://github.com/minio/minio/blob/d46386246fb6db5f823df54d932b6f7274d46059/cmd/object-api-interface.go#L88), we integrate the JuiceFS file system as the backend storage for MinIO servers. This provides a user experience close to that of native MinIO usage while inheriting many advanced features of MinIO. In this architecture, JuiceFS acts as a local disk for the MinIO instance, and the principle is similar to the `minio server /data1` command.

Common application scenarios for JuiceFS S3 Gateway include:

- **Exposing the S3 API for JuiceFS:** Applications can access files stored on JuiceFS using S3 SDKs.
- **Using S3 clients:** Using tools like S3cmd, AWS CLI, and MinIO clients to easily access and manage files stored on JuiceFS.
- **Managing files in JuiceFS:** JuiceFS S3 Gateway provides a web-based file manager to manage files in JuiceFS directly from a browser.
- **Cluster replication:** In scenarios requiring cross-cluster data replication, JuiceFS S3 Gateway serves as a unified data export for clusters. This avoids cross-region metadata access and enhances data transfer performance. For details, see [Sync across regions using JuiceFS S3 Gateway](../guide/sync.md#sync-across-region).

## Quick start

JuiceFS S3 Gateway enables access to an existing JuiceFS volume. If you do not have one, follow the steps in this [guide](../getting-started/standalone.md) to create a JuiceFS file system.

The gateway is built on MinIO, so you must set the `MINIO_ROOT_USER` and `MINIO_ROOT_PASSWORD` environment variables. They serve as the access key and secret key for authentication when you access the S3 API. These credentials are administrator credentials with the highest privileges.

```shell
export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=12345678

# Use "set" on Windows
set MINIO_ROOT_USER=admin
```

Note that `MINIO_ROOT_USER` must be at least 3 characters long, and `MINIO_ROOT_PASSWORD` must be at least 8 characters long. If these requirements are not met, the gateway service will display an error: `MINIO_ROOT_USER should be specified as an environment variable with at least 3 characters`.

Start the gateway:

```shell
# The first argument is the metadata engine URL; the second argument is the address/port for JuiceFS S3 Gateway to listen on.
juicefs gateway redis://localhost:6379/1 localhost:9000

# Since v1.2, JuiceFS supports running services in the background, using --background or -d.
# When running in background, use --log to specify the log path.
juicefs gateway redis://localhost:6379 localhost:9000 -d --log=/var/log/juicefs-s3-gateway.log
```

By default, [multi-bucket support](#multi-bucket-support) is not enabled. You can enable it by adding the `--multi-buckets` option. Additionally, you can add [other options](../reference/command_reference.mdx#gateway) to `gateway` subcommands as needed. For example, you can set the default local cache to 20 GiB.

```shell
juicefs gateway --cache-size 20480 redis://localhost:6379/1 localhost:9000
```

This example assumes that the JuiceFS file system uses a local Redis database. When JuiceFS S3 Gateway is enabled, you can access the gateway's management interface at `http://localhost:9000` on the **current host**.

![S3-gateway-file-manager](../images/s3-gateway-file-manager.jpg)

To allow access to JuiceFS S3 Gateway from other hosts on the local network or the internet, adjust the listen address. For example:

```shell
juicefs gateway redis://localhost:6379/1 0.0.0.0:9000
```

This configuration makes JuiceFS S3 Gateway accept requests from all networks by default. Different S3 clients can access JuiceFS S3 Gateway using different addresses. For example:

- Third-party clients on the same host as JuiceFS S3 Gateway can use `http://127.0.0.1:9000` or `http://localhost:9000` for access.
- Third-party clients on the same local network as the JuiceFS S3 Gateway host can use `http://192.168.1.8:9000` for access (assuming the JuiceFS S3 Gateway host's internal IP address is `192.168.1.8`).
- Using `http://110.220.110.220:9000` to access JuiceFS S3 Gateway over the internet (assuming the JuiceFS S3 Gateway host's public IP address is `110.220.110.220`).

## Access JuiceFS S3 Gateway

Various S3 API-supported clients, desktop applications, and web applications can access JuiceFS S3 Gateway. Ensure you use the correct address and port for accessing JuiceFS S3 Gateway.

:::tip Note
The following examples assume accessing JuiceFS S3 Gateway running on the local host with third-party clients. Adjust JuiceFS S3 Gateway's address according to your specific scenario.
:::

### Use the AWS CLI

Download and install the AWS Command Line Interface (AWS CLI) from [https://aws.amazon.com/cli](https://aws.amazon.com/cli).

Configure it:

```bash
$ aws configure
AWS Access Key ID [None]: admin
AWS Secret Access Key [None]: 12345678
Default region name [None]:
Default output format [None]:
```

The program guides you interactively to add new configurations. Use the same values for `Access Key ID` as `MINIO_ROOT_USER` and `Secret Access Key` as `MINIO_ROOT_PASSWORD`. Leave the region name and output format blank.

Now you can use the `aws s3` command to access JuiceFS storage, for example:

```bash
# List buckets
$ aws --endpoint-url http://localhost:9000 s3 ls

# List objects in bucket
$ aws --endpoint-url http://localhost:9000 s3 ls s3://<bucket>
```

### Use the MinIO Client

To avoid compatibility issues, we recommend using the `RELEASE.2021-04-22T17-40-00Z` version of the MinIO Client (`mc`). You can find historical versions with different architectures of `mc` at this [address](https://dl.min.io/client/mc/release). For example, for the amd64 architecture, you can download the `RELEASE.2021-04-22T17-40-00Z` version of `mc` from this [link](https://dl.min.io/client/mc/release/linux-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z).

After installing `mc`, add a new alias:

```bash
mc alias set juicefs http://localhost:9000 admin 12345678
```

Then, you can freely copy, move, add, and delete files and folders between the local disk, JuiceFS storage, and other cloud storage services using the `mc` client.

```shell
$ mc ls juicefs/jfs
[2021-10-20 11:59:00 CST] 130KiB avatar-2191932_1920.png
[2021-10-20 11:59:00 CST] 4.9KiB box-1297327.svg
[2021-10-20 11:59:00 CST]  21KiB cloud-4273197.svg
[2021-10-20 11:59:05 CST]  17KiB hero.svg
[2021-10-20 11:59:06 CST] 1.7MiB hugo-rocha-qFpnvZ_j9HU-unsplash.jpg
[2021-10-20 11:59:06 CST]  16KiB man-1352025.svg
[2021-10-20 11:59:06 CST] 1.3MiB man-1459246.ai
[2021-10-20 11:59:08 CST]  19KiB sign-up-accent-left.07ab168.svg
[2021-10-20 11:59:10 CST]  11MiB work-4997565.svg
```

## Common features

### Multi-bucket support

By default, JuiceFS S3 Gateway only allows one bucket. The bucket name is the file system name. If you need multiple buckets, you can add `--multi-buckets` at startup to enable multi-bucket support. This parameter exports each subdirectory under the top-level directory of the JuiceFS file system as a separate bucket. Creating a bucket means creating a subdirectory with the same name at the top level of the file system.

```shell
juicefs gateway redis://localhost:6379/1 localhost:9000 --multi-buckets
```

### Retain ETags

By default, JuiceFS S3 Gateway does not save or return object ETag information. You can enable this with `--keep-etag`.

```shell
juicefs gateway myjfs localhost:9000 --keep-etag
```

Then if you upload the file through gateway into JuiceFS, you can get this etag by s3API `head-object`:

```shell
aws s3api --endpoint=http://localhost:9000 head-object --bucket myjfs --key test123/test.etag
{
    "AcceptRanges": "bytes",
    "LastModified": "Wed, 23 Apr 2025 00:17:16 GMT",
    "ContentLength": 7,
    "ETag": "\"d2fde576f44a6601b73201234b491904\"",
    "ContentType": "application/octet-stream",
    "Metadata": {}
}
```

This etag is calculated using the MD5 algorithm, and it's also `setXattr` to file with key `s3-tag`, if you mount the JuiceFS with `--enable-xattr` then you can use `getfattr` to get this etag:

```shell
getfattr -n s3-etag test.etag
# file: test.etag
s3-etag="d2fde576f44a6601b73201234b491904"
```

### Enable object tags

Object tags are not supported by default, but you can use `--object-tag` to enable them.

### Enable object metadata <VersionAdd>1.3</VersionAdd>

Object metadata is not supported by default, but you can use `--object-meta` to enable it. Refer to the [documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingMetadata.html) for usage.

### Enable virtual host-style requests

By default, JuiceFS S3 Gateway supports path-style requests in the format of `http://mydomain.com/bucket/object`. The `MINIO_DOMAIN` environment variable is used to enable virtual host-style requests. If the request's `Host` header information matches `(.+).mydomain.com`, the matched pattern `$1` is used as the bucket, and the path is used as the object.

For example:

```shell
export MINIO_DOMAIN=mydomain.com
```

### Adjust the IAM refresh interval

The default refresh interval for Identity and Access Management (IAM) caching is 5 minutes. You can adjust this using `--refresh-iam-interval`. The value of this parameter is a time string with a unit, such as "300ms", "-1.5h", or "2h45m." Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", and "h".

For example, to set a refresh interval of 1 minute:

```sh
juicefs gateway xxxx xxxx    --refresh-iam-interval 1m
```

### Multiple gateway instances

The distributed nature of JuiceFS allows multiple JuiceFS S3 gateway instances to be started on different nodes simultaneously. This can improve the availability and performance of S3 Gateway instances. In this scenario, each gateway instance independently handles requests, but all access the same JuiceFS file system. It is important to note the following:

- Ensure that all instances are started with the same user at initialization; use the same UID and GID for all instances.
- The IAM refresh time between nodes can vary, but it must be ensured that this interval is not too short to prevent excessive load on JuiceFS.
- Each instance's listening address and port can be freely configured. If multiple instances are started on the same machine, ensure that there is no conflict in port numbers.

### Run as a daemon service

JuiceFS S3 Gateway can be configured as a systemd unit.

```shell
cat > /lib/systemd/system/juicefs-gateway.service<<EOF
[Unit]
Description=Juicefs S3 Gateway
Requires=network.target
After=multi-user.target
StartLimitIntervalSec=0

[Service]
Type=simple
User=root
Environment="MINIO_ROOT_USER=admin"
Environment="MINIO_ROOT_PASSWORD=12345678"
ExecStart=/usr/local/bin/juicefs gateway redis://localhost:6379 localhost:9000
Restart=on-failure
RestartSec=60

[Install]
WantedBy=multi-user.target
EOF
```

To enable the service at startup:

```shell
systemctl daemon-reload
systemctl enable juicefs-gateway --now
systemctl status juicefs-gateway
```

To inspect logs:

```bash
journalctl -xefu juicefs-gateway.service
```

### Deploy S3 Gateway in Kubernetes {#deploy-in-kubernetes}

Installation requires Helm 3.1.0 or above, refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install).

```shell
helm repo add juicefs https://juicedata.github.io/charts/
helm repo update
```

The Helm chart supports both the Community and Enterprise Editions of JuiceFS. You can specify the version to use by configuring different fields in the [values file](https://github.com/juicedata/charts/blob/main/charts/juicefs-s3-gateway/values.yaml).

```yaml title="values-mycluster.yaml"
secret:
  name: "myjfs"
  # If the token field is populated, the deployment will be treated as an Enterprise Edition.
  token: "xxx"
  accessKey: "xxx"
  secretKey: "xxx"
```

If you want to deploy Ingress, append the following content and write the corresponding Ingress configuration:

```yaml title="values-mycluster.yaml"
ingress:
  enabled: true
```

:::tip
Be sure to include the `values-mycluster.yaml` file into your Git project (or using other source code management systems), so that all changes on the values file can be traced and rolled back.
:::

Once the values file is ready, run the following command to deploy:

```shell
# Use this command for both initial deployment and subsequent updates.
helm upgrade --install -f values-mycluster.yaml s3-gateway juicefs/juicefs-s3-gateway
```

After installation, follow the output instructions to get the Kubernetes service address and verify if it is working.

```shell
$ kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway
NAME                 TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)    AGE
juicefs-s3-gateway   ClusterIP   10.101.108.42   <none>        9000/TCP   142m
```

The deployment will launch a Deploy named `juicefs-s3-gateway`. Run this command to check the Pod status:

```sh
$ kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway
NAME                                  READY   STATUS    RESTARTS   AGE
juicefs-s3-gateway-5c69d574cc-t92b6   1/1     Running   0          136m
```

## Advanced features

The core feature of JuiceFS S3 Gateway is to provide the S3 API. Now, the support for the S3 protocol is comprehensive. Version 1.2 supports IAM and bucket event notifications.

These advanced features require the `RELEASE.2021-04-22T17-40-00Z` version of the `mc` client. For the usage of these advanced features, see the [MinIO documentation](https://github.com/minio/minio/tree/e0d3a8c1f4e52bb4a7d82f7f369b6796103740b3/docs) or the `mc` command-line help information.

If you are unsure about the available features or how to use a specific feature, you can append `-h` to a subcommand to view the help information.

### Identity and access control

#### Regular users

Before version 1.2, `juicefs gateway` only created a superuser when starting, and this superuser belonged only to that process. Even if multiple gateway processes shared the same file system, their users were isolated between processes. You could set different superusers for each gateway process, and they were independent and unaffected by each other.

Starting from version 1.2, `juicefs gateway` still requires setting a superuser at startup, and this superuser remains isolated per process. However, it allows adding new users using `mc admin user add`. Newly added users are shared across the same file system. You can manage new users using `mc admin user`. This supports adding, disabling, enabling, and deleting users, as well as viewing all users and displaying user information and policies.

```Shell
$ mc admin user -h
NAME:
  mc admin user - manage users

USAGE:
  mc admin user COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add a new user
  disable  disable user
  enable   enable user
  remove   remove user
  list     list all users
  info     display info of a user
  policy   export user policies in JSON format
  svcacct  manage service accounts
```

An example of adding a user:

```Shell
# Add a new user.
$ mc admin user add myjfs user1 admin123

# List current users.
$ mc admin user list myjfs
enabled    user1

# List current users in JSON format.
$ mc admin user list myjfs --json
{
 "status": "success",
 "accessKey": "user1",
 "userStatus": "enabled"
}
```

### Service accounts

Service accounts are used to create a copy of an existing user with the same permissions, allowing different applications to use separate access keys. The privileges for service accounts inherit from their parent users. They can be managed using the command:

```Shell
$ mc admin user svcacct -h
NAME:
  mc admin user svcacct - manage service accounts

USAGE:
  mc admin user svcacct COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add a new service account
  ls       List services accounts
  rm       Remove a service account
  info     Get a service account info
  set      edit an existing service account
  enable   Enable a service account
  disable  Disable a services account
```

:::tip
Service accounts inherit privileges from their parent users and cannot be directly attached with permission policies.
:::

For example, consider a user named `user1`. You can create a service account named `svcacct1` for it using the following command:

```Shell
mc admin user svcacct add myjfs user1 --access-key svcacct1 --secret-key 123456abc
```

If the parent user, `user1`, has read-only permissions, then so will `svcacct1`. To grant different permissions to `svcacct1`, you would need to adjust the privileges of the parent user.

#### AssumeRole security token service

The S3 Gateway Security Token Service (STS) is a service that allows clients to request temporary credentials to access MinIO resources. The working principle of temporary credentials is almost the same as default administrator credentials but with some differences:

- **Temporary credentials are short-lived.** They can be configured to last from minutes to hours. After expiration, the gateway no longer recognizes them and does not allow any form of API request access.
- **Temporary credentials do not need to be stored with the application. They are dynamically generated and provided to the application when requested.** When temporary credentials expire, applications can request new credentials.

The `AssumeRole` operation returns a set of temporary security credentials. You can use them to access gateway resources. `AssumeRole` requires authorization credentials for an existing gateway user and returns temporary security credentials, including an access key, secret key, and security token. Applications can use these temporary security credentials to sign requests for gateway API operations. The policies applied to these temporary credentials inherit from gateway user credentials.

By default, `AssumeRole` creates temporary security credentials with a validity period of one hour. However, you can specify the duration of the credentials using the optional parameter `DurationSeconds`, which can range from 900 (15 minutes) to 604,800 (7 days).

##### API request parameters

- `Version`

    Indicates the STS API version information. The only supported value is '2011-06-15', borrowed from the AWS STS API documentation for compatibility.

    | Parameter  | Value  |
    | ---------- | ------ |
    | Type    | String |
    | Require | Yes    |

- `AUTHPARAMS`

    Indicates the STS API authorization information. If you are familiar with AWS Signature V4 authorization headers, this STS API supports the signature V4 authorization as described [here](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html).

- `DurationSeconds`

   Duration in seconds. This value can range from 900 seconds (15 minutes) to 7 days. If the value is higher than this setting, the operation fails. By default, this value is set to 3,600 seconds.

    | Parameter      | Value               |
    |-------------|---------------------|
    | *Type*      | Integer             |
    | Valid range | From 900 to 604,800 |
    | Required    | No                  |

- Policy

    A JSON-format IAM policy that you want to use as an inline session policy. This parameter is optional. Passing a policy to this operation returns new temporary credentials. The permissions of the generated session are the intersection of preset policy names and the policy set here. You cannot use this policy to grant more permissions than allowed by the assumed preset policy names.

    | Parameter      | Value             |
    |-------------|-------------------|
    | Type        | String            |
    | Valid range | From 1 to 2,048 |
    | Required    | No                |

##### Response elements

The XML response of this API is similar to [AWS STS `AssumeRole`](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#API_AssumeRole_ResponseElements).

##### Errors

The XML error response of this API is similar to [AWS STS `AssumeRole`](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#API_AssumeRole_Errors).

##### A `POST` request example

```
http://minio:9000/?Action=AssumeRole&DurationSeconds=3600&Version=2011-06-15&Policy={"Version":"2012-10-17","Statement":[{"Sid":"Stmt1","Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}]}&AUTHPARAMS
```

##### A response example

```
<?xml version="1.0" encoding="UTF-8"?>
<AssumeRoleResponse xmlns="https://sts.amazonaws.com/doc/2011-06-15/">
  <AssumeRoleResult>
    <AssumedRoleUser>
      <Arn/>
      <AssumeRoleId/>
    </AssumedRoleUser>
    <Credentials>
      <AccessKeyId>Y4RJU1RNFGK48LGO9I2S</AccessKeyId>
      <SecretAccessKey>sYLRKS1Z7hSjluf6gEbb9066hnx315wHTiACPAjg</SecretAccessKey>
      <Expiration>2019-08-08T20:26:12Z</Expiration>
      <SessionToken>eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJZNFJKVTFSTkZHSzQ4TEdPOUkyUyIsImF1ZCI6IlBvRWdYUDZ1Vk80NUlzRU5SbmdEWGo1QXU1WWEiLCJhenAiOiJQb0VnWFA2dVZPNDVJc0VOUm5nRFhqNUF1NVlhIiwiZXhwIjoxNTQxODExMDcxLCJpYXQiOjE1NDE4MDc0NzEsImlzcyI6Imh0dHBzOi8vbG9jYWxob3N0Ojk0NDMvb2F1dGgyL3Rva2VuIiwianRpIjoiYTBiMjc2MjktZWUxYS00M2JmLTg3MzktZjMzNzRhNGNkYmMwIn0.ewHqKVFTaP-j_kgZrcOEKroNUjk10GEp8bqQjxBbYVovV0nHO985VnRESFbcT6XMDDKHZiWqN2vi_ETX_u3Q-w</SessionToken>
    </Credentials>
  </AssumeRoleResult>
  <ResponseMetadata>
    <RequestId>c6104cbe-af31-11e0-8154-cbc7ccf896c7</RequestId>
  </ResponseMetadata>
</AssumeRoleResponse>
```

##### Use the AWS CLI with the AssumeRole API

1. Start the gateway and create a user named `foobar`.

2. Configure the AWS CLI:

    ```
    [foobar]
    region = us-east-1
    aws_access_key_id = foobar
    aws_secret_access_key = foo12345
    ```

3. Use the AWS CLI to request the `AssumeRole` API.

    :::note Note
    In the command below, `--role-arn` and `--role-session-name` have no significance for the gateway. You can set them to any value that meets the command line requirements.
    :::

    ```sh
    $ aws --profile foobar --endpoint-url http://localhost:9000 sts assume-role --policy '{"Version":"2012-10-17","Statement":[{"Sid":"Stmt1","Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}]}' --role-arn arn:xxx:xxx:xxx:xxxx --role-session-name anything
    {
        "AssumedRoleUser": {
            "Arn": ""
        },
        "Credentials": {
            "SecretAccessKey": "xbnWUoNKgFxi+uv3RI9UgqP3tULQMdI+Hj+4psd4",
            "SessionToken": "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJLOURUSU1VVlpYRVhKTDNBVFVPWSIsImV4cCI6MzYwMDAwMDAwMDAwMCwicG9saWN5IjoidGVzdCJ9.PetK5wWUcnCJkMYv6TEs7HqlA4x_vViykQ8b2T_6hapFGJTO34sfTwqBnHF6lAiWxRoZXco11B0R7y58WAsrQw",
            "Expiration": "2019-02-20T19:56:59-08:00",
            "AccessKeyId": "K9DTIMUVZXEXJL3ATUOY"
        }
    }
    ```

##### Access the AssumeRole API in Go applications

See the [MinIO official example program](https://github.com/minio/minio/blob/master/docs/sts/assume-role.go).

:::note
Superusers defined by environment variables cannot use AssumeRole APIs; only users added by `mc admin user add` can use AssumeRole APIs.
:::

#### Permission management

By default, newly created users have no permissions and need to be granted permissions using `mc admin policy` before they can be used. This command supports adding, deleting, updating, and listing policies, as well as adding, deleting, and updating permissions for users.

```Shell
$ mc admin policy -h
NAME:
  mc admin policy - manage policies defined in the MinIO server

USAGE:
  mc admin policy COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add     add new policy
  remove  remove policy
  list    list all policies
  info    show info on a policy
  set     set IAM policy on a user or group
  unset   unset an IAM policy for a user or group
  update  Attach new IAM policy to a user or group
```

The gateway includes the following common policies:

- `readonly`: Read-only users.
- `readwrite`: Read-write users.
- `writeonly`: Write-only users.
- `consoleAdmin`: Read-write-admin users, where "admin" means the ability to use management APIs such as creating users.

For example, to set a user as a read-only user:

```Shell
# Set user1 as a read-only user.
$ mc admin policy set myjfs readonly user=user1

# Check user policy.
$ mc admin user list myjfs
enabled    user1                 readonly
```

For custom policies, use `mc admin policy add`:

```Shell
$ mc admin policy add -h
NAME:
  mc admin policy add - add new policy

USAGE:
  mc admin policy add TARGET POLICYNAME POLICYFILE

POLICYNAME:
  Name of the canned policy on the MinIO server.

POLICYFILE:
  Name of the policy file associated with the policy name.

EXAMPLES:
  1. Add a new canned policy 'writeonly'.
     $ mc admin policy add myjfs writeonly /tmp/writeonly.json
```

The policy file to be added here must be in JSON format with [IAM-compatible](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies.html) syntax, limited to 2,048 characters. This syntax allows for more fine-grained access control. If you are unfamiliar with this, you can first use the following command to see the simple policies and then modify them accordingly.

```Shell
$ mc admin policy info myjfs readonly
{
 "Version": "2012-10-17",
 "Statement": [
  {
   "Effect": "Allow",
   "Action": [
    "s3:GetBucketLocation",
    "s3:GetObject"
   ],
   "Resource": [
    "arn:aws:s3:::*"
   ]
  }
 ]
}
```

#### User group management

JuiceFS S3 Gateway supports creating user groups, similar to Linux user groups, and uses `mc admin group` for management. You can set one or more users to a group and grant permissions uniformly to the group. This usage is similar to user management.

```Shell
$ mc admin  group -h
NAME:
  mc admin group - manage groups

USAGE:
  mc admin group COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add users to a new or existing group
  remove   remove group or members from a group
  info     display group info
  list     display list of groups
  enable   enable a group
  disable  disable a group
```

#### Anonymous access management

In addition to user-specific permissions, anonymous access management is also possible. This allows specific objects or buckets to be accessible to anyone. You can use the `mc policy` command to manage this functionality.

```Shell
Name:
  mc policy - manage anonymous access to buckets and objects

USAGE:
  mc policy [FLAGS] set PERMISSION TARGET
  mc policy [FLAGS] set-json FILE TARGET
  mc policy [FLAGS] get TARGET
  mc policy [FLAGS] get-json TARGET
  mc policy [FLAGS] list TARGET

PERMISSION:
  Allowed policies are: [none, download, upload, public].

FILE:
  A valid S3 policy JSON filepath.

EXAMPLES:
  1. Set bucket to "download" on Amazon S3 cloud storage.
     $ mc policy set download s3/burningman2011

  2. Set bucket to "public" on Amazon S3 cloud storage.
     $ mc policy set public s3/shared

  3. Set bucket to "upload" on Amazon S3 cloud storage.
     $ mc policy set upload s3/incoming

  4. Set policy to "public" for bucket with prefix on Amazon S3 cloud storage.
     $ mc policy set public s3/public-commons/images

  5. Set a custom prefix based bucket policy on Amazon S3 cloud storage using a JSON file.
     $ mc policy set-json /path/to/policy.json s3/public-commons/images

  6. Get bucket permissions.
     $ mc policy get s3/shared

  7. Get bucket permissions in JSON format.
     $ mc policy get-json s3/shared

  8. List policies set to a specified bucket.
     $ mc policy list s3/shared

  9. List public object URLs recursively.
     $ mc policy --recursive links s3/shared/
```

The gateway has built-in support for four types of anonymous permissions by default:

- `none`: Disallows anonymous access (typically used to clear existing permissions).
- `download`: Allows anyone to read.
- `upload`: Allows anyone to write.
- `public`: Allows anyone to read and write.

The following example shows how to set an object to allow anonymous downloads:

```
# Set testbucket1/afile for anonymous access.
mc policy set download useradmin/testbucket1/afile

# View specific permissions.
mc policy get-json useradmin/testbucket1/afile

$ mc policy --recursive links useradmin/testbucket1/
http://127.0.0.1:9001/testbucket1/afile

# Directly download the object.
wget http://127.0.0.1:9001/testbucket1/afile

# Clear download permission for afile.
mc policy set none  useradmin/testbucket1/afile
```

#### Configuration effective time

All management API updates for JuiceFS S3 Gateway take effect immediately and are persisted to the JuiceFS file system. Clients that accept these API requests also immediately reflect these changes.

However, in a multi-server gateway setup, the situation is slightly different. This is because when the gateway handles request authentication, it uses in-memory cached information as the validation baseline. Otherwise, reading configuration file content for every request would pose unacceptable performance issues. However, caching also introduces potential inconsistencies between cached data and the configuration file.

Currently, JuiceFS S3 Gateway's cache refresh strategy involves forcibly updating the in-memory cache every 5 minutes (certain operations also trigger cache update operations). This ensures that configuration changes take effect within a maximum of 5 minutes in a multi-server setup. You can adjust this time by using the `--refresh-iam-interval` parameter. If immediate effect on a specific gateway is required, you can manually restart it.

### Bucket event notifications

You can use bucket event notifications to monitor events happening on objects within a storage bucket and trigger certain actions in response.

Currently supported object event types include:

- `s3:ObjectCreated:Put`
- `s3:ObjectCreated:CompleteMultipartUpload`
- `s3:ObjectAccessed:Head`
- `s3:ObjectCreated:Post`
- `s3:ObjectRemoved:Delete`
- `s3:ObjectCreated:Copy`
- `s3:ObjectAccessed:Get`

Supported global events include:

- `s3:BucketCreated`
- `s3:BucketRemoved`

You can use the `mc` client tool with the event subcommand to set up and monitor event notifications. Notifications sent by MinIO for publishing events are in JSON format. See the [JSON structure](https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html).

To reduce dependencies, JuiceFS S3 Gateway has cut support for certain event destination types. Currently, storage bucket events can be published to the following destinations:

- Redis
- MySQL
- PostgreSQL
- Webhooks

```Shell
$ mc admin config get myjfs | grep notify
notify_webhook        publish bucket notifications to webhook endpoints
notify_mysql          publish bucket notifications to MySQL databases
notify_postgres       publish bucket notifications to Postgres databases
notify_redis          publish bucket notifications to Redis datastores
```

:::note
Here, assuming the JuiceFS file system name is 'images', enable the S3 Gateway service and define its alias as 'myjfs' in mc. For the S3 Gateway, the JuiceFS file system name 'images' serves as a bucket name.
:::

#### Use Redis to publish events

Redis event destination supports two formats: `namespace` and `access`.

In the `namespace` format, the gateway synchronizes objects in the bucket to entries in a Redis hash. Each entry corresponds to an object in the storage bucket, with the key set to "bucket name/object name" and the value as JSON-formatted event data specific to that gateway object. Any updates or deletions of objects also update or delete corresponding entries in the hash.

In the `access` format, the gateway uses [RPUSH](https://redis.io/commands/rpush) to add events to a list. Each element in this list is a JSON-formatted list with two elements:

- A timestamp string
- A JSON object containing event data related to operations on the bucket

In this format, elements in the list are not updated or deleted.

To use notification destinations in `namespace` and `access` formats:

1. Configure Redis with the gateway.

    Use the `mc admin config set` command to configure Redis as the event notification destination:

    ```Shell
    # Command-line parameters
    # mc admin config set myjfs notify_redis[:name] address="xxx" format="namespace|access" key="xxxx" password="xxxx" queue_dir="" queue_limit="0"
    # An example
    $ mc admin config set myjfs notify_redis:1 address="127.0.0.1:6379/1" format="namespace" key="bucketevents" password="yoursecret" queue_dir="" queue_limit="0"
    ```

    You can use `mc admin config get myjfs notify_redis` to view the configuration options. Different types of destinations have different configuration options. For Redis type, it has the following configuration options:

    ```Shell
    $ mc admin config get myjfs notify_redis
    notify_redis enable=off format=namespace address= key= password= queue_dir= queue_limit=0
    ```

    Here are the meanings of each configuration option:

    ```Shell
    notify_redis[:name]               Supports setting multiple Redis instances with different names.
    address*     (address)            Address of the Redis server. For example: localhost:6379.
    key*         (string)             Redis key to store/update events. The key is created automatically.
    format*      (namespace*|access)  Determines the format type, either 'namespace' or 'access'; defaults to 'namespace'.
    password     (string)             Password for the Redis server.
    queue_dir    (path)               Directory to store unsent messages, for example, '/home/events'.
    queue_limit  (number)             Maximum limit of unsent messages. The default is '100000'.
    comment      (sentence)           Optional comment description.
    ```

    The gateway supports persistent event storage. Persistent storage backs up events when the Redis broker is offline and replays events when the broker comes back online. You can set the directory for event storage using the `queue_dir` field and the maximum limit for storage using `queue_limit`. For example, you can set `queue_dir` to `/home/events`, and you can set `queue_limit` to 1,000. By default, `queue_limit` is 100,000. Before updating the configuration, you can use the `mc admin config get` command to get the current configuration.

    ```Shell
    $ mc admin config get myjfs notify_redis
    notify_redis:1 address="127.0.0.1:6379/1" format="namespace" key="bucketevents" password="yoursecret" queue_dir="" queue_limit="0"

    # Effective after restart
    $ mc admin config set myjfs notify_redis:1 queue_limit="1000"
    Successfully applied new settings.
    Please restart your server: 'mc admin service restart myjfs'.
    # Note that the `mc admin service restart myjfs` command cannot be used to restart. JuiceFS S3 Gateway does not currently support this functionality. When you see this prompt after configuring with `mc`, you need to manually restart JuiceFS S3 Gateway.
    ```

    After using the `mc admin config set` command to update the configuration, restart JuiceFS S3 Gateway to apply the changes. JuiceFS S3 Gateway will output a line similar to `SQS ARNs: arn:minio:sqs::1:redis`.

    Based on your needs, you can add multiple Redis destinations by providing the identifier for each Redis instance (like the "1" in the example "notify_redis:1") along with the configuration parameters for each instance.

2. Enable bucket notifications.

    Now you can enable event notifications on a bucket named "images." When a JPEG file is created or overwritten, a new key is created or an existing key is updated in the previously configured Redis hash. If an existing object is deleted, the corresponding key is also removed from the hash. Therefore, the rows in the Redis hash map to `.jpg` objects in the "images" bucket.

    To configure bucket notifications, you need to use the Amazon Resource Name (ARN) information outputted by the gateway in the previous steps. See more information about [ARNs](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html).

    You can use the `mc` tool to add these configuration details. Assuming the gateway service alias is myjfs, you can execute the following script:

    ```Shell
    mc event add myjfs/images arn:minio:sqs::1:redis --suffix .jpg
    mc event list myjfs/images
    arn:minio:sqs::1:redis   s3:ObjectCreated:*,s3:ObjectRemoved:*,s3:ObjectAccessed:*   Filter: suffix=".jpg"
    ```

3. Verify Redis.

    Start the `redis-cli` Redis client program to check the content in Redis. Running the `monitor` Redis command will output every command executed on Redis.

    ```Shell
    redis-cli -a yoursecret
    127.0.0.1:6379> monitor
    OK
    ```

    Upload a file named `myphoto.jpg` to the `images` bucket.

    ```Shell
    mc cp myphoto.jpg myjfs/images
    ```

    In the previous terminal, you can see the operations performed by the gateway on Redis:

    ```Shell
    127.0.0.1:6379> monitor
    OK
    1712562516.867831 [1 192.168.65.1:59280] "hset" "bucketevents" "images/myphoto.jpg" "{\"Records\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"minio:s3\",\"awsRegion\":\"\",\"eventTime\":\"2024-04-08T07:48:36.865Z\",\"eventName\":\"s3:ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"admin\"},\"requestParameters\":{\"principalId\":\"admin\",\"region\":\"\",\"sourceIPAddress\":\"127.0.0.1\"},\"responseElements\":{\"content-length\":\"0\",\"x-amz-request-id\":\"17C43E891887BA48\",\"x-minio-origin-endpoint\":\"http://127.0.0.1:9001\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"Config\",\"bucket\":{\"name\":\"images\",\"ownerIdentity\":{\"principalId\":\"admin\"},\"arn\":\"arn:aws:s3:::images\"},\"object\":{\"key\":\"myphoto.jpg\",\"size\":4,\"eTag\":\"40b134ab8a3dee5dd9760a7805fd495c\",\"userMetadata\":{\"content-type\":\"image/jpeg\"},\"sequencer\":\"17C43E89196AE2A0\"}},\"source\":{\"host\":\"127.0.0.1\",\"port\":\"\",\"userAgent\":\"MinIO (darwin; arm64) minio-go/v7.0.11 mc/RELEASE.2021-04-22T17-40-00Z\"}}]}"
    ```

    Here, you can see that the gateway executed the `HSET` command on the `minio_events` key.

    In the `access` format, `minio_events` is a list, and the gateway calls `RPUSH` to add it to the list. In the `monitor` command, you can see:

    ```Shell
    127.0.0.1:6379> monitor
    OK
    1712562751.922469 [1 192.168.65.1:61102] "rpush" "aceesseventskey" "[{\"Event\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"minio:s3\",\"awsRegion\":\"\",\"eventTime\":\"2024-04-08T07:52:31.921Z\",\"eventName\":\"s3:ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"admin\"},\"requestParameters\":{\"principalId\":\"admin\",\"region\":\"\",\"sourceIPAddress\":\"127.0.0.1\"},\"responseElements\":{\"content-length\":\"0\",\"x-amz-request-id\":\"17C43EBFD35A53B8\",\"x-minio-origin-endpoint\":\"http://127.0.0.1:9001\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"Config\",\"bucket\":{\"name\":\"images\",\"ownerIdentity\":{\"principalId\":\"admin\"},\"arn\":\"arn:aws:s3:::images\"},\"object\":{\"key\":\"myphoto.jpg\",\"size\":4,\"eTag\":\"40b134ab8a3dee5dd9760a7805fd495c\",\"userMetadata\":{\"content-type\":\"image/jpeg\"},\"sequencer\":\"17C43EBFD3DACA70\"}},\"source\":{\"host\":\"127.0.0.1\",\"port\":\"\",\"userAgent\":\"MinIO (darwin; arm64) minio-go/v7.0.11 mc/RELEASE.2021-04-22T17-40-00Z\"}}],\"EventTime\":\"2024-04-08T07:52:31.921Z\"}]"
    ```

#### Use MySQL to publish events

The MySQL notification destination supports two formats: `namespace` and `access`.

If you use the `namespace` format, the gateway synchronizes objects in the bucket to rows in the database table. Each row has two columns:

- `key_name`. It is the bucket name plus the object name.
- `value`. It is the JSON-formatted event data about that gateway object.

If objects are updated or deleted, the corresponding rows in the table are also updated or deleted.

If you use the `access` format, the gateway adds events to the table. Rows have two columns:

- `event_time`. It is the time the event occurred on the gateway server.
- `event_data`. It is the JSON-formatted event data about that gateway object.

In this format, rows are not deleted or modified.

The following steps show how to use the notification destination in `namespace` format. The `access` format is similar and not further described here.

1. Ensure the MySQL version meets the minimum requirements.

    JuiceFS S3 Gateway requires MySQL version 5.7.8 or above, because it uses the [JSON](https://dev.mysql.com/doc/refman/5.7/en/json.html) data type introduced in MySQL 5.7.8.

2. Configure MySQL to the gateway.

    Use the `mc admin config set` command to configure MySQL as the event notification destination.

    ```Shell
    mc admin config set myjfs notify_mysql:myinstance table="minio_images" dsn_string="root:123456@tcp(172.17.0.1:3306)/miniodb"
    ```

    You can use `mc admin config get myjfs notify_mysql` to view the configuration options. Different destination types have different configuration options. For MySQL type, the following configuration options are available:

    ```shell
    $ mc admin config get myjfs notify_mysql
    format=namespace dsn_string= table= queue_dir= queue_limit=0 max_open_connections=2
    ```

    Here are the meanings of each configuration item:

    ```Shell
    KEY:
    notify_mysql[:name]  Publish bucket notifications to the MySQL database. When multiple MySQL server endpoints are required, you can add a user-specified "name" to each configuration, for example, "notify_mysql:myinstance."

    ARGS:
    dsn_string*  (string)             MySQL data source name connection string, for example, "<user>:<password>@tcp(<host>:<port>)/<database>".
    table*       (string)             Name of the database table to store/update events. The table is automatically created.
    format*      (namespace*|access)  'namespace' or 'access.' The default is 'namespace.'
    queue_dir    (path)               The directory for storing unsent messages, for example, '/home/events'.
    queue_limit  (number)             The maximum limit of unsent messages. The default is '100000'.
    comment      (sentence)           Optional comment description.
    ```

    `dsn_string` is required and must be in the format `<user>:<password>@tcp(<host>:<port>)/<database>`.

    MinIO supports persistent event storage. Persistent storage backs up events when the MySQL connection is offline and replays events when the broker comes back online. You can set the storage directory for events using the `queue_dir` field, and the maximum storage limit using `queue_limit`. For example, you can set `queue_dir` to `/home/events`, and `queue_limit` to 1,000. By default, `queue_limit` is set to 100,000.

    Before updating the configuration, you can use the `mc admin config get` command to get the current configuration.

    ```Shell
    $ mc admin config get myjfs/ notify_mysql
    notify_mysql:myinstance enable=off format=namespace host= port= username= password= database= dsn_string= table= queue_dir= queue_limit=0
    ```

    Update the MySQL notification configuration using the `mc admin config set` command with the `dsn_string` parameter:

    ```Shell
    mc admin config set myjfs notify_mysql:myinstance table="minio_images" dsn_string="root:xxxx@tcp(127.0.0.1:3306)/miniodb"
    ```

    You can add multiple MySQL server endpoints as needed, by providing the identifier of the MySQL instance (for example, "myinstance") and the configuration parameter information for each instance.

    After updating the configuration with the `mc admin config set` command, restart the gateway to apply the configuration changes. The gateway server will output a line during startup similar to `SQS ARNs: arn:minio:sqs::myinstance:mysql`.

3. Enable bucket notifications.

    Now you can enable event notifications on a bucket named "images." When a file is uploaded to the bucket, a new record is inserted into MySQL, or an existing record is updated. If an existing object is deleted, the corresponding record is also deleted from the MySQL table. Therefore, each row in the MySQL table corresponds to an object in the bucket.

    To configure bucket notifications, you need to use the ARN information outputted by MinIO in previous steps. See more information about [ARNs](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html).

    Assuming the gateway service alias is myjfs, you can execute the following script:

    ```Shell
    # Add notification configuration to the 'images' bucket using the MySQL ARN. The --suffix parameter is used to filter events.
    mc event add myjfs/images arn:minio:sqs::myinstance:mysql --suffix .jpg
    # Print the notification configuration on the 'images' bucket.
    mc event list myjfs/images
    arn:minio:sqs::myinstance:mysql s3:ObjectCreated:*,s3:ObjectRemoved:*,s3:ObjectAccessed:* Filter: suffix=”.jpg”
    ```

4. Verify MySQL.

    Open a new terminal and upload a JPEG image to the `images` bucket:

    ```Shell
    mc cp myphoto.jpg myjfs/images
    ```

    Open a MySQL terminal and list all records in the `minio_images` table. You will find a newly inserted record.

#### Use PostgreSQL to publish events

The method of publishing events using PostgreSQL is similar to publishing MinIO events using MySQL, with PostgreSQL version 9.5 or above required. The gateway uses PostgreSQL 9.5's [`INSERT ON CONFLICT`](https://www.postgresql.org/docs/9.5/static/sql-insert.html#SQL-ON-CONFLICT) (aka `UPSERT`) feature and 9.4's [`jsonb`](https://www.postgresql.org/docs/9.4/static/datatype-json.html) data type.

#### Use a webhook to publish events

[Webhooks](https://en.wikipedia.org/wiki/Webhook) use a push model to get data instead of continually pulling.

1. Configure a webhook to the gateway.

    The gateway supports persistent event storage. Persistent storage backs up events when the webhook is offline and replays events when the broker comes back online. You can set the directory for event storage using the `queue_dir` field, and the maximum storage limit using `queue_limit`. For example, you can set `queue_dir` to `/home/events` and `queue_limit` to 1,000. By default, `queue_limit` is 100,000.

    ```Shell
    KEY:
    notify_webhook[:name]  Publish bucket notifications to webhook endpoints.

    ARGS:
    endpoint*    (url)       Webhook server endpoint, for example, http://localhost:8080/minio/events.
    auth_token   (string)    Opaque token or JWT authorization token.
    queue_dir    (path)      The directory for storing unsent messages, for example, '/home/events'.
    queue_limit  (number)    The maximum limit of unsent messages. The default is '100000'.
    client_cert  (string)    The client certificate for mTLS authentication of the webhook.
    client_key   (string)    The client certificate key for mTLS authentication of the webhook.
    comment      (sentence)  Optional comment description.
    ```

    Use the `mc admin config set` command to update the configuration. The endpoint here is the service that listens for webhook notifications. Save the configuration file and restart the MinIO service to apply the changes. Note that when restarting MinIO, this endpoint must be up and accessible.

    ```Shell
    mc admin config set myjfs notify_webhook:1 queue_limit="0"  endpoint="http://localhost:3000" queue_dir=""
    ```

2. Enable bucket notifications.

    Now you can enable event notifications. When a file is uploaded to the bucket, an event is triggered. Here, the ARN value is `arn:minio:sqs::1:webhook`. See more information about [ARNs](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html).

    ```Shell
    mc mb myjfs/images-thumbnail
    mc event add myjfs/images arn:minio:sqs::1:webhook --event put --suffix .jpg
    ```

    If the command report cannot create a bucket, please check if the S3 Gateway has enabled [multi-bucket support](#multi-bucket-support).

3. Use Thumbnailer to verify.

    [Thumbnailer](https://github.com/minio/thumbnailer) is a project that generates thumbnails using MinIO's `listenBucketNotification` API. JuiceFS uses Thumbnailer to listen to gateway notifications. If a file is uploaded to the gateway service, Thumbnailer listens to that notification, generates a thumbnail, and uploads it to the gateway service.

    To install Thumbnailer:

    ```Shell
    git clone https://github.com/minio/thumbnailer/
    npm install
    ```

    Open the Thumbnailer's `config/webhook.json` configuration file, add the configuration for the MinIO server, and start Thumbnailer using:

    ```Shell
    NODE_ENV=webhook node thumbnail-webhook.js
    ```

    Thumbnailer runs on `http://localhost:3000/`.

    Next, configure the MinIO server to send messages to this URL (mentioned in step 1) and set up bucket notifications using `mc` (mentioned in step 2). Then upload an image to the gateway server:

    ```Shell
    mc cp ~/images.jpg myjfs/images
    .../images.jpg:  8.31 KB / 8.31 KB ┃▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓┃ 100.00% 59.42 KB/s 0s
    ```

    After a moment, use `mc ls` to check the content of the bucket. You will see a thumbnail.

    ```Shell
    mc ls myjfs/images-thumbnail
    [2017-02-08 11:39:40 IST]   992B images-thumbnail.jpg
    ```


================================================
FILE: docs/en/guide/quota.md
================================================
---
title: Storage Quota
sidebar_position: 4
---

JuiceFS supports both total file system quota and subdirectory quota, both of which can be used to limit the available capacity and the number of available inodes. Both file system quota and directory quota are hard limits. When the total file system quota is exhausted, subsequent writes will return `ENOSPC` (No space left) error; and when the directory quota is exhausted, subsequent writes will return `EDQUOT` (Disk quota exceeded) error.

:::tip
The storage quota settings are stored in the metadata engine for all mount points to read, and the client of each mount point will also cache its own used capacity and inodes and synchronize them with the metadata engine once per second. Meanwhile the client will read the latest usage value from the metadata engine every 10 seconds to synchronize the usage information among each mount point, but this information synchronization mechanism cannot guarantee that the usage data is counted accurately.
:::

## File system quota {#file-system-quota}

For Linux, the default capacity of a JuiceFS type file system is identified as `1.0P` by using the `df` command.

```shell
$ df -Th | grep juicefs
JuiceFS:ujfs   fuse.juicefs  1.0P  682M  1.0P    1% /mnt
```

:::note
The capacity of underlying object storage is usually unlimited, i.e., JuiceFS storage is unlimited. Therefore, the displayed capacity is just an estimate rather than the actual storage limit.
:::

The `config` command that comes with the client allows you to view the details of a file system.

```shell
$ juicefs config $METAURL
{
  "Name": "ujfs",
  "UUID": "1aa6d290-279b-432f-b9b5-9d7fd597dec2",
  "Storage": "minio",
  "Bucket": "127.0.0.1:9000/jfs1",
  "AccessKey": "herald",
  "SecretKey": "removed",
  "BlockSize": 4096,
  "Compression": "none",
  "Shards": 0,
  "Partitions": 0,
  "Capacity": 0,
  "Inodes": 0,
  "TrashDays": 0
}
```

### Limit total capacity {#limit-total-capacity}

The capacity limit (in GiB) can be set with `--capacity` when creating a file system, e.g. to create a file system with an available capacity of 100 GiB:

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --capacity 100 \
    $METAURL myjfs
```

You can also set a capacity limit for a created file system with the `config` command:

```shell
$ juicefs config $METAURL --capacity 100
2022/01/27 12:31:39.506322 juicefs[16259] <INFO>: Meta address: postgres://herald@127.0.0.1:5432/jfs1
2022/01/27 12:31:39.521232 juicefs[16259] <WARNING>: The latency to database is too high: 14.771783ms
  capacity: 0 GiB -> 100 GiB
```

For file systems that have been set with storage quota, the identification capacity becomes the quota capacity:

```shell
$ df -Th | grep juicefs
JuiceFS:ujfs   fuse.juicefs  100G  682M  100G    1% /mnt
```

### Limit the total number of inodes {#limit-total-number-of-inodes}

On Linux systems, each file (a folder is also a type of file) has an inode regardless of size, so limiting the number of inodes is equivalent to limiting the number of files.

The quota can be set with `--inodes` when creating the file system, e.g.

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --inodes 100 \
    $METAURL myjfs
```

The file system created by the above command allows only 100 files to be stored. However, there is no limit to the size of individual files. For example, it will still work if a single file is equivalent or even larger than 1 TB as long as the total number of files does not exceed 100.

You can also set a capacity quota for a created file system by using the `config` command:

```shell
$ juicefs config $METAURL --inodes 100
2022/01/27 12:35:37.311465 juicefs[16407] <INFO>: Meta address: postgres://herald@127.0.0.1:5432/jfs1
2022/01/27 12:35:37.322991 juicefs[16407] <WARNING>: The latency to database is too high: 11.413961ms
    inodes: 0 -> 100
```

### Combine `--capacity` and `--inodes` {#limit-total-capacity-and-inodes}

You can combine `--capacity` and `--inodes` to set the capacity quota of a file system with more flexibility. For example, to create a file system that the total capacity limits to 100 TiB with only 100000 files to be stored:

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --capacity 102400 \
    --inodes 100000 \
    $METAURL myjfs
```

Similarly, for the file systems that have been created, you can follow the settings below separately.

```shell
juicefs config $METAURL --capacity 102400
```

```shell
juicefs config $METAURL --inodes 100000
```

:::tip
The client reads the latest storage quota settings from the metadata engine periodically to update the local settings. The refresh interval is controlled by the `--heartbeat` option (default: 12 seconds). Other mount points may take up to the heartbeat interval to update the quota setting.
:::

## Directory quota {#directory-quota}

JuiceFS began to support directory-level storage quota since v1.1, and you can use the `juicefs quota` subcommand for directory quota management and query.

:::tip
The usage statistic relies on the mount process, please do not use this feature until all writable mount processes are upgraded to v1.1.0.
:::

### Limit directory capacity {#limit-directory-capacity}

You can use `juicefs quota set $METAURL --path $DIR --capacity $N` to set the directory capacity limit in GiB. For example, to set a capacity quota of 1GiB for the directory `/test`:

```shell
$ juicefs quota set $METAURL --path /test --capacity 1
+-------+---------+---------+------+-----------+-------+-------+
|  Path |   Size  |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+---------+---------+------+-----------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% | unlimited |   314 |       |
+-------+---------+---------+------+-----------+-------+-------+
```

After the setting is successful, you can see a table describing the current quota setting directory, quota size, current usage and other information.

:::tip
The use of the `quota` subcommand does not require a local mount point, and it is expected that the input directory path is a path relative to the JuiceFS root directory rather than a local mount path. It may take a long time to set a quota for a large directory, because the current usage of the directory needs to be calculated.
:::

If you need to query the quota and current usage of a certain directory, you can use the `juicefs quota get $METAURL --path $DIR` command:

```shell
$ juicefs quota get $METAURL --path /test
+-------+---------+---------+------+-----------+-------+-------+
|  Path |   Size  |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+---------+---------+------+-----------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% | unlimited |   314 |       |
+-------+---------+---------+------+-----------+-------+-------+
```

You can also use the `juicefs quota ls $METAURL` command to list all directory quotas.

### Limit the total number of directory inodes

You can use `juicefs quota set $METAURL --path $DIR --inodes $N` to set the directory inode quota, the unit is one. For example, to set a quota of 400 inodes for the directory `/test`:

```shell
$ juicefs quota set $METAURL --path /test --inodes 400
+-------+---------+---------+------+--------+-------+-------+
|  Path |   Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+---------+---------+------+--------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% |    400 |   314 |   78% |
+-------+---------+---------+------+--------+-------+-------+
```

### Limit capacity and inodes of directory {#limit-capacity-and-inodes-of-directory}

You can combine `--capacity` and `--inodes` to set the capacity limit of the directory more flexibly. For example, to set a quota of 10GiB and 1000 inodes for the `/test` directory:

```shell
$ juicefs quota set $METAURL --path /test --capacity 10 --inodes 1000
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```

In addition, you can also not limit the capacity of the directory and the number of inodes (set to `0` means unlimited), and only use the `quota` command to count the current usage of the directory:

```shell
$ juicefs quota set $METAURL --path /test --capacity 0 --inodes 0
+-------+-----------+---------+------+-----------+-------+-------+
|  Path |    Size   |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+-----------+---------+------+-----------+-------+-------+
| /test | unlimited | 1.6 MiB |      | unlimited |   314 |       |
+-------+-----------+---------+------+-----------+-------+-------+
```

### Nested quota {#nested-quota}

JuiceFS allows nested quota to be set on multiple levels of directories, client performs recursive lookup to ensure quota settings take effect on every level of directory. This means even if the parent directory is allocated a smaller quota, you can still set a larger quota on the child directory.

### Subdirectory mount {#subdirectory-mount}

JuiceFS supports mounting arbitrary subdirectories using [`--subdir`](../reference/command_reference.mdx#mount-metadata-options). If the directory quota is set for the mounted subdirectory, you can use the `df` command that comes with the system to view the directory quota and current usage. For example, the file system quota is 1PiB and 10M inodes, while the quota for the `/test` directory is 1GiB and 400 inodes. The output of the `df` command when mounted using the root directory is:

```shell
$ df -h
Filesystem      Size  Used Avail Use% Mounted on
...
JuiceFS:myjfs   1.0P  1.6M  1.0P   1% /mnt/jfs

$ df -i -h
Filesystem     Inodes IUsed IFree IUse% Mounted on
...
JuiceFS:myjfs     11M   315   10M    1% /mnt/jfs
```

When mounted using the `/test` subdirectory, the output of the `df` command is:

```shell
$ df -h
Filesystem      Size  Used Avail Use% Mounted on
...
JuiceFS:myjfs   1.0G  1.6M 1023M   1% /mnt/jfs

$ df -i -h
Filesystem     Inodes IUsed IFree IUse% Mounted on
...
JuiceFS:myjfs     400   314    86   79% /mnt/jfs
```

:::note
When there is no quota set for the mounted subdirectory, JuiceFS will query up to find the nearest directory quota and return it to `df`. If directory quotas are set for multiple levels of parent directories, JuiceFS will return the minimum available capacity and number of inodes after calculation.
:::

### Usage check and fix {#usage-check-and-fix}

Since directory usage updates are laggy and asynchronous, loss may occur under unusual circumstances (such as a client exiting unexpectedly). We can use the `juicefs quota check $METAURL --path $DIR` command to check or fix it:

```shell
$ juicefs quota check $METAURL --path /test
2023/05/23 15:40:12.704576 juicefs[1638846] <INFO>: quota of /test is consistent [base.go:839]
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```

When the directory usage is correct, the current directory quota usage will be output; if it fails, the error log will be output:

```shell
$ juicefs quota check $METAURL --path /test
2023/05/23 15:48:17.494604 juicefs[1639997] <WARNING>: /test: quota(314, 4.0 KiB) != summary(314, 1.6 MiB) [base.go:843]
2023/05/23 15:48:17.494644 juicefs[1639997] <FATAL>: quota of /test is inconsistent, please repair it with --repair flag [main.go:31]
```

At this point you can use the `--repair` option to repair directory usage:

```shell
$ juicefs quota check $METAURL --path /test --repair
2023/05/23 15:50:08.737086 juicefs[1640281] <WARNING>: /test: quota(314, 4.0 KiB) != summary(314, 1.6 MiB) [base.go:843]
2023/05/23 15:50:08.737123 juicefs[1640281] <INFO>: repairing... [base.go:852]
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```


================================================
FILE: docs/en/guide/sync.md
================================================
---
title: Data Synchronization
sidebar_position: 7
description: Learn how to use juicefs sync for efficient data synchronization across supported storage systems, including object storage, JuiceFS, and local file systems.
---

[`juicefs sync`](../reference/command_reference.mdx#sync) is a powerful data synchronization tool that can copy data across all supported storage systems, including object storage, JuiceFS, and local file systems. You can freely copy data between any of these systems. It also supports syncing remote directories accessed via SSH, HDFS, and WebDAV. Advanced features include incremental synchronization, pattern matching (like rsync), and distributed syncing.

:::tip Mixing Community and Enterprise Editions
`juicefs sync` shares code between Community and Enterprise Editions. Therefore, even when you use different editions of the JuiceFS client, `sync` works normally. The only exception is when the [`jfs://`](#sync-without-mount-point) protocol header is involved. Due to the different metadata engine implementations in the Community and Enterprise Editions, clients from different editions cannot be mixed when using the `jfs://` protocol header.
:::

`juicefs sync` works like this:

```shell
juicefs sync [command options] SRC DST

# Sync object from OSS to S3
juicefs sync oss://mybucket.oss-cn-shanghai.aliyuncs.com s3://mybucket.s3.us-east-2.amazonaws.com

# Sync objects from S3 to JuiceFS
juicefs sync s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://VOL_NAME/

# Copy all files ending with .gz
juicefs sync --match-full-path --include='**.gz' --exclude='*' s3://xxx jfs://VOL_NAME/

# Copy all files that do not end with .gz
juicefs sync --match-full-path --exclude='**.gz' s3://xxx/ jfs://VOL_NAME/

# Copy all files except the subdirectory named tempdir
juicefs sync --match-full-path --include='*' s3://xxx/ jfs://VOL_NAME/
```

## Pattern matching {#pattern-matching}

You can use `--exclude` and `--include` for filtering. If no filtering rules are provided, all files are scanned and copied (`--include='*'` is the default). However, if you use the `--include` filter to match files with a specific pattern, you must also use `--exclude` to exclude other files. See the examples above for reference.

:::tip
When using multiple matching patterns, it may be difficult to determine whether a file will be synchronized due to the filtering logic. In such cases, it is recommended to add the `--dry --debug` option to preview the files selected for synchronization. If the results are not as expected, adjust the matching patterns accordingly.
:::

### Matching rules {#matching-rules}

You can use any word or substring for filtering, as well as these special patterns (similar to shell wildcards):

+ A single `*` matches any character, but terminates at `/`.
+ `**` matches any character, including `/`.
+ `?` matches any single character except `/`.
+ `[...]` matches a set of characters, such as `[a-z]` for any lowercase letter.
+ `[^...]` excludes specified characters. For example, `[^abc]` matches any character except `a`, `b`, and `c`.

In addition:

- If the matching pattern does not contain regex patterns, it tries to match the full file name. For example, `foo` matches `foo` and `xx/foo` but not `foo1`, `2foo`, or `foo/xx`, since none of them is a file named exactly `foo`.
- If the matching pattern ends with `/`, it only matches directories, not files.
- A pattern that starts with `/` stands for absolute path, so `/foo` matches the `foo` file at the root.

Here are some examples of matching patterns:

+ `--exclude='*.o'` excludes all files matching `*.o`.
+ `--exclude='/foo/*/bar'` excludes `bar` files located two levels under `/foo`, such as `/foo/spam/bar`, but not `/foo/spam/eggs/bar`.
+ `--exclude='/foo/**/bar'` excludes `bar` files at any level under `/foo`.

The `sync` command supports two filtering modes: *full path filtering* and *layer-by-layer filtering*. Both use `--include` and `--exclude` to filter files, but their behaviors are different. By default, `sync` employs the layer-by-layer filtering mode, which is more complicated but resembles rsync's usage. Therefore, it is only recommended for users familiar with rsync. For most people, `--match-full-path` is recommended because it is much easier to understand.

### Full path filtering (recommended) <VersionAdd>1.2.0</VersionAdd> {#full-path-filtering-mode}

Since v1.2.0, JuiceFS supports the `--match-full-path` option. This mode directly matches the full path of an object against all specified filters sequentially. Once a pattern matches, the result is returned (either "include" or "exclude"), and subsequent patterns are ignored.

Below is the workflow of full path filtering mode:

![Full path filtering workflow](../images/sync-full-path-filtering-mode-flow-chart.svg)

For example, consider a file located at `a1/b1/c1.txt` and three matching patterns `--include 'a*.txt' --include 'c1.txt' --exclude 'c*.txt'`. In full path filtering mode:
The string `a1/b1/c1.txt` is first matched against `--include 'a*.txt'`. This fails because `*` does not match the `/` character (see [matching rules](#matching-rules)).
`a1/b1/c1.txt` is then matched against `--include 'c1.txt'`, which succeeds. According to the mode's logic, subsequent patterns, such as `--exclude 'c*.txt'`, are ignored once a match is found. This file will be handled by the `sync` command.

Here are some more examples:

- `--exclude '/foo**'` excludes all files or directories whose root directory name starts with `foo`.
- `--exclude '**foo/**'` excludes all directories ending with `foo`.
- `--include '*/' --include '*.c' --exclude '*'` includes all directories and files with the `.c` extension while excluding everything else.
- `--include 'foo/bar.c' --exclude '*'` includes only the `foo` directory and the `foo/bar.c` file.

### Layer-by-layer filtering mode {#layer-by-layer-filtering-mode}

In layer-by-layer filtering mode, the full path is split into hierarchical levels, generating a sequence of strings. For example, a path like `a1/b1/c1.txt` is split into the sequence `a1`, `a1/b1`, and `a1/b1/c1.txt`. Each element in this sequence is processed as though in ["full path filtering"](#full-path-filtering-mode) mode.

If an element matches a certain pattern, two outcomes are possible:

- If it is an exclude pattern, the *exclude* behavior is immediately returned as the final result.
- If it is an include pattern, remaining patterns for that layer are skipped and the process moves on to the next layer.

If no patterns match at a particular layer, the process moves on to the next layer. **If "exclude" is not returned after all layers are processed, the scanned files are included (be "handled" by the `sync` command) by default.**

Below is the workflow for layer-by-layer filtering mode:

![Layer-by-layer filtering workflow](../images/sync-layer-by-layer-filtering-mode-flow-chart.svg)

For example, given the file `a1/b1/c1.txt` and the patterns `--include 'a*.txt' --include 'c1.txt' --exclude 'c*.txt'`, in layer-by-layer filtering mode, the sequence is `a1`, `a1/b1`, and `a1/b1/c1.txt`. The specific matching steps are:

1. At the first layer `a1`, no patterns match. Move on to the next layer.
2. At the second layer `a1/b1`, no patterns match. Move to the next level.
3. At the third layer `a1/b1/c1.txt`, the `--inlude 'c1.txt'` pattern matches. So as for the current state, this file will be handled, and the process will continue to the next layer.
4. Since there is no next layer, `a1/b1/c1.txt` will be included and handled by this command.

In the example above, the matching is successful until the last layer. In addition, there may be two situations:

- If the match is successful before the last layer, and the matching pattern is an exclude filter, the file is excluded as a final state, skipping all subsequent layers.
- If all layers are processed but no matches occur, this file will be included.

Essentially, this mode processes paths hierarchically, applying full path filtering at each layer. Each layer comes out with either a hit to exclude or a miss and continue to the next layer. The only way to get the file included is to process all layers of filtering.

Some more examples:

+ `--exclude /foo` excludes all files or directories named `foo` under the root directory.
+ `--exclude foo/` excludes all directories named `foo`.
+ For multi-level directories such as `dir_name/.../.../...`, all paths under `dir_name` will be processed according to the directory hierarchy. If the parent directory of a file is "excluded," the file will not be handled, even if an include rule is subsequently specified for it. If you want this file to be included, you must guarantee that all its parent directories are not excluded. For example, `/some/path/this-file-will-not-be-synced` in the following example will not be included because its parent directory `some` has been excluded by the rule `--exclude '*'`:

  ```shell
  --include '/some/path/this-file-will-not-be-synced' \
  --exclude '*'
  ```

One solution is to include all directories in the directory hierarchy by using the `--include '*/'` rule (which needs to be placed before the `--exclude '*'` rule). Alternatively, you can add include rules to each parent directory, for example:

  ```shell
  --include '/some/' \
  --include '/some/path/' \
  --include '/some/path/this-file-will-be-synced' \
  --exclude '*'
  ```

## Storage protocols {#storage-protocols}

You can sync data between any [supported storage system](../reference/how_to_set_up_object_storage.md), but note that if one of the endpoint is a JuiceFS volume, it it then recommended to [sync without mount point](#sync-without-mount-point) since it runs without FUSE overhead.

### Sync without mount point <VersionAdd>1.1</VersionAdd> {#sync-without-mount-point}

For data migrations that involve JuiceFS, it's recommended use the `jfs://` protocol, rather than mount JuiceFS and access its local directory, which bypasses the FUSE mount point and access JuiceFS directly. Under large scale scenarios, bypassing FUSE can save precious resources and increase performance.

```shell
myfs=redis://10.10.0.8:6379/1 juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/movies/ jfs://myfs/movies/
```

### Synchronize between object storage and JuiceFS {#synchronize-between-object-storage-and-juicefs}

The following command synchronizes `movies` directory from object storage to JuiceFS.

```shell
# mount JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# synchronize
juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/movies/ /mnt/jfs/movies/
```

The following command synchronizes `images` directory from JuiceFS to object storage.

```shell
# mount JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# synchronization
juicefs sync /mnt/jfs/images/ s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/images/
```

### Synchronize between object storages {#synchronize-between-object-storages}

The following command synchronizes all of the data from object storage to another bucket.

```shell
juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

### Synchronize between local and remote servers {#synchronize-between-local-and-remote-servers}

To copy files between directories on a local computer, simply specify the source and destination paths. For example, to synchronize the `/media/` directory with the `/backup/` directory:

```shell
juicefs sync /media/ /backup/
```

If you need to synchronize between servers, you can access the target server using the SFTP/SSH protocol. For example, to synchronize the local `/media/` directory with the `/backup/` directory on another server:

```shell
juicefs sync /media/ username@192.168.1.100:/backup/
# Specify password (optional)
juicefs sync /media/ "username:password"@192.168.1.100:/backup/
```

When using the SFTP/SSH protocol, if no password is specified, the sync task will prompt for the password. If you want to explicitly specify the username and password, you need to enclose them in double quotation marks, with a colon separating the username and password.

## Sync behavior {#sync-behavior}

### Incremental and full synchronization {#incremental-and-full-synchronization}

By default, `juicefs sync` performs incremental synchronization. It only overwrites files if their sizes are different. You can also use [`--update`](../reference/command_reference.mdx#sync) to overwrite files when the `mtime` of the source file has been updated. For scenarios with higher demand for data integrity, use [`--check-new`](../reference/command_reference.mdx#sync) or [`--check-all`](../reference/command_reference.mdx#sync) to perform byte-by-byte comparison between the source and the destination.

For full synchronization (where all files are synchronized regardless of their presence on the destination path), use [`--force-update`](../reference/command_reference.mdx#sync).

### Directory structure and file permissions {#directory-structure-and-file-permissions}

By default, empty directories are not synchronized. To include them, use the `--dirs` option.

In addition, when migrating data between file systems such as local, SFTP, and HDFS, use the `--perms` option to synchronize file permissions.

### Copy symbolic links {#copy-symbolic-links}

For synchronization between **local directories**, the `--links` option allows symbolic links to be copied as is, instead of resolving their targets. The synchronized symbolic link retains the original path stored in the source, regardless of whether the path is valid before or after the synchronization.

Note:

* The `mtime` of a symbolic link is not synchronized.
* The `--check-new` and `--perms` options will be ignored when synchronizing symbolic links.

### Data sync and compaction {#sync-and-compaction}

For sequential write scenarios, ensure each file write has at least a 4M (the default block size) buffer available. If the write concurrency is too high or the buffer size is too small, the client will not be able to maintain the desired "writing by large chunks" pattern. Instead, it could only write by small slices, which combined with compaction, could really deteriorate performance due to write amplification.

Compaction can be monitored using `juicefs_compact_size_histogram_bytes`, If compaction traffic is substantial during a `sync` operation, consider the following optimizations:

* If the object storage bandwidth is limited, avoid setting high concurrency (`--threads`). Instead, start with low concurrency and gradually increase it until you get the desired speed.

* When the destination is a JuiceFS file system, use the `jfs://` protocol, because it bypasses the FUSE mount point (reducing overhead) and is already optimized for file fragmentation problems. See the next point for details.

* When the destination is a JuiceFS file system, ensure the destination has sufficient available [buffer](https://github.com/juicedata/docs/pull/662/cache.md#buffer-size) capacity. Each write file handler must have at least 4MB of reserved memory. This means the `--buffer-size` should be at least 4 times the `--threads` value. If higher write concurrency is needed, consider setting it to 8 or 12 times the value. Depending on the destination file system's deployment model, you will use different methods to configure buffer size:

  * When the destination starts with the `jfs://` protocol, the JuiceFS client is part of the `juicefs sync` command itself. In this case, `--buffer-size` needs to be appended to the `juicefs sync` command.
  * When the destination is a FUSE mount point, the JuiceFS client runs as the `juicefs mount` process on the host machine. In this case, `--buffer-size` needs to be added directly to the mount command.

* If you need to limit the bandwidth via `--bwlimit`, you must also lower the `--threads` value to avoid write fragmentation caused by concurrency congestion. Since storage systems come with different performance levels, exact calculations cannot be provided here. Therefore, it is recommended to start with low concurrency and adjust as needed.

### Delete selected files

Using filters, you can even delete files by pattern via `juicefs sync`, the trick is to create an empty directory and use it as `SRC`.

Below are some examples which uses `--dry --debug` just to be cautious, they will not delete anything as long as `--dry` is specified, after the behavior is verified, remove the option to actually execute.

```shell
mkdir empty-dir
# Delete all objects in mybucket except the .gz files
juicefs sync ./empty-dir/ s3://mybucket.s3.us-east-2.amazonaws.com/ --match-full-path --delete-dst --exclude='**.gz' --include='*' --dry --debug
# Delete all files ending with .gz in mybucket
juicefs sync ./empty-dir/ s3://mybucket.s3.us-east-2.amazonaws.com/ --match-full-path --delete-dst --include='**.gz' --exclude='*' --dry --debug
```

## Accelerate synchronization {#accelerate-sync}

By default, `juicefs sync` starts 10 threads to run syncing jobs. You can set the `--threads` option to increase or decrease the number of threads as needed. However, adding threads beyond a system's resource limits may cause issues like out-of-memory errors. If performance is still insufficient, consider:

* Check if `SRC` or `DST` storage systems have reached bandwidth limits. If either is constrained, increasing concurrency will not help.

* Performing `juicefs sync` on a single host may be limited by host resources, such as CPU or network throttle. If this is the case, consider the following:

  * If a node with better hardware resources (such as CPU or network bandwidth) is available in your environment, consider using that node to run `juicefs sync` and access the source data via SSH. For example, `juicefs sync root@src:/data /jfs/data`.
  * Use [distributed synchronization](#distributed-sync) (introduced below).

* If the synchronized data is mainly small files, and the `list` API of `SRC` storage system has excellent performance, the default single-threaded `list` of `juicefs sync` may become a bottleneck. You can enable [concurrent `list`](#concurrent-list) (introduced below).

### Concurrent `list` {#concurrent-list}

If `Pending objects` in `juicefs sync` output remains 0, it means consumption is faster than production. You can increase `--list-threads` to enable concurrent `list` and then use `--list-depth` to control directory depth of `list`.

For example, if you are dealing with an object storage bucket used by JuiceFS, the directory structure is `/<vol-name>/chunks/xxx/xxx/...`. In this case, setting `--list-depth=2` enables concurrent listing on `<vol-name>/chunks`.

### Distributed synchronization {#distributed-sync}

Synchronizing between two object storage services is essentially pulling data from one and pushing it to the other. The efficiency of the synchronization depends on the bandwidth between the client and the cloud.

![JuiceFS-sync-single](../images/juicefs-sync-single.png)

When copying large scale data, node bandwidth can easily bottleneck the synchronization process. For this scenario, `juicefs sync` provides a multi-machine concurrent solution, as shown in the figure below.

![JuiceFS-sync-worker](../images/juicefs-sync-worker.png)

The manager node executes the `sync` command as the master and defines multiple worker nodes by setting the `--worker` option (the manager node also serves as a worker node). JuiceFS splits the workload and distributes it to workers for distributed synchronization. This increases the amount of data that can be processed per unit time, and the total bandwidth is also multiplied.

When using distributed syncing, you should configure SSH logins so that the manager can access all worker nodes without a password. If the SSH port is not the default 22, you need to include that in the manager's `~/.ssh/config`. The manager will distribute the JuiceFS Client to all worker nodes, so they should all use the same architecture to avoid compatibility problems.

For example, to synchronize data between two object storage services:

```shell
juicefs sync --worker bob@192.168.1.20,tom@192.168.8.10 s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

The synchronization workload between the two object storage services is shared by the manager machine and two workers, `bob@192.168.1.20` and `tom@192.168.8.10`.

The above command demonstrates object → object synchronization, if you need to sync via FUSE mount points, then you need to mount the file system in all worker nodes, and then run the following command to achieve distributed sync:

```shell
# Source file system needs better read performance, increase its buffer-size
parallel-ssh -h hosts.txt -i juicefs mount -d redis://10.10.0.8:6379/1 /jfs-src --buffer-size=1024 --cache-size=0

# Destination file system needs better write performance
parallel-ssh -h hosts.txt -i juicefs mount -d redis://10.10.0.8:6379/1 /jfs-dst --buffer-size=1024 --cache-size=0 --max-uploads=50

# Copy data
juicefs sync --worker host1,host2 /jfs-src /jfs-dst
```

## Observation {#observation}

When using `sync` to transfer large files, the progress bar might move slowly or get stuck. If this happens, you can observe the progress using other methods.

`sync` is designed for scenarios involving a large number of files. Its progress bar only updates when a file has been transferred. In a large file scenario, each file is transferred slowly, so the progress bar updates infrequently or even appears stuck. This is worse for destinations without multipart upload support (such as `file`, `sftp`, and `jfs` schemes), where each file is transferred using a single thread.

If you notice the progress bar is not changing, use the methods below for monitoring and troubleshooting:

* Add the [`--verbose` or `--debug`](../reference/command_reference.mdx#global-options) option to the `juicefs sync` command to print debug logs.

* If either end is a JuiceFS mount point:

  * Use [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) to quickly check current I/O status.
  * Review the [client log](../administration/fault_diagnosis_and_analysis.md#client-log) (default path: `/var/log/juicefs.log`) for [slow requests or timeout errors](../administration/troubleshooting.md#io-error-object-storage).

* If the destination is a local disk, check the directory for temporary files with `.jfs.xxx.tmp.xxx`. During the synchronization process, the transfer results are written to these temporary files. Once the transfer is complete, they are renamed to finalize the write. By monitoring the size changes of the temporary files, you can determine the current I/O status.

* If both the source and destination are object storage systems, use tools like `nethogs` to check network I/O.

* If none of the above methods provide useful debug information, please collect its goroutine and send it to Juicedata engineers:

    ```shell
    # Replace <PID> with the actual PID of the stuck sync process
    # This command will print its pprof listen port
    lsof -p <PID> | grep TCP | grep LISTEN
    # pprof port is typically 6061, but in the face of port conflict,
    # port number will be automatically increased
    curl -s localhost:6061/debug/pprof/goroutine?debug=1
    ```

## Application scenarios {#application-scenarios}

### Geo-disaster recovery backup {#geo-disaster-recovery-backup}

Geo-disaster recovery backup backs up files, and thus the files stored in JuiceFS should be synchronized to other object storages. For example, synchronize files from JuiceFS to object storage:

```shell
# mount JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# synchronization
juicefs sync /mnt/jfs/ s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/
```

### Build a JuiceFS data copy {#build-a-juicefs-data-copy}

Unlike the file-oriented disaster recovery backup, the purpose of creating a copy of JuiceFS data is to establish a mirror with exactly the same content and structure as the JuiceFS data storage. When the object storage in use fails, you can switch to the data copy by modifying the configurations. Note that only the file data of the JuiceFS file system is replicated, and the metadata stored in the metadata engine still needs to be backed up.

This requires manipulating the underlying object storage directly to synchronize it with the target object storage. For example, to take the object storage as the data copy of a JuiceFS volume:

```shell
juicefs sync cos://ABCDEFG:HIJKLMN@ccc-125000.cos.ap-beijing.myqcloud.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

### Sync across regions using S3 Gateway {#sync-across-region}

When transferring a large number of small files across different regions via FUSE mount points, clients will inevitably talk to the metadata service in the opposite region via the public internet (or dedicated network connection with limited bandwidth). In such cases, metadata latency can become the bottleneck of the data transfer:

![sync via public metadata service](../images/sync-public-metadata.svg)

JuiceFS S3 Gateway is the solution in these scenarios: by deploying a gateway in the source region, metadata is accessed over a private network, minimizing metadata latency and delivering optimal performance for small-file-intensive workloads.

![sync via gateway](../images/sync-via-gateway.svg)

Read [S3 Gateway](../guide/gateway.md) to learn its deployment and use.


================================================
FILE: docs/en/introduction/README.md
================================================
---
title: Introduction to JuiceFS
sidebar_position: 1
slug: .
pagination_next: introduction/architecture
---

[**JuiceFS**](https://github.com/juicedata/juicefs) is an open-source, high-performance distributed file system designed for the cloud, released under the Apache License 2.0. By providing full [POSIX](https://en.wikipedia.org/wiki/POSIX) compatibility, it allows almost all kinds of object storage to be used as massive local disks and to be mounted and accessed on different hosts across platforms and regions.

JuiceFS separates "data" and "metadata" storage. Files are split into chunks and stored in [object storage](../reference/how_to_set_up_object_storage.md#supported-object-storage) like Amazon S3. The corresponding metadata can be stored in various [databases](../reference/how_to_set_up_metadata_engine.md) such as Redis, MySQL, TiKV, and SQLite, based on the scenarios and requirements.

JuiceFS provides rich APIs for various forms of data management, analysis, archiving, and backup. It seamlessly interfaces with big data, machine learning, artificial intelligence and other application platforms without modifying code, and delivers massive, elastic, and high-performance storage at low cost. With JuiceFS, you do not need to worry about availability, disaster recovery, monitoring, and scalability. This greatly reduces maintenance work and makes it an excellent choice for DevOps.

## Features {#features}

- **POSIX Compatible**: JuiceFS can be used like a local file system, making it easy to integrate with existing applications.
- **HDFS Compatible**: JuiceFS is fully compatible with the [HDFS API](../deployment/hadoop_java_sdk.md), which can enhance metadata performance.
- **S3 Compatible**: JuiceFS provides an [S3 gateway](../guide/gateway.md) to implement an S3-compatible access interface.
- **Cloud-Native**: It is easy to use JuiceFS in Kubernetes via the [CSI Driver](../deployment/how_to_use_on_kubernetes.md).
- **Distributed**: Each file system can be mounted on thousands of servers at the same time with high-performance concurrent reads and writes and shared data.
- **Strong Consistency**: Any changes committed to files are immediately visible on all servers.
- **Outstanding Performance**: JuiceFS achieves millisecond-level latency and nearly unlimited throughput depending on the object storage scale (see [performance test results](../benchmark/benchmark.md)).
- **Data Security**: JuiceFS supports encryption in transit and encryption at rest (view [Details](../security/encryption.md)).
- **File Lock**: JuiceFS supports BSD lock (flock) and POSIX lock (fcntl).
- **Data Compression**: JuiceFS supports the [LZ4](https://lz4.github.io/lz4) and [Zstandard](https://facebook.github.io/zstd) compression algorithms to save storage space.

## Scenarios {#scenarios}

JuiceFS is designed for massive data storage and can be used as an alternative to many distributed file systems and network file systems, especially in the following scenarios:

- **Big Data**: JuiceFS is compatible with HDFS and can be seamlessly integrated with mainstream computing engines such as Spark, Presto, and Hive, bringing much better performance than directly using object storage.
- **Machine Learning**: JuiceFS is compatible with POSIX and supports all machine learning and deep learning frameworks. As a shareable file storage, JuiceFS can improve the efficiency of team management and data usage.
- **Kubernetes**: JuiceFS supports Kubernetes CSI, providing decoupled persistent storage for pods so that your application can be stateless, also great for data sharing among containers.
- **Shared Workspace**: JuiceFS file system can be mounted on any host, allowing concurrent read/write operations without limitations. Its POSIX compatibility ensures smooth data flow and supports scripting operations.
- **Data Backup**: JuiceFS provides scalable storage space for backing up all kinds of data. With its shared mount feature, data from multiple hosts can be aggregated into one place and then backed up together.

## Data privacy {#data-privacy}

JuiceFS is an open-source software available on [GitHub](https://github.com/juicedata/juicefs). When using JuiceFS to store data, the data is split into chunks according to specific rules and stored in custom object storage or other storage media, and the corresponding metadata is stored in a custom database.

## More info {#more-info}

* **Use case**: For more use cases of similar scenarios, please visit [User Stories](https://juicefs.com/en/blog/user-stories).
* **Join the community**: Welcome to join [Slack](https://go.juicefs.com/slack) to discuss with JuiceFS users.
* **AI assistant**: If you encounter any problems, you are welcome to use the "Ask AI" feature (in the bottom right corner) to get assistance from the AI assistant. The knowledge base of the AI ​​assistant comes from documentation and related content on GitHub.


================================================
FILE: docs/en/introduction/architecture.md
================================================
---
title: Architecture
sidebar_position: 2
slug: /architecture
description: This article introduces the technical architecture of JuiceFS and its technical advantages.
---

The JuiceFS file system consists of three parts:

![JuiceFS-arch](../images/juicefs-arch.svg)

**JuiceFS Client**: The JuiceFS client handles all file I/O operations, including background tasks like data compaction and trash file expiration. It communicates with both the object storage and metadata engine. The client supports multiple access methods:

- **FUSE**: JuiceFS file system can be mounted on a host in a POSIX-compatible manner, allowing the massive cloud storage to be used as local storage. For details, see [this document](https://juicefs.com/docs/community/getting-started/installation).
- **Python SDK**: In scenarios where FUSE mounting is not feasible or where direct file system access from within a Python process is required, the Python SDK can read and write the file system directly. Furthermore, the Python SDK natively implements fsspec for easy integration with frameworks like Ray. For details, see [Python_SDK](https://juicefs.com/docs/community/deployment/python_sdk).
- **Windows Client**: You can experience a file system performance close to that of a local one. For details, see [Use JuiceFS on Windows](https://juicefs.com/docs/community/tutorials/windows).
- **Hadoop Java SDK**: JuiceFS can replace HDFS, providing Hadoop with cost-effective and abundant storage capacity.
For details, see [Use JuiceFS on Hadoop Ecosystem](https://juicefs.com/docs/community/hadoop_java_sdk).
- **Kubernetes CSI Driver**: JuiceFS provides shared storage for containers in Kubernetes through its CSI Driver. For details, see [Introduction to JuiceFS CSI Driver](https://juicefs.com/docs/csi/introduction).
- **S3 Gateway**: Applications using S3 as the storage layer can directly access the JuiceFS file system, and tools such as AWS CLI, s3cmd, and MinIO client can be used to access the JuiceFS file system at the same time. For details, see [JuiceFS S3 Gateway](https://juicefs.com/docs/community/guide/gateway).
- **WebDAV Server**: Files in JuiceFS can be operated directly using the HTTP protocol.

**Data Storage**: File data is split and stored in object storage. JuiceFS supports virtually all types of object storage, including typical self-hosted solutions like OpenStack Swift, Ceph, and MinIO.

**Metadata Engine**: The Metadata Engine stores file metadata, which contains:

- Common file system metadata: file name, size, permission information, creation and modification time, directory structure, file attribute, symbolic link, file lock.
- JuiceFS-specific metadata: file data mapping, reference counting, client session, etc.

JuiceFS supports a variety of common databases as the metadata engine, like Redis, TiKV, MySQL/MariaDB, PostgreSQL, and SQLite, and the list is still expanding. [Submit an issue](https://github.com/juicedata/juicefs/issues) if your favorite database is not supported.

## How JuiceFS stores files {#how-juicefs-store-files}

Traditional file systems use local disks to store both file data and metadata. However, JuiceFS formats data first and then stores it in the object storage, with the corresponding metadata being stored in the metadata engine.

In JuiceFS, each file is composed of one or more *chunks*. Each chunk has a maximum size of 64 MB. Regardless of the file's size, all reads and writes are located based on their offsets (the position in the file where the read or write operation occurs) to the corresponding chunk. This design enables JuiceFS to achieve excellent performance even with large files. As long as the total length of the file remains unchanged, the chunk division of the file remains fixed, regardless of how many modifications or writes the file undergoes.

![File and chunks](../images/file-and-chunks.svg)

Chunks exist to optimize lookup and positioning, while the actual file writing is performed on *slices*. In JuiceFS, each slice represents a single continuous write, belongs to a specific chunk, and cannot overlap between adjacent chunks. This ensures that the slice length never exceeds 64 MB.

For example, if a file is generated through a continuous sequential write, each chunk contains only one slice. The figure above illustrates this scenario: a 160 MB file is sequentially written, resulting in three chunks, each containing only one slice.

File writing generates slices, and invoking `flush` persists these slices. `flush` can be explicitly called by the user, and even if not invoked, the JuiceFS client automatically performs `flush` at the appropriate time to prevent buffer overflow (refer to [buffer-size](../guide/cache.md#buffer-size)). When persisting to the object storage, slices are further split into individual *blocks* (default maximum size of 4 MB) to enable multi-threaded concurrent writes, thereby enhancing write performance. The previously mentioned chunks and slices are logical data structures, while blocks represent the final physical storage form and serve as the smallest storage unit for the object storage and disk cache.

![Split slices to blocks](../images/slice-to-block.svg)

After writing a file to JuiceFS, you cannot find the original file directly in the object storage. Instead, the storage bucket contains a `chunks` folder and a series of numbered directories and files. These numerically named object storage files are the blocks split and stored by JuiceFS. The mapping between these blocks, chunks, slices, and other metadata information (such as file names and sizes) is stored in the metadata engine. This decoupled design makes JuiceFS a high-performance file system.

![How JuiceFS stores files](../images/how-juicefs-stores-files.svg)

Regarding logical data structures, if a file is not generated through continuous sequential writes but through multiple append writes, each append write triggers a `flush` to initiate the upload, resulting in multiple slices. If the data size for each append write is less than 4 MB, the data blocks eventually stored in the object storage are smaller than 4 MB blocks.

![Small append writes](../images/small-append.svg)

Depending on the writing pattern, the arrangement of slices can be diverse:

- If a file is repeatedly modified in the same part, it results in multiple overlapping slices.
- If writes occur in non-overlapping parts, there will be gaps between slices.

However complex the arrangement of slices may be, when reading a file, the most recent written slice is read for each file position. The figure below illustrates this concept: while slices may overlap, reading the file always occurs "from top to bottom." This ensures that you see the latest state of the file.

![Complicate pattern](../images/complicate-pattern.svg)

Due to the potential overlapping of slices, JuiceFS [marks the valid data offset range for each slice](../development/internals.md#sliceref) in the reference relationship between chunks and slices. This approach informs the file system of the valid data in each slice.

However, it is not difficult to imagine that looking up the "most recently written slice within the current read range" during file reading, especially with a large number of overlapping slices as shown in the figure, can significantly impact read performance. This leads to what we call "file fragmentation." File fragmentation not only affects read performance but also increases space usage at various levels (object storage, metadata). Hence, whenever a write occurs, the client evaluates the file's fragmentation and runs the fragmentation compaction asynchronously, merging all slices within the same chunk into one.

![File fragmentation compaction](../images/compaction.svg)

Additional technical aspects of JuiceFS storage design:

* Irrespective of the file size, JuiceFS avoids storage merging to prevent read amplification and ensure optimal performance.
* JuiceFS provides strong consistency guarantees while allowing tuning options with caching mechanisms tailored to specific use cases. For example, by configuring more aggressive metadata caching, a certain level of consistency can be traded for enhanced performance. For more details, see [Metadata cache](../guide/cache.md#metadata-cache).
* JuiceFS supports the ["Trash"](../security/trash.md) functionality and enables it by default. After a file is deleted, it is retained for a certain period before being permanently cleared. This helps you avoid data loss caused by accidental deletion.


================================================
FILE: docs/en/introduction/comparison/_category_.yml
================================================
position: 4
label: "Comparing with Others"
# collapsible: true 
# collapsed: true 

================================================
FILE: docs/en/introduction/comparison/juicefs_vs_3fs.md
================================================
---
slug: /comparison/juicefs_vs_3fs
description: This article compares the architectures, features, and innovations of DeepSeek 3FS and JuiceFS in AI storage scenarios.
---

# JuiceFS vs. 3FS

3FS (Fire-Flyer File System) is a high-performance distributed file system designed for AI training and inference workloads, open-sourced by DeepSeek. It uses NVMe SSDs and RDMA networks to provide a shared storage layer, optimized for the demanding I/O requirements of large-scale AI applications.

JuiceFS is a cloud-native distributed file system that stores data in object storage. The Community Edition, open-sourced on GitHub in 2021, integrates with multiple metadata services and supports diverse use cases. The Enterprise Edition, tailored for high-performance scenarios, is widely adopted in large-scale AI tasks, including generative AI, autonomous driving, quantitative finance, and biotechnology.

This document provides a comprehensive comparison between 3FS and JuiceFS in terms of architecture, file distribution, RPC framework, and features.

## Architecture comparison

### 3FS

3FS employs an architecture designed for AI workloads with the following key components:

- **Cluster manager**: Handles node membership changes and distributes cluster configurations to other components. Multiple cluster managers are deployed with one elected as primary for high availability.
- **Metadata service**: Stateless services that handle file metadata operations, backed by FoundationDB—a transactional key-value database for storing metadata.
- **Storage service**: Manages data storage using local NVMe SSDs with CRAQ (Chain Replication with Apportioned Queries) for data consistency.
- **Clients**: Provides both FUSE client for POSIX compatibility and native client API for high-performance zero-copy operations.

All components communicate via RDMA for networking. Cluster configurations are stored in reliable distributed services like ZooKeeper or etcd.

![3FS architecture](https://static1.juicefs.com/images/3FS_JiaGou.original.png)

### JuiceFS

JuiceFS uses a modular, cloud-native architecture that comprises three core components:

- **Metadata engine**: Stores file metadata, including standard file system metadata and file data indexes. The Community Edition supports various databases including Redis, TiKV, MySQL, PostgreSQL, and FoundationDB. The Enterprise Edition uses a self-developed distributed metadata service.
- **Data storage**: Generally an object storage service, which can be public cloud object storage or on-premises deployed object storage service. Supports integration with various storage backends.
- **JuiceFS client**: Provides different access methods such as POSIX (FUSE), Hadoop SDK, CSI Driver, and S3 Gateway.

![JuiceFS Community Edition architecture](../../images/juicefs-arch.svg)

### Architectural differences

#### Storage module

3FS employs local NVMe SSDs for data storage and utilizes the CRAQ (Chain Replication with Apportioned Queries) algorithm to ensure data consistency. Replicas are organized into a chain where write requests start from the head and propagate sequentially to the tail. A write operation is confirmed only after reaching the tail. For read requests, any replica in the chain can be queried.

![CRAQ consistency algorithm](https://static1.juicefs.com/images/CRAQ_YiZhiXingSuanFa.original.png)

While this design introduces higher write latency due to sequential propagation, it prioritizes read performance, which is crucial for read-intensive AI workloads.

In contrast, JuiceFS uses object storage as its data storage solution, inheriting key advantages such as data reliability and consistency. The storage module provides standard object operation interfaces (GET/PUT/HEAD/LIST), enabling seamless integration with various storage backends. JuiceFS Community Edition provides local cache for AI scenario bandwidth requirements, while the Enterprise Edition uses distributed cache for larger aggregate read bandwidth needs.

#### Metadata module

In 3FS, file attributes are stored as key-value pairs within a stateless, high-availability metadata service, backed by FoundationDB. FoundationDB ensures global ordering of keys and evenly distributes data across nodes via consistent hashing. To optimize directory listing efficiency, 3FS constructs dentry keys by combining a "DENT" prefix with the parent directory's inode number and file name.

JuiceFS Community Edition provides a metadata module that offers a set of interfaces for metadata operations, supporting integration with various metadata services including key-value databases (Redis, TiKV), relational databases (MySQL, PostgreSQL), and FoundationDB. The Enterprise Edition employs a proprietary high-performance metadata service that dynamically balances data and hot operations based on workload patterns.

#### Client

3FS provides both a FUSE client and a native client API to bypass FUSE for direct data operations. The native client eliminates data copying introduced by the FUSE layer, reducing I/O latency and memory bandwidth overhead through zero-copy communication using shared memory and semaphores.

![3FS native client API](https://static1.juicefs.com/images/3FS_NATIVE_Client_API.original.png)

3FS uses `hf3fs_iov` to store shared memory attributes and `IoRing` for inter-process communication. The system creates virtual files and uses semaphores to facilitate communication between the user process and FUSE process.

JuiceFS' FUSE client offers a more comprehensive implementation with features such as:

- Immediate file length updates after successful object upload
- BSD locks (flock) and POSIX locks (fcntl)
- Advanced interfaces like `file_copy_range`, `readdirplus`, and `fallocate`

Beyond the FUSE client, JuiceFS Community Edition also provides Java SDK, Python SDK, S3 Gateway, and CSI Driver for user-space execution, with the Enterprise Edition offering additional enterprise-grade features.

## File distribution comparison

### 3FS file distribution

3FS uses fixed-size chunks, allowing clients to calculate which chunks an I/O request targets based on the file inode and request offset/length, avoiding database queries for each I/O operation. The chunk index is obtained through `offset/chunk_size`, and the chain index through `chunk_id%stripe`.

To address data imbalance, the first chain of each file is selected in a round-robin manner. When a file is created, chains are randomly sorted and stored in metadata.

![3FS file distribution](https://static1.juicefs.com/images/3FS_WenJianFenBu.original.png)

### JuiceFS file distribution

JuiceFS manages data blocks according to chunk, slice, and block rules. Each chunk is fixed at 64MB for optimizing data search and positioning. Actual file write operations are performed on slices, which represent continuous write processes within chunks. Blocks (default 4MB) are the basic unit of physical storage in object storage and disk cache.

![JuiceFS file distribution](../../images/file-and-chunks.svg)

Slice is a unique structure in JuiceFS that records file write operations and persists them in object storage. Since object storage doesn't support in-place file modification, JuiceFS uses slices to update file content without rewriting entire files. All slices are written once, reducing reliance on underlying object storage consistency and simplifying cache system complexity.

## 3FS RPC framework

3FS implements a custom RPC framework using RDMA as the underlying network communication protocol, which JuiceFS currently doesn't support. The framework provides capabilities such as serialization and packet merging, using templates to implement reflection for data structure serialization.

![3FS FUSE client RPC process](https://static1.juicefs.com/images/3FS_FUSE_Client_DiaoYong_MetadataFuWuDe_RPC_Guo.original.png)

The 3FS cache system consists of TLS (Thread-Local Storage) queues and global queues. Memory allocation from TLS queues requires no locks, while global queue access requires locking. Multiple RPC requests may be merged into one InfiniBand request for efficiency.

## Feature comparison

| Features | 3FS | JuiceFS Community | JuiceFS Enterprise |
|----------|-----|-------------------|-------------------|
| Metadata | Stateless metadata service + FoundationDB | External database | Self-developed high-performance distributed metadata engine (horizontally scalable) |
| Data storage | Self-managed | Object storage | Object storage |
| Redundancy | Multi-replica | Provided by object storage | Provided by object storage |
| Data caching | None | Local cache | Self-developed high-performance multi-copy distributed cache |
| Encryption | Not supported | Supported | Supported |
| Compression | Not supported | Supported | Supported |
| Quota management | Not supported | Supported | Supported |
| Network protocol | RDMA | TCP | TCP |
| Snapshots | Not supported | Supports cloning | Supports cloning |
| POSIX ACL | Not supported | Supported | Supported |
| POSIX compliance | Partial | Fully compatible | Fully compatible |
| CSI Driver | No official support | Supported | Supported |
| Clients | FUSE + native client | POSIX (FUSE), Java SDK, Python SDK, S3 Gateway | POSIX (FUSE), Java SDK, S3 Gateway, Python SDK |
| Multi-cloud mirroring | Not supported | Not supported | Supported |
| Cross-cloud/region replication | Not supported | Not supported | Supported |
| Main maintainer | DeepSeek | Juicedata | Juicedata |
| Development language | C++, Rust (local storage engine) | Go | Go |
| License | MIT | Apache License 2.0 | Commercial |

## Summary

For large-scale AI training, 3FS adopts a performance-first design approach:

- **Local storage**: Uses local NVMe SSDs, requiring users to manage underlying storage infrastructure
- **Zero-copy optimization**: Achieves zero-copy from client to NIC, reducing I/O latency and memory bandwidth usage via shared memory and semaphores
- **RDMA networking**: Leverages RDMA for better networking performance
- **Optimized I/O**: Enhances small I/O and metadata operations with TLS-backed I/O buffer pools and merged network requests

While this approach can deliver performance improvements, it comes with higher costs and greater maintenance complexity.

JuiceFS uses object storage as its backend, significantly reducing costs and simplifying maintenance. To meet AI workloads' performance demands:

- **Enterprise Edition features**: Distributed caching, distributed metadata service, and Python SDK
- **Upcoming optimizations**: v5.2 adds zero-copy over TCP for faster data transfers
- **Cloud-native advantages**: Full POSIX compatibility, mature open-source ecosystem, and Kubernetes CSI support
- **Enterprise capabilities**: Quotas, security management, and disaster recovery features


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_alluxio.md
================================================
---
slug: /comparison/juicefs_vs_alluxio
description: This article compares the main features of Alluxio and JuiceFS.
---

# JuiceFS vs. Alluxio

Alluxio (/əˈlʌksio/) is a data access layer in the big data and machine learning ecosystem. Initially as the research project "Tachyon," it was created at the University of California, Berkeley's [AMPLab](https://en.wikipedia.org/wiki/AMPLab) as creator's Ph.D. thesis in 2013. Alluxio was open sourced in 2014.

The following table compares the main features of Alluxio and JuiceFS.

| Features                  | Alluxio            | JuiceFS            |
| --------                  | -------            | -------            |
| Storage format            | Object             | Block              |
| Cache granularity         | 64 MiB             | 4 MiB               |
| Multi-tier cache          | ✓                  | ✓                  |
| Hadoop-compatible         | ✓                  | ✓                  |
| S3-compatible             | ✓                  | ✓                  |
| Kubernetes CSI Driver     | ✓                  | ✓                  |
| Hadoop data locality      | ✓                  | ✓                  |
| Fully POSIX-compatible    | ✕                  | ✓                  |
| Atomic metadata operation | ✕                  | ✓                  |
| Consistency               | ✕                  | ✓                  |
| Data compression          | ✕                  | ✓                  |
| Data encryption           | ✕                  | ✓                  |
| Zero-effort operation     | ✕                  | ✓                  |
| Language                  | Java               | Go                 |
| Open source license       | Apache License 2.0 | Apache License 2.0 |
| Open source date          | 2014               | 2021.1             |

## Storage format

JuiceFS has its own storage format, where files are divided into blocks, and they can be optionally encrypted and compressed before being uploaded to the object storage. For more details, see [How JuiceFS stores files](../architecture.md#how-juicefs-store-files).

In contrast, Alluxio stores files as _objects_ into UFS and does not split them into blocks like JuiceFS does.

## Cache granularity

JuiceFS has a smaller [default block size](../architecture.md#how-juicefs-store-files) of 4 MiB, which results in a finer granularity compared to Alluxio's 64 MiB. The smaller block size of JuiceFS is more beneficial for workloads involving random reads (e.g., Parquet and ORC), as it improves cache management efficiency.

## Hadoop-compatible

JuiceFS is [HDFS-compatible](../../deployment/hadoop_java_sdk.md), supporting not only Hadoop 2.x and Hadoop 3.x, but also various components in the Hadoop ecosystem.

## Kubernetes CSI Driver

JuiceFS provides [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver) for easy integration with Kubernetes environments. While Alluxio also offers [Kubernetes CSI Driver](https://github.com/Alluxio/alluxio-csi), it seems to have limited activity and lacks official support from Alluxio.

## Fully POSIX-compatible

JuiceFS is [fully POSIX-compatible](../../reference/posix_compatibility.md). A pjdfstest from [JD.com](https://www.slideshare.net/Alluxio/using-alluxio-posix-fuse-api-in-jdcom) shows that Alluxio did not pass the POSIX compatibility test. For example, Alluxio does not support symbolic links, truncate, fallocate, append, xattr, mkfifo, mknod and utimes. Besides the things covered by pjdfstest, JuiceFS also provides close-to-open consistency, atomic metadata operations, mmap, fallocate with punch hole, xattr, BSD locks (flock), and POSIX record locks (fcntl).

## Atomic metadata operation

In Alluxio, a metadata operation involves two steps: modifying the state of the Alluxio master and sending a request to the UFS. This process is not atomic, and the state is unpredictable during execution or in case of failures. Additionally, Alluxio relies on UFS to implement metadata operations. For example, rename file operations will become copy and delete operations.

Thanks to [Redis transactions](https://redis.io/topics/transactions), **most metadata operations in JuiceFS are atomic**, for example, file renaming, file deletion, and directory renaming. You do not have to worry about the consistency and performance.

## Consistency

Alluxio loads metadata from the UFS as needed. It lacks information about UFS at startup. By default, Alluxio expects all modifications on UFS to be completed through Alluxio. If changes are made directly on UFS, you need to sync metadata between Alluxio and UFS either manually or periodically. As we have mentioned in [Atomic metadata operation](#atomic-metadata-operation) section, the two-step metadata operation may result in inconsistency.

JuiceFS provides strong consistency for both metadata and data. **The metadata service of JuiceFS is the single source of truth, not a mirror of UFS.** The metadata service does not rely on object storage to obtain metadata, and object storage is just treated as unlimited block storage. This ensures there are no inconsistencies between JuiceFS and object storage.

## Data compression

JuiceFS supports data compression using [LZ4](https://lz4.github.io/lz4) or [Zstandard](https://facebook.github.io/zstd) for all your data, while Alluxio does not offer this feature.

## Data encryption

JuiceFS supports data encryption both in transit and at rest. Alluxio community edition lacks this feature, while it is available in the [enterprise edition](https://docs.alluxio.io/ee/user/stable/en/operation/Security.html#end-to-end-data-encryption).

## Zero-effort operations

Alluxio's architecture can be divided into three components: master, worker and client. A typical cluster consists of a single leading master, standby masters, a job master, standby job masters, workers, and job workers. You need to maintain all these masters and workers by yourself.

JuiceFS uses Redis or [other databases](../../reference/how_to_set_up_metadata_engine.md) as the metadata engine. You could easily use the service managed by a public cloud provider as JuiceFS' metadata engine, without any operational overhead.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_cephfs.md
================================================
---
slug: /comparison/juicefs_vs_cephfs
description: Ceph is a unified system that provides object storage, block storage and file storage. This article compares the similarities and differences between JuiceFS and Ceph.
---

# JuiceFS vs. CephFS

This document offers a comprehensive comparison between JuiceFS and CephFS. You will learn their similarities and differences in their system architectures and features.

## Similarities

Both are highly reliable, high-performance, resilient distributed file systems with good POSIX compatibility, suitable for various scenarios.

## Differences

### System architecture

Both JuiceFS and CephFS employ an architecture that separates data and metadata, but they differ greatly in implementations.

#### CephFS

CephFS is a complete and independent system used mainly for private cloud deployments. Through CephFS, all file metadata and data are persistently stored in Ceph's distributed object store (RADOS).

- Metadata
  - Metadata Server (MDS): Stateless and theoretically horizontally scalable. There are mature primary-secondary mechanisms, while concerns about performance and stability still exist in multi-primary deployments. Production environments typically adopt one-primary-multiple-secondary or multi-primary static isolation.
  - Persistent: Independent RADOS storage pools, usually used with SSDs or higher-performance hardware storage.
- Data: Stored in one or more RADOS storage pools, supporting different configurations through _Layout_, such as chunk size (default 4 MiB) and redundancy (multi-copy, EC).
- Client: Supports kernel client (`kcephfs`), user-state client (`ceph-fuse`) and libcephfs-based SDKs for C++, Python, etc.; recently the community has also provided a Windows client (`ceph-dokan`). VFS object for Samba and an FSAL module for NFS-Ganesha are also available in the ecosystem.

#### JuiceFS

JuiceFS provides a libjfs library, a FUSE client application, Java SDK, etc. It supports various metadata engines and object storages, and can be deployed in public, private, or hybrid cloud environments.

- Metadata: Supports [various databases](../../reference/how_to_set_up_metadata_engine.md), including:
  - Redis and various variants of the Redis-compatible protocol (transaction supports are required)
  - SQL family: MySQL, PostgreSQL, SQLite, etc.
  - Distributed K/V storage: TiKV, FoundationDB, etcd
  - A self-developed engine: a JuiceFS fully managed service used on the public cloud.
- Data: Supports over 30 types of [object storage](../../reference/how_to_set_up_object_storage.md) on the public cloud and can also be used with MinIO, Ceph RADOS, Ceph RGW, etc.
- Clients: Supports Unix user-state mounting, Windows mounting, Java SDK with full HDFS semantic compatibility, [Python SDK](https://github.com/megvii-research/juicefs-python), and a built-in S3 gateway.

### Features

| Comparison basis                | CephFS                | JuiceFS               |
| ------------------------------- | --------------------- | --------------------- |
| File chunking<sup> [1]</sup>    | ✓                     | ✓                     |
| Metadata transactions           | ✓                     | ✓                     |
| Strong consistency              | ✓                     | ✓                     |
| Kubernetes CSI Driver           | ✓                     | ✓                     |
| Hadoop-compatible               | ✓                     | ✓                     |
| Data compression<sup> [2]</sup> | ✓                     | ✓                     |
| Data encryption<sup> [3]</sup>  | ✓                     | ✓                     |
| Snapshot                        | ✓                     | ✕                     |
| Client data caching             | ✕                     | ✓                     |
| Hadoop data locality            | ✕                     | ✓                     |
| S3-compatible                   | ✕                     | ✓                     |
| Quota                           | Directory level quota | Directory level quota |
| Languages                       | C++                   | Go                    |
| License                         | LGPLv2.1 & LGPLv3     | Apache License 2.0    |

#### [1] File chunking

CephFS splits files by [`object_size`](https://docs.ceph.com/en/latest/cephfs/file-layouts/#reading-layouts-with-getfattr) (default 4MiB). Each chunk corresponds to a RADOS object.  In contrast, JuiceFS splits files into 64MiB chunks and it further divides each chunk into logical slices during writing according to the actual situation. These slices are then split into logical blocks when writing to the object store, with each block corresponding to an object in the object storage. When handling overwrites, CephFS modifies corresponding objects directly, which is a complicated process. Especially, when the redundancy policy is EC or the data compression is enabled, part of the object content needs to be read first, modified in memory, and then written. This leads to great performance overhead. In comparison, JuiceFS handles overwrites by writing the updated data as new objects and modifying the metadata at the same time, which greatly improves the performance. Any redundant data generated during the process will go to garbage collection asynchronously.

#### [2] Data compression

Strictly speaking, CephFS itself does not provide data compression but relies on the BlueStore compression on the RADOS layer. JuiceFS, on the other hand, has already compressed data once before uploading a block to the object storage to reduce the capacity cost in the object storage. In other words, if you use JuiceFS to interact with RADOS, you compress a block both before and after it enters RADOS, twice in total. Also, as mentioned in **File chunking**, to guarantee overwrite performance, CephFS usually does not enable the BlueStore compression.

#### [3] Data encryption

On network transport layer, Ceph encrypts data by using **Messenger v2**, while on data storage layer, the data encryption is done at OSD creation, which is similar to data compression.

JuiceFS encrypts objects before uploading and decrypts them after downloading. This is completely transparent to the object storage.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_glusterfs.md
================================================
---
title: JuiceFS vs. GlusterFS
slug: /comparison/juicefs_vs_glusterfs
description: This document compares the design and features of GlusterFS and JuiceFS, helping you make an informed decision for selecting a storage solution.
---

[GlusterFS](https://github.com/gluster/glusterfs) is an open-source software-defined distributed storage solution. It can support data storage of PiB levels within a single cluster.

JuiceFS is an open-source, high-performance distributed file system designed for the cloud. It delivers massive, elastic, and high-performance storage at low cost.

This document compares the key attributes of JuiceFS and GlusterFS in a table and then explores them in detail, offering insights to aid your team in the technology selection process. You can easily see their main differences in the table below and delve into specific topics you're interested in within this article.

## A quick summary of GlusterFS vs. JuiceFS {#a-quick-summary-of-glusterfs-vs-juicefs}

The table below provides a quick overview of the differences between GlusterFS and JuiceFS:

| **Comparison basis** | **GlusterFS** | **JuiceFS** |
| :--- | :--- | :--- |
| Metadata | Purely distributed | Independent database |
| Data storage | Self-managed | Relies on object storage |
| Large file handling | Doesn't split files | Splits large files |
| Redundancy protection | Replication, erasure coding | Relies on object storage |
| Data compression | Partial support | Supported |
| Data encryption | Partial support | Supported |
| POSIX compatibility | Full | Full |
| NFS protocol | Not directly supported | Not directly supported |
| CIFS protocol | Not directly supported | Not directly supported |
| S3 protocol | Supported (but not updated) | Supported |
| HDFS compatibility | Supported (but not updated) | Supported |
| CSI Driver | Supported | Supported |
| POSIX ACLs | Supported | Supported |
| Cross-cluster replication | Supported | Relies on external service |
| Directory quotas | Supported | Supported |
| Snapshots | Supported | Not supported (but supports cloning) |
| Trash | Supported | Supported |
| Primary maintainer | Red Hat, Inc | Juicedata, Inc |
| Development language | C | Go |
| Open source license | GPLv2 and LGPLv3+ | Apache License 2.0 |

## System architecture comparison {#system-architecture-comparison}

### GlusterFS' architecture {#glusterfs-architectire}

GlusterFS employs a fully distributed architecture without centralized nodes. A GlusterFS cluster consists of the server and the client. The server side manages and stores data, often referred to as the Trusted Storage Pool. This pool comprises a set of server nodes, each running two types of processes:

* glusterd: One per node, which manages and distributes configuration.
* glusterfsd: One per [brick](https://docs.gluster.org/en/latest/glossary/#Brick) (storage unit), which handles data requests and interfaces with the underlying file system.

All files on each brick can be considered a subset of GlusterFS. File content accessed directly through the brick or via GlusterFS clients is typically consistent. If GlusterFS experiences an exception, users can partially recover original data by integrating content from multiple bricks. Additionally, for fault tolerance during deployment, data is often redundantly protected. In GlusterFS, multiple bricks form a redundancy group, protecting data through replication or erasure coding. When a node experiences a failure, recovery can only be performed within the redundancy group, which may result in longer recovery times. When scaling a GlusterFS cluster, the scaling is typically performed on a redundancy group basis.

The client side, which mounts GlusterFS, presents a unified namespace to applications. The architecture diagram is as follows (source: [GlusterFS Architecture](https://docs.gluster.org/en/latest/Quick-Start-Guide/Architecture)):

![GlusterFS architecture](../../images/glusterfs-architecture.jpg)

### JuiceFS' architecture {#juicefs-architecture}

JuiceFS adopts an architecture that separates its data and metadata storage. File data is split and stored in object storage systems like Amazon S3, while metadata is stored in a user-selected database like Redis or MySQL. By sharing the same database and object storage, JuiceFS achieves a strongly consistent distributed file system with features like full POSIX compatibility and high performance. For a more detailed introduction, see [the documentation](../architecture.md).

![JuiceFS architecture](../../images/juicefs-arch-new.png)

## Metadata management comparison {#metadata-management-comparison}

### GlusterFS {#glusterfs}

Metadata in GlusterFS is purely distributed, lacking a centralized metadata service. Clients use file name hashing to determine the associated brick. When requests require access across multiple bricks, for example, `mv` and `ls`, the client is responsible for coordination. While this design is simple, it can lead to performance bottlenecks as the system scales. For instance, listing a large directory might require accessing multiple bricks, and any latency in one brick can slow down the entire request. Additionally, ensuring metadata consistency when performing cross-brick modifications in the event of failures can be challenging, and severe failures may lead to split-brain scenarios, requiring [manual data recovery](https://docs.gluster.org/en/latest/Troubleshooting/resolving-splitbrain) to achieve a consistent version.

### JuiceFS {#juicefs}

JuiceFS metadata is stored in an independent database, which is called the metadata engine. Clients transform file metadata operations into transactions within this database, leveraging its transactional capabilities to ensure operation atomicity. This design simplifies JuiceFS implementation but places higher demands on the metadata engine. JuiceFS currently supports three categories of transactional databases. For details, see the [metadata engine document](../../reference/how_to_set_up_metadata_engine.md).

## Data management comparison {#data-management-comparison}

GlusterFS stores data by integrating multiple server nodes' bricks (typically built on local file systems like XFS). Therefore, it provides certain data management features, including distribution management, redundancy protection, fault switching, and silent error detection.

JuiceFS, on the other hand, does not use physical disks directly but manages data through integration with various object storage systems. Most of its features rely on the capabilities of its object storage.

### Large file splitting {#large-file-splitting}

In distributed systems, splitting large files into smaller chunks and storing them on different nodes is a common optimization technique. This often leads to higher concurrency and bandwidth when applications access such files.

* GlusterFS does not split large files (although it used to support Striped Volumes for large files, this feature is no longer supported).
* JuiceFS splits files into 64 MiB chunks by default, and each chunk is further divided into 4 MiB blocks based on the write pattern. For details, see [How JuiceFS stores files](../architecture.md#how-juicefs-store-files).

### Redundancy protection {#redundancy-protection}

GlusterFS supports both replication (Replicated Volume) and erasure coding (Dispersed Volume).

JuiceFS relies on the redundancy capabilities of the underlying object storage it uses.

### Data compression {#data-compression}

GlusterFS:

* Supports only transport-layer compression. Files are compressed by clients, transmitted to the server, and decompressed by the bricks.
* Does not implement storage-layer compression but depends on the underlying file system used by the bricks, such as [ZFS](https://docs.gluster.org/en/latest/Administrator-Guide/Gluster-On-ZFS).

JuiceFS supports both transport-layer and storage-layer compression. Data compression and decompression are performed on the client side.

### Data encryption {#data-encryption}

GlusterFS:

* Supports only [transport-layer encryption](https://docs.gluster.org/en/latest/Administrator-Guide/SSL), relying on SSL/TLS.
* Previously supported [storage-layer encryption](https://github.com/gluster/glusterfs-specs/blob/master/done/GlusterFS%203.5/Disk%20Encryption.md), but it is no longer supported.

JuiceFS supports both [transport-layer and storage-layer encryption](../../security/encryption.md). Data encryption and decryption are performed on the client side.

## Access protocols {#access-protocols}

### POSIX compatibility {#posix-compatibility}

Both [GlusterFS](https://docs.gluster.org/en/latest/glossary) and [JuiceFS](../../reference/posix_compatibility.md) offer POSIX compatibility.

### NFS protocol {#nfs-protocol}

GlusterFS previously had embedded support for NFSv3 but now it is [no longer recommended](https://github.com/gluster/glusterfs-specs/blob/master/done/GlusterFS%203.8/gluster-nfs-off.md). Instead, it is suggested to export the mount point using an NFS server.

JuiceFS does not provide direct support for NFS and requires mounting followed by [export via another NFS server](../../deployment/nfs.md).

### CIFS protocol {#cifs-protocol}

GlusterFS embeds support for Windows, Linux Samba clients, and macOS CLI access (excluding macOS Finder). However, it is recommended to [use Samba for exporting mount points](https://docs.gluster.org/en/latest/Administrator-Guide/Setting-Up-Clients/#testing-mounted-volumes).

JuiceFS does not offer direct support for CIFS and requires mounting followed by [export via Samba](../../deployment/samba.md).

### S3 protocol {#s3-protocol}

GlusterFS supports S3 through the [`gluster-swift`](https://github.com/gluster/gluster-swift) project, but the project hasn't seen recent updates since November 2017.

JuiceFS supports S3 through the [S3 gateway](../../guide/gateway.md).

### HDFS compatibility {#hdfs-compatibility}

GlusterFS offers HDFS compatibility through the [`glusterfs-hadoop`](https://github.com/gluster/glusterfs-hadoop) project, but the project hasn't seen recent updates since May 2015.

JuiceFS provides [full compatibility with the HDFS API](../../deployment/hadoop_java_sdk.md).

### CSI Driver {#csi-driver}

GlusterFS [previously supported CSI Driver](https://github.com/gluster/gluster-csi-driver) but the latest version was released in November 2018, and the repository is marked as DEPRECATED.

JuiceFS supports CSI Driver. For details, see the [document](https://juicefs.com/docs/csi/introduction).

## Extended features {#extended-features}

### POSIX ACLs {#posix-acls}

In Linux, file access permissions are typically controlled by three entities: the file owner, the group owner, and others. However, when more complex requirements arise, such as the need to assign specific permissions to a particular user within the others category, this standard mechanism does not work. POSIX Access Control Lists (ACLs) offer enhanced permission management capabilities, allowing you to assign permissions to any user or user group as needed.

GlusterFS [supports ACLs](https://docs.gluster.org/en/main/Administrator-Guide/Access-Control-Lists), including access ACLs and default ACLs.

JuiceFS supports the [POSIX ACLs](../../security/posix_acl.md) feature starting from v1.2.

### Cross-cluster replication {#cross-cluster-replication}

Cross-cluster replication indicates replicating data between two independent clusters, often used for geographically distributed disaster recovery.

GlusterFS [supports one-way asynchronous incremental replication](https://docs.gluster.org/en/main/Administrator-Guide/Geo-Replication) but requires both sides to use the same version of Gluster cluster.

JuiceFS depends on the capabilities of the metadata engine and the object storage, allowing one-way replication.

### Directory quotas {#directory-quotas}

Both [GlusterFS](https://docs.gluster.org/en/main/Administrator-Guide/Directory-Quota) and [JuiceFS](../../guide/quota.md#directory-quota) support directory quotas, including capacity and/or file count limits.

### Snapshots {#snapshots}

GlusterFS supports [volume-level snapshots](https://docs.gluster.org/en/main/Administrator-Guide/Managing-Snapshots) and requires all bricks to be deployed on LVM thinly provisioned volumes.

JuiceFS does not support snapshots but offers [directory-level cloning](../../guide/clone.md).

### Trash {#trash}

GlusterFS [supports the trash functionality](https://docs.gluster.org/en/main/Administrator-Guide/Trash), which is disabled by default.

JuiceFS [supports the trash functionality](../../security/trash.md), which is enabled by default.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_lustre.md
================================================
---
slug: /comparison/juicefs_vs_lustre
description: This article compares the architecture, file distribution, and features of Lustre and JuiceFS.
---

# JuiceFS vs. Lustre

Lustre is a parallel distributed file system designed for HPC environments. Initially developed under U.S. government funding by national laboratories to support large-scale scientific and engineering computations, it's now maintained primarily by DataDirect Networks (DDN). Lustre is widely adopted in supercomputing centers, research institutions, and enterprise HPC clusters.

JuiceFS is a cloud-native distributed file system that uses object storage to store data. The Community Edition supports integration with multiple metadata services and caters to diverse use cases. Its Enterprise Edition is specifically optimized for high-performance scenarios, with extensive applications in large-scale AI workloads including generative AI, autonomous driving, quantitative finance, and biotechnology.

This document provides a comprehensive comparison between Lustre and JuiceFS in terms of architecture, file distribution, and features.

## Architecture comparison

### Lustre

Lustre employs a traditional client-server architecture with the following core components:

- **Metadata Servers (MDS)**: Handle namespace operations, such as file creation, deletion, and permission checks. Starting with version 2.4, Lustre introduced Distributed Namespace (DNE) to enable horizontal scaling by distributing different directories across multiple MDS within a single file system.
- **Object Storage Servers (OSS)**: Manage actual data reads and writes, delivering high-performance large-scale read and write operations.
- **Management Server (MGS)**: Acts as a global configuration registry, storing and distributing Lustre file system configuration information while remaining functionally independent of any specific Lustre instance.
- **Clients**: Provides applications with access to the Lustre file system through a standard POSIX file operations interface.

All components are interconnected via LNet, Lustre's dedicated networking protocol, forming a unified and high-performance file system architecture.

![Lustre architecture](https://static1.juicefs.com/images/Lustre_JiaGouTu_SWMlRaK.original.png)

### JuiceFS

JuiceFS uses a modular architecture that comprises three core components:

- **Metadata engine**: Stores file system metadata, including standard file system metadata and file data indexes. The Community Edition supports various databases including Redis, TiKV, MySQL, PostgreSQL, and FoundationDB. The Enterprise Edition uses a self-developed high-performance metadata service.
- **Data storage**: Primarily utilizes object storage services, which can be a public cloud object storage or an on-premises deployed object storage service. Supports over 30 types of object storage including AWS S3, Azure Blob, Google Cloud Storage, MinIO, and Ceph RADOS.
- **Clients**: Provides multiple access protocols, such as POSIX (FUSE), Hadoop SDK, CSI Driver, S3 Gateway, and Python SDK.

![JuiceFS Community Edition architecture](../../images/juicefs-arch.svg)

### Architectural differences

#### Client implementation

Lustre employs a C-language, kernel-space client architecture, while JuiceFS adopts a Go-based, user-space approach through file system in Userspace (FUSE). Because the Lustre client runs in kernel space, there is no need to perform context switching between user mode and kernel mode or additional memory copying when accessing the MDS or OSS. This significantly reduces the performance overhead caused by system calls and has certain advantages in throughput and latency.

However, kernel-mode implementation also brings complexity to operation, maintenance, and debugging. Compared with user-mode development environments and debugging tools, kernel-mode tools have a higher threshold and are not easy for ordinary developers to master. JuiceFS's Go-based user-space implementation is easier to learn, maintain, and develop, with higher development efficiency and maintainability.

#### Storage module

Lustre requires one or more shared disks to store file data. This design stems from the fact that its early versions did not support file level redundancy (FLR). To achieve high availability (HA), when a node goes offline, its file system must be mounted to a peer node, otherwise the data chunks on the node will be inaccessible. Therefore, the reliability of the data depends on the high availability mechanism of the shared storage itself or the software RAID implementation configured by the user.

JuiceFS uses object storage as a data storage solution, thus enjoying several advantages brought by object storage, such as data reliability and consistency. Users can connect to specific storage systems according to their needs, including both object storage of mainstream cloud vendors and on-premises deployed object storage systems such as MinIO and Ceph RADOS. JuiceFS Community Edition provides local cache to cope with bandwidth requirements in AI scenarios, and the Enterprise Edition uses distributed cache to meet the needs of larger aggregate read bandwidth.

#### Metadata module

Lustre's MDS high availability relies on the coordinated implementation of software and hardware:

- **Hardware level**: The disks used by MDS need to be configured with RAID to avoid service unavailability due to single-point disk failure; the disks also need to have sharing capabilities so that when the primary node fails, the backup node can take over the disk resources.
- **Software level**: Use Pacemaker and Corosync to build a high-availability cluster to ensure that only one MDS instance is active at any time.

JuiceFS Community Edition provides a set of metadata operation interfaces that can access different metadata services, including databases like Redis, TiKV, MySQL, PostgreSQL, and FoundationDB. JuiceFS Enterprise Edition uses self-developed high-performance metadata services, which can balance data and hotspot operations according to load conditions to avoid the problem of metadata service hotspots being concentrated on certain nodes in large-scale training.

## File distribution comparison

### Lustre file distribution

#### Normal file layout (NFL)

Lustre's initial file distribution mechanism segments files into multiple chunks distributed across object storage targets (OSTs) in a RAID 0-like striping pattern.

Key distribution parameters:

- **stripe count**: Determines the number of OSTs across which a file is striped. Higher values improve parallel access but increase scheduling and management overhead.
- **stripe size**: Defines the chunk size written to each OST before switching to the next OST. This determines the granularity of each chunk.

![Lustre NFL file distribution](https://static1.juicefs.com/images/Lustre_NFL_WenJianFenBuShiLi.original.png)

The figure above shows how a file with `stripe count = 3` and `stripe size = 1 MB` is distributed across multiple OSTs. Each data block (stripe) is allocated to different OSTs sequentially via round-robin scheduling.

Key limitations include:

- Configuration parameters are immutable after file creation
- Can lead to ENOSPC (no space left) if any target OST runs out of space
- May result in storage imbalance over time

#### Progressive file layout (PFL)

To address the constraints of NFL, Lustre introduced progressive file layout (PFL), which allows defining different layout policies for different segments of the same file.

![Lustre PFL file distribution](https://static1.juicefs.com/images/Lustre_PFL_WenJianFenBuShiLi.original.png)

PFL provides advantages such as:

- Dynamic adaptation to file growth
- Mitigation of storage imbalance
- Improved space efficiency and flexibility

While PFL provides more adaptive layout strategies, Lustre integrates lazy initialization technology for more efficient dynamic resource scheduling to further address storage imbalance issues.

#### File level redundancy (FLR)

Lustre introduced file level redundancy to simplify HA architecture and enhance fault tolerance. FLR allows configuring one or more replicas for each file to achieve file-level redundancy protection. During write operations, data is initially written to only one replica, while the others are marked as STALE. The system ensures data consistency through a synchronization process called Resync.

### JuiceFS file distribution

JuiceFS manages data blocks according to the rules of chunk, slice, and block. The size of each chunk is fixed at 64 MB, which optimizes data search and positioning. The actual file write operation is performed on slices. Each slice represents a continuous write process within a chunk, with length not exceeding 64 MB. A block (4 MB by default) is the basic unit of physical storage that implements the final storage of data in object storage and disk cache.

![JuiceFS file distribution](../../images/file-and-chunks.svg)

Slice in JuiceFS is a structure that is not common in other file systems. It records file write operations and persists them in object storage. Since object storage does not support in-place file modification, JuiceFS allows file content to be updated without rewriting the entire file by introducing the slice structure. When a file is modified, the system creates a new slice and updates the metadata after the slice is uploaded, pointing the file content to the new slice.

All slices of JuiceFS are written once, which reduces the reliance on the consistency of the underlying object storage and greatly simplifies the complexity of the cache system, making data consistency easier to ensure.

## Feature comparison

| Features | Lustre | JuiceFS Community | JuiceFS Enterprise |
|----------|--------|-------------------|-------------------|
| Metadata | Distributed metadata service | Independent database service | Proprietary high-performance distributed metadata engine (horizontally scalable) |
| Metadata redundancy | Requires storage device support | Depends on the database used | Triple replication |
| Data storage | Self-managed | Uses object storage | Uses object storage |
| Data redundancy | Storage device or async replication | Provided by object storage | Provided by object storage |
| Data caching | Client local cache | Client local cache | Proprietary high-performance multi-replica distributed cache |
| Data encryption | Supported | Supported | Supported |
| Data compression | Supported | Supported | Supported |
| Quota management | Supported | Supported | Supported |
| Network protocol | Multiple protocols supported | TCP | TCP |
| Snapshots | File system-level snapshots | File-level snapshots | File-level snapshots |
| POSIX ACL | Supported | Supported | Supported |
| POSIX compliance | Compatible | Fully compatible | Fully compatible |
| CSI Driver | Unofficially supported | Supported | Supported |
| Client access | POSIX | POSIX (FUSE), Java SDK, S3 Gateway, Python SDK | POSIX (FUSE), Java SDK, S3 Gateway, Python SDK |
| Multi-cloud mirroring | Not supported | Not supported | Supported |
| Cross-cloud/region replication | Not supported | Not supported | Supported |
| Primary maintainer | DDN | Juicedata | Juicedata |
| Development language | C | Go | Go |
| License | GPL 2.0 | Apache License 2.0 | Commercial software |

## Summary

Lustre is a high-performance parallel distributed file system where clients run in kernel space, interacting directly with the MDS and OSS. This architecture eliminates context switching between user and kernel space, enabling exceptional performance in high-bandwidth I/O scenarios when combined with high-performance storage devices.

However, running clients in kernel space increases operational complexity, requiring administrators to possess deep expertise in kernel debugging and underlying system troubleshooting. Additionally, Lustre's fixed-capacity storage approach and complex file distribution design demand meticulous planning and configuration for optimal resource utilization.

JuiceFS is a cloud-native, user-space distributed file system that tightly integrates with object storage and natively supports Kubernetes CSI, simplifying deployment and management in cloud environments. Users can achieve elastic scaling and highly available data services in containerized environments without needing to manage underlying storage hardware or complex scheduling mechanisms. For performance optimization, JuiceFS Enterprise Edition employs distributed caching to significantly reduce object storage access latency and improve file operation responsiveness.

From a cost perspective, Lustre requires high-performance dedicated storage hardware, resulting in substantial upfront investment and long-term maintenance expenses. In contrast, object storage offers greater cost efficiency, inherent scalability, and pay-as-you-go flexibility.

Both systems have their strengths: Lustre excels in traditional HPC environments requiring maximum performance, while JuiceFS provides better flexibility, easier management, and cost efficiency for cloud-native and AI workloads.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_s3fs.md
================================================
---
slug: /comparison/juicefs_vs_s3fs
description: This document compares S3FS and JuiceFS, examining their product positioning, architecture, caching, and features.
---

# JuiceFS vs. S3FS

[S3FS](https://github.com/s3fs-fuse/s3fs-fuse) is an open source tool developed in C++ that mounts S3 object storage locally via FUSE for read and write access as a local disk. In addition to Amazon S3, it supports all S3 API-compatible object stores.

While both S3FS and JuiceFS share the basic functionality of mounting object storage buckets locally via FUSE and using them through POSIX interfaces, they differ significantly in functional details and technical implementation.

## Product positioning

S3FS is a utility that allows users to mount object storage buckets locally and read and write in a way that the users used to. It targets general use scenarios that are not sensitive to performance and network latency.

JuiceFS is a distributed file system with a unique approach to data management and a series of technical optimizations for high performance, reliability, and security. It primarily addresses the storage needs of large volumes of data.

## Architecture

S3FS does not do special optimization for files. It acts as an access channel between local and object storage, allowing the same content to be seen on the local mount point and the object storage browser. This makes it easy to use cloud storage locally. On the other hand, with this simple architecture, retrieving, reading, and writing files with S3FS require direct interaction with the object store, and network latency can impact strongly on performance and user experience.

JuiceFS uses a architecture that separates data and metadata. Files are split into data blocks according to specific rules before being uploaded to object storage, and the corresponding metadata is stored in a separate database. The advantage of this is that retrieval of files and modification of metadata such as file names can directly interact with the database with a faster response, bypassing the network latency impact of interacting with the object store.

In addition, when processing large files, although S3FS can solve the problem of transferring large files by uploading them in chunks, the nature of object storage dictates that appending files requires rewriting the entire object. For large files of tens or hundreds of gigabytes or even terabytes, repeated uploads waste a lot of time and bandwidth resources.

JuiceFS avoids such problems by splitting individual files into chunks locally according to specific rules (default 4MiB) before uploading, regardless of their size. The rewriting and appending operations will eventually become new data blocks instead of modifying already generated data blocks. This greatly reduces the waste of time and bandwidth resources.

For a detailed description of the JuiceFS architecture, refer to the [documentation](../../introduction/architecture.md).

## Caching

S3FS supports disk caching, but it is disabled by default. Local caching can be enabled by specifying a cache path with `-o use_cache`. When caching is enabled, any file reads or writes will be written to the cache before the operation is actually performed. S3FS detects data changes via MD5 to ensure data correctness and reduce duplicate file downloads. Since all operations involved with S3FS require interactions with S3, whether the cache is enabled or not impacts significantly on its application experience.

S3FS does not limit the cache capacity by default, which may cause the cache to fill up the disk when working with large buckets. You need to define the reserved disk space by `-o ensure_diskfree`. In addition, S3FS does not have a cache expiration and cleanup mechanism, so users need to manually clean up the cache periodically. Once the cache space is full, uncached file operations need to interact directly with the object storage, which will impact large file handling.

JuiceFS uses a completely different caching approach than S3FS. First, JuiceFS guarantees data consistency. Secondly, JuiceFS defines a default disk cache usage limit of 100GiB, which can be freely adjusted by users as needed, and by default ensures that no more space is used when disk free space falls below 10%. When the cache usage limit reaches the upper limit, JuiceFS will automatically do cleanup using an LRU-like algorithm to ensure that cache is always available for subsequent read and write operations.

For more information on JuiceFS caching, see the [documentation](../../guide/cache.md).

## Features

| Comparison basis          | S3FS                                                           | JuiceFS                                      |
|---------------------------|----------------------------------------------------------------|----------------------------------------------|
| Data Storage              | S3                                                             | S3, other object storage, WebDAV, local disk |
| Metadata Storage          | No                                                             | Database                                     |
| Operating System          | Linux, macOS                                                   | Linux, macOS, Windows                        |
| Access Interface          | POSIX                                                          | POSIX, HDFS API, S3 Gateway and CSI Driver   |
| POSIX Compatibility       | Partially compatible                                           | Fully compatible                             |
| Shared Mounts             | Supports but does not guarantee data integrity and consistency | Guarantee strong consistency                 |
| Local Cache               | ✓                                                              | ✓                                            |
| Symbol Links              | ✓                                                              | ✓                                            |
| Standard Unix Permissions | ✓                                                              | ✓                                            |
| Strong Consistency        | ✕                                                              | ✓                                            |
| Extended Attributes       | ✕                                                              | ✓                                            |
| Hard Links                | ✕                                                              | ✓                                            |
| File Chunking             | ✕                                                              | ✓                                            |
| Atomic Operations         | ✕                                                              | ✓                                            |
| Data Compression          | ✕                                                              | ✓                                            |
| Client-side Encryption    | ✕                                                              | ✓                                            |
| Development Language      | C++                                                            | Go                                           |
| Open Source License       | GPL v2.0                                                       | Apache License 2.0                           |

## Additional notes

[OSSFS](https://github.com/aliyun/ossfs), [COSFS](https://github.com/tencentyun/cosfs), and [OBSFS](https://github.com/huaweicloud/huaweicloud-obs-obsfs) are all derivatives based on S3FS and have essentially the same functional features and usage as S3FS.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_s3ql.md
================================================
---
slug: /comparison/juicefs_vs_s3ql
---

# JuiceFS vs. S3QL

Similar to JuiceFS, S3QL is also an open source network file system driven by object storage and database. All data will be split into blocks and stored in object storage services such as Amazon S3, Backblaze B2, or OpenStack Swift, and the corresponding metadata will be stored in the database.

## Common ground

- Both support the standard POSIX file system interface through the FUSE module, so that massive cloud storage can be mounted locally and used like local storage.
- Both provide standard file system features: hard links, symbolic links, extended attributes, file permissions.
- Both support data compression and encryption, but the algorithms used are different.
- Both support metadata backup, S3QL automatically backs up SQLite databases to object storage, and JuiceFS automatically exports metadata to JSON format files every hour and backs them up to object storage for easy recovery and migration between various metadata engines.

## Differences

- S3QL only supports SQLite. But JuiceFS supports more databases, such as Redis, TiKV, MySQL, PostgreSQL, and SQLite.
- S3QL has no distributed capability and **does not** support multi-host shared mounting. JuiceFS is a typical distributed file system. When using a network-based database, it supports multi-host distributed mount read and write.
- S3QL commits a data block to S3 when it has not been accessed for more than a few seconds. After a file closed or even fsynced, it is only guaranteed to stay in system memory, which may result in data loss if node fails. JuiceFS ensures high data durability, uploading all blocks synchronously when a file is closed.
- S3QL provides data deduplication. Only one copy of the same data is stored, which can reduce the storage usage, but it will also increase the performance overhead of the system. JuiceFS pays more attention to performance, and it is too expensive to perform deduplication on large-scale data, so this function is temporarily not provided.

|                           | **S3QL**              | **JuiceFS**                   |
| :------------------------ | :-------------------- | :---------------------------- |
| Project status            | Active development    | Active development            |
| Metadata engine           | SQLite                | Redis, MySQL, SQLite, TiKV    |
| Storage engine            | Object Storage, Local | Object Storage, WebDAV, Local |
| Operating system          | Unix-like             | Linux, macOS, Windows         |
| Compression algorithm     | LZMA, bzip2, gzip     | LZ4, zstd                     |
| Encryption algorithm      | AES-256               | AES-GCM, RSA                  |
| POSIX compatible          | ✓                     | ✓                             |
| Hard link                 | ✓                     | ✓                             |
| Symbolic link             | ✓                     | ✓                             |
| Extended attributes       | ✓                     | ✓                             |
| Standard Unix permissions | ✓                     | ✓                             |
| Data block                | ✓                     | ✓                             |
| Local cache               | ✓                     | ✓                             |
| Elastic storage           | ✓                     | ✓                             |
| Metadata backup           | ✓                     | ✓                             |
| Data deduplication        | ✓                     | ✕                             |
| Immutable trees           | ✓                     | ✕                             |
| Snapshots                 | ✓                     | ✕                             |
| Share mount               | ✕                     | ✓                             |
| Hadoop SDK                | ✕                     | ✓                             |
| Kubernetes CSI Driver     | ✕                     | ✓                             |
| S3 gateway                | ✕                     | ✓                             |
| Language                  | Python                | Go                            |
| Open source license       | GPLv3                 | Apache License 2.0                        |
| Open source date          | 2011                  | 2021.1                        |

## Usability

This part mainly evaluates the ease of installing and using the two products.

### Installation

During the installation process, we use Rocky Linux 8.4 operating system (kernel version 4.18.0-305.12.1.el8_4.x86_64).

#### S3QL

S3QL is developed in Python and requires `python-devel` 3.7 or higher to be installed. In addition, at least the following dependencies must be satisfied: `fuse3-devel`, `gcc`, `pyfuse3`, `sqlite-devel`, `cryptography`, `defusedxml`, `apsw`, `dugong`. In addition, you need to pay special attention to Python's package dependencies and location issues.

S3QL will install 12 binary programs in the system, and each program provides an independent function, as shown in the figure below.

![S3QL-bin](../../images/s3ql-bin.jpg)

#### JuiceFS

JuiceFS is developed in Go and can be used directly by downloading the pre-compiled binary file. The JuiceFS client has only one binary program `juicefs`. You can just copy it to any executable path of the system, for example: `/usr/local/bin`.

### Create and Mount a file system

Both S3QL and JuiceFS use database to store metadata. S3QL only supports SQLite databases, while JuiceFS supports databases such as Redis, TiKV, MySQL, MariaDB, PostgreSQL, and SQLite.

Here we create a file system using S3QL and JuiceFS separately with locally created MinIO as object storage:

#### S3QL

S3QL uses `mkfs.s3ql` to create a file system:

```shell
mkfs.s3ql --plain --backend-options no-ssl -L s3ql s3c://127.0.0.1:9000/s3ql/
```

Mount a file system using `mount.s3ql`:

```shell
mount.s3ql --compress none --backend-options no-ssl s3c://127.0.0.1:9000/s3ql/ mnt-s3ql
```

S3QL needs the access key of the object storage API to be interactively provided through the command line when creating and mounting a file system.

#### JuiceFS

JuiceFS uses the `format` subcommand to create a file system:

```shell
juicefs format --storage minio \
    --bucket http://127.0.0.1:9000/myjfs \
    --access-key minioadmin \
    --secret-key minioadmin \
    sqlite3://myjfs.db \
    myjfs
```

Mount a file system using `mount` subcommand:

```shell
sudo juicefs mount -d sqlite3://myjfs.db mnt-juicefs
```

JuiceFS only sets the object storage API access key when creating a file system, and the relevant information will be written into the metadata engine. After created, there is no need to repeatedly provide the object storage url, access key and other information.

## Summary

**S3QL** adopts the storage structure of object storage + SQLite. Storing data in blocks can not only improve the read and write efficiency of the file but also reduce the resource overhead when the file is modified. The advanced features such as snapshots, data deduplication, and data retention, as well as the default data compression and data encryption make S3QL very suitable for individuals to store files in cloud storage at a lower cost and with higher security.

**JuiceFS** supports object storage, HDFS, WebDAV, and local disks as data storage engines, and supports popular databases such as Redis, TiKV, MySQL, MariaDB, PostgreSQL, and SQLite as metadata storage engines. It provides a standard POSIX file system interface through FUSE and a Java API, which can directly replace HDFS to provide storage for Hadoop. At the same time, it also provides [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver), which can be used as the storage layer of Kubernetes for data persistent storage. JuiceFS is a file system designed for enterprise-level distributed data storage scenarios. It is widely used in various scenarios such as big data analysis, machine learning, container shared storage, data sharing, and backup.


================================================
FILE: docs/en/introduction/comparison/juicefs_vs_seaweedfs.md
================================================
---
title: JuiceFS vs. SeaweedFS
slug: /comparison/juicefs_vs_seaweedfs
description: This document compares JuiceFS and SeaweedFS, covering their architecture, storage mechanisms, client protocols, and other advanced features.
---

[SeaweedFS](https://github.com/seaweedfs/seaweedfs) and [JuiceFS](https://github.com/juicedata/juicefs) are both open-source high-performance distributed file storage systems. They operate under the business-friendly Apache License 2.0. However, JuiceFS comes in two versions: a [Community Edition](https://juicefs.com/docs/community/introduction) and an [Enterprise Edition](https://juicefs.com/en/blog/solutions/juicefs-enterprise-edition-features-vs-community-edition), you can use JuiceFS Enterprise Edition as on-premises deployment, or [use Cloud Service](https://juicefs.com/docs/cloud) directly. The Enterprise Edition uses a proprietary metadata engine, while its client shares code extensively with the [Community Edition](https://github.com/juicedata/juicefs).

This document compares the key attributes of JuiceFS and SeaweedFS in a table and then explores them in detail. You can easily see their main differences in the table below and delve into specific topics you're interested in within this article. By highlighting their contrasts and evaluating their suitability for different use cases, this document aims to help you make informed decisions.

## A quick summary of SeaweedFS vs. JuiceFS

| Comparison basis | SeaweedFS | JuiceFS |
| :--- | :--- | :--- |
| Metadata engine | Supports multiple databases | The Community Edition supports various databases; the Enterprise Edition uses an in-house, high-performance metadata engine. |
| Metadata operation atomicity | Not guaranteed | The Community Edition ensures atomicity through database transactions; the Enterprise Edition ensures atomicity within the metadata engine. |
| Changelog | Supported | Exclusive to the Enterprise Edition |
| Data storage | Self-contained | Relies on object storage |
| Erasure coding | Supported | Relies on object storage |
| Data consolidation | Supported | Relies on object storage |
| File splitting | 8MB | 64MB logical blocks + 4MB physical storage blocks |
| Tiered storage | Supported | Relies on object storage |
| Data compression | Supported (based on file extensions) | Supported (configured globally) |
| Storage encryption | Supported | Supported |
| POSIX compatibility | Basic | Full |
| S3 protocol | Basic | Basic |
| WebDAV protocol | Supported | Supported |
| HDFS compatibility | Basic | Full |
| CSI Driver | Supported | Supported |
| Client cache | Supported | Supported |
| Cluster data replication | Unidirectional and bidirectional replication is supported | Exclusive to the Enterprise Edition, only unidirectional replication is supported |
| Cloud data cache | Supported (manual synchronization) | Exclusive to the Enterprise Edition |
| Trash | Unsupported | Supported |
| Operations and monitoring | Supported | Supported |
| Release date | April 2015 | January 2021 |
| Primary maintainer | Individual (Chris Lu) | Company (Juicedata Inc.) |
| Programming language | Go | Go |
| Open source license | Apache License 2.0 | Apache License 2.0 |

## The SeaweedFS architecture

The system consists of three components:

- The volume servers, which store files in the underlying layer
- The master servers, which manage the cluster
- An optional component, filer, which provides additional features to the upper layer

![SeaweedFS architecture](../../images/seaweedfs_arch_intro.png)

In the system operation, both the volume server and the master server are used for file storage:

- The volume server focuses on data read and write operations.
- The master server primarily functions as a management service for the cluster and volumes.

In terms of data access, SeaweedFS implements a similar approach to Haystack. A user-created volume in SeaweedFS corresponds to a large disk file ("Superblock" in the diagram below). Within this volume, all files written by the user ("Needles" in the diagram) are merged into the large disk file.

![SeaweedFS Superblock](../../images/seaweedfs_superblock.png)

Data write and read process in SeaweedFS:

1. Before a write operation, the client initiates a write request to the master server.
2. SeaweedFS returns a File ID based on the current data volume. This ID is composed of three parts: \<volume id, file key, file cookie\>. During the writing process, basic metadata information such as file length and chunk details is also written together with the data.
3. After the write is completed, the caller needs to associate the file with the returned File ID and store this mapping in an external system such as MySQL.
4. When reading data, since the volume index is already loaded in memory, the system can use the File ID to quickly retrieve all necessary information about the file's location (offset). This enables efficient file reading.

On top of the underlying storage services, SeaweedFS offers a component called filer, which interfaces with the volume server and the master server. It provides features like POSIX support, WebDAV, and the S3 API. Like JuiceFS, the filer needs to connect to an external database to store metadata information.

## The JuiceFS architecture

JuiceFS adopts an architecture that separates data and metadata storage:

- File data is split and stored in object storage systems such as Amazon S3.
- Metadata is stored in a user-selected database such as Redis or MySQL.

The client connects to the metadata engine for metadata services and writes actual data to object storage, achieving distributed file systems with strong consistency .

![JuiceFS architecture](../../images/juicefs-arch-new.png)

For details about JuiceFS' architecture, see the [Architecture](../architecture.md) document.

## Architecture comparison

### Metadata

Both SeaweedFS and JuiceFS support storing file system metadata in external databases:

- SeaweedFS supports up to [24 databases](https://github.com/seaweedfs/seaweedfs/wiki/Filer-Stores).
- JuiceFS has a high requirement for database transaction capabilities and currently supports [10 transactional databases across 3 categories](../../reference/how_to_set_up_metadata_engine.md).

### Atomic operations

JuiceFS ensures strict atomicity for every operation, which requires strong transaction capabilities from the metadata engine like Redis and MySQL. As a result, JuiceFS supports fewer databases.

SeaweedFS provides weaker atomicity guarantees for operations. It only uses transactions of some databases (SQL, ArangoDB, and TiKV) during rename operations, with a lower requirement for database transaction capabilities. Additionally, during the rename operation, SeaweedFS does not lock the original directory or file during the metadata copying process. This may result in data loss under high loads.

### Changelog and related features

SeaweedFS generates changelog for all metadata operations. The changelog can be transmitted and replayed. This ensures data safety and enables features like file system data replication and operation auditing.

SeaweedFS supports file system data replication between multiple clusters. It offers two asynchronous data replication modes:

- Active-Active. In this mode, both clusters participate in read and write operations and they synchronize data bidirectionally. When there are more than two nodes in the cluster, certain operations such as renaming directories are subject to certain restrictions.
- Active-Passive. In this mode, a primary-secondary relationship is established, and the passive side is read-only.

Both modes achieve consistency between different cluster data by transmitting and applying changelog. Each changelog has a signature to ensure that the same message is applied only once.

The JuiceFS Community Edition does not implement a changelog, but it can use its inherent data replication capabilities from the metadata engine and object storage to achieve file system mirroring. For example, both [MySQL](https://dev.mysql.com/doc/refman/8.0/en/replication.html) and [Redis](https://redis.io/docs/management/replication) only support data replication. When combined with [S3's object replication feature](https://docs.aws.amazon.com/AmazonS3/latest/userguide/replication.html), either of them can enable a setup similar to SeaweedFS' Active-Passive mode without relying on JuiceFS.

It's worth noting that the JuiceFS Enterprise Edition implements the metadata engine based on changelog. It supports [data replication](https://juicefs.com/docs/cloud/guide/replication) and [mirror file system](https://juicefs.com/docs/cloud/guide/mirror).

## Storage comparison

As mentioned earlier, SeaweedFS' data storage is achieved through volume servers + master servers, supporting features like merging small data blocks and erasure coding.

JuiceFS' data storage relies on object storage services, and relevant features are provided by the object storage.

### File splitting

Both SeaweedFS and JuiceFS split files into smaller chunks before persisting them in the underlying data system:

- SeaweedFS splits files into 8MB blocks. For extremely large files (over 8GB), it also stores the chunk index in the underlying data system.
- JuiceFS uses 64MB logical data blocks (chunks), which are further divided into 4MB blocks to be uploaded to object storage. For details, see [How JuiceFS stores files](../architecture.md#how-juicefs-store-files).

### Tiered storage

For newly created volumes, SeaweedFS stores data locally. For older volumes, SeaweedFS supports uploading them to the cloud to achieve [hot-cold data separation](https://github.com/seaweedfs/seaweedfs/wiki/Tiered-Storage).

JuiceFS does not implement tiered storage but directly uses object storage's tiered management services, such as [Amazon S3 Glacier storage classes](https://aws.amazon.com/s3/storage-classes/glacier/?nc1=h_ls).

### Data compression

JuiceFS supports compressing all written data using LZ4 or Zstandard.
SeaweedFS determines whether to compress data based on factors such as the file extension and file type.

### Encryption

Both support encryption, including encryption during transmission and at rest:

- SeaweedFS supports encryption both in transit and at rest. When data encryption is enabled, all data written to the volume server is encrypted using random keys. The corresponding key information is managed by the filer that maintains the file metadata. For details, see the  [Wiki](https://github.com/seaweedfs/seaweedfs/wiki/Filer-Data-Encryption).
- For details about JuiceFS' encryption feature, see [Data Encryption](../../security/encryption.md).

## Client protocol comparison

### POSIX

JuiceFS is [fully POSIX-compatible](../../reference/posix_compatibility.md), while SeaweedFS currently [partially implements POSIX compatibility](https://github.com/seaweedfs/seaweedfs/wiki/FUSE-Mount), with ongoing feature enhancements.

### S3

JuiceFS implements an [S3 gateway](https://juicefs.com/docs/community/s3_gateway), enabling direct access to the file system through the S3 API. It supports tools like s3cmd, AWS CLI, and MinIO Client (mc) for file system management.

SeaweedFS currently [supports a subset of the S3 API](https://github.com/seaweedfs/seaweedfs/wiki/Amazon-S3-API), covering common read, write, list, and delete requests, with some extensions for specific requests like reads.

### WebDAV

Both support the WebDAV protocol. For details, see:

- [SeaweedFS Wiki](https://github.com/seaweedfs/seaweedfs/wiki/WebDAV)
- [JuiceFS documentation](../../deployment/webdav.md)

### HDFS

JuiceFS is [fully compatible with the HDFS API](../../deployment/hadoop_java_sdk.md), including Hadoop 2.x, Hadoop 3.x, and various components within the Hadoop ecosystem.

SeaweedFS offers [basic HDFS compatibility](https://github.com/seaweedfs/seaweedfs/wiki/Hadoop-Compatible-File-System). It lacks support for advanced operations like truncate, concat, checksum, and set attributes.

### CSI Driver

Both support a CSI Driver. For details, see:

- [SeaweedFS CSI Driver](https://github.com/seaweedfs/seaweedfs-csi-driver)
- [JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver)

## Other advanced features

### Client cache

SeaweedFS client is equipped with [basic cache capabilities](https://github.com/seaweedfs/seaweedfs/wiki/FUSE-Mount), but its documentation weren't located at the time of writing, you can search for `cache` in the [source code](https://github.com/seaweedfs/seaweedfs/blob/master/weed/command/mount.go).

JuiceFS' client supports [metadata and data caching](../../guide/cache.md), allowing users to optimize based on their application's needs.

### Object storage gateway

SeaweedFS can be used as an [object storage gateway](https://github.com/seaweedfs/seaweedfs/wiki/Gateway-to-Remote-Object-Storage), you can manually warm up specified data to local cache directory, while local modification is asynchronously uploaded to object storage.

JuiceFS stores files in split form. Due to its architecture, it does not support serving as a cache for object storage or a cache layer. However, the JuiceFS Enterprise Edition has a standalone feature to provide caching services for existing data in object storage, which is similar to SeaweedFS' object storage gateway.

### Trash

By default, JuiceFS enables the [Trash](../../security/trash.md) feature. To prevent accidental data loss and ensure data safety, deleted files are retained for a specified time.
However, SeaweedFS does not support this feature.

### Operations and maintenance

Both offer comprehensive maintenance and troubleshooting solutions:

- JuiceFS provides [`juicefs stats`](../../administration/fault_diagnosis_and_analysis.md#stats) and [`juicefs profile`](../../administration/fault_diagnosis_and_analysis.md#profile) to let users view real-time performance metrics. It offers a [`metrics`](../../administration/monitoring.md#collect-metrics) API to integrate monitoring data into Prometheus for visualization and monitoring alerts in Grafana.
- SeaweedFS uses [`weed shell`](https://github.com/seaweedfs/seaweedfs/wiki/weed-shell) to interactively execute maintenance tasks, such as checking the current cluster status and listing file directories. It also supports [push and pull](https://github.com/seaweedfs/seaweedfs/wiki/System-Metrics) approaches to integrate with Prometheus.


================================================
FILE: docs/en/introduction/io_processing.md
================================================
---
title: Data Processing Workflow
sidebar_position: 3
slug: /internals/io_processing
description: This article introduces read and write implementation of JuiceFS, including how it splits files into chunks.
---

## Data writing process {#workflow-of-write}

JuiceFS splits large files at multiple levels to improve I/O performance. See [how JuiceFS stores files](./architecture.md#how-juicefs-store-files). Files are initially divided into logical chunks (64 MiB each), which are isolated from each other and further broken down into slices. Slices are the data units for persistence. During a write request, data is stored in the client buffer as chunks/slices. A new slice is created if it does not overlap or adjoin any existing slices; otherwise, the affected existing slices are updated. On a flush operation, a slice is divided into blocks (4 MiB by default) and uploaded to the object storage. Metadata is updated upon successful upload.

Sequential writes are optimized, requiring only one continuously growing slice and one final flush. This maximizes object storage write performance. A simple [JuiceFS benchmark](../benchmark/performance_evaluation_guide.md) below shows sequentially writing a 1 GiB file with a 1 MiB I/O size at its first stage. The following figure shows the data flow in each component of the system.

![internals-write](../images/internals-write.png)

Use [`juicefs stats`](../reference/command_reference.mdx#stats) to obtain real-time performance monitoring metrics.

![internals-stats](../images/internals-stats.png)

The first highlighted section in the above figure shows:

- The average I/O size for writing to the object storage is `object.put / object.put_c = 4 MiB`. It is the same as the default block size.
- The ratio of metadata transactions to object storage transactions is `meta.txn : object.put_c -= 1 : 16`. It means that a single slice flush requires 1 metadata update and 16 uploads to the object storage. Each flush operation transmits 64 MiB of data (4 MiB * 16), equivalent to the default chunk size.
- The average request size in the FUSE layer approximately equals to `fuse.write / fuse.ops ~= 128 KiB`, matching the default request size limitation.

Generally, when JuiceFS writes a small file, the file is uploaded to the object storage upon file closure, and the I/O size is equal to the file size. In the third stage of the figure above, where 128 KiB small files are created, we can see that:

- The size of data written to the object storage during PUT operations is 128 KiB, calculated by `object.put / object.put_c`.
- The number of metadata transactions is approximately twice the number of PUT operations, since each file requires one create and one write.

When JuiceFS uploads objects smaller than the block size, it simultaneously writes them into the [local cache](../guide/cache.md) to improve future performance. As shown in the third stage of the figure above, the write bandwidth of the `blockcache` is the same as that of the object storage. Since small files are cached, reading these files is extremely fast, as demonstrated in the fourth stage.

Write operations are immediately committed to the client buffer, resulting in very low write latency (typically just a few microseconds). The actual upload to the object storage is automatically triggered internally when certain conditions are met, such as when the size or number of slices exceeds their limit, or data stays in the buffer for too long. Explicit calls, such as closing a file or invoking `fsync`, can also trigger uploading.

The client buffer is only released after the data stored inside is uploaded. In scenarios with high write concurrency, if the buffer size (configured using [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options)) is not big enough, or the object storage's performance insufficient, write blocking may occur, because the buffer cannot be released timely. The real-time buffer usage is shown in the `usage.buf` field in the metrics figure. To slow things down, The JuiceFS client introduces a 10 ms delay to every write when the buffer usage exceeds the threshold. If the buffer usage is over twice the threshold, new writes are completely suspended until the buffer is released. Therefore, if the write latency keeps increasing or the buffer usage has exceeded the threshold for a long while, you should increase `--buffer-size`. Also consider increasing the maximum number of upload concurrency ([`--max-uploads`](../reference/command_reference.mdx#mount-data-storage-options), defaults to 20), which improves the upload bandwidth, thus boosting buffer release.

### Random writes {#random-write}

JuiceFS supports random writes, including mmap-based random writes.

Note that a block is an immutable object, because most object storage services don't support edit in blocks; they can only be re-uploaded and overwritten. Thus, when overwrites or random writes occur, JuiceFS avoids downloading the block for editing and re-uploading, which could cause serious I/O amplifications. Instead, writes are performed on new or existing slices. Relevant new blocks are uploaded to the object storage, and the new slice is appended to the slice list under the chunk. When a file is read, what the client sees is actually a consolidated view of all the slices.

Compared to sequential writes, random writes in large files are more complicated. There could be a number of intermittent slices in a chunk, possibly all smaller than 4 MiB. Frequent random writes require frequent metadata updates, which in turn further impact performance. To improve read performance, JuiceFS schedules compaction tasks when the number of slices under a chunk exceeds the limit. You can also manually trigger compaction by running [`juicefs gc`](../administration/status_check_and_maintenance.md#gc).

### Client write cache {#client-write-cache}

Client write cache is also referred to as "Writeback mode" throughout the docs.

For scenarios that does not deem consistency and data security as top priorities, enabling client write cache is also an option to further improve performance. When client write cache is enabled, flush operations return immediately after writing data to the local cache directory. Then, local data is uploaded asynchronously to the object storage. In other words, the local cache directory is a cache layer for the object storage.

Learn more in [Client Write Cache](../guide/cache.md#client-write-cache).

## Data reading process {#workflow-of-read}

JuiceFS supports sequential reads and random reads (including mmap-based random reads). During read requests, the object corresponding to the block is completely read through the `GetObject` API of the object storage, or only a certain range of data in the object may be read (e.g., the read range is limited by the `Range` parameter of [S3 API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html)). Meanwhile, prefetching is performed (controlled by the [`--prefetch`](../reference/command_reference.mdx#mount) option) to download the complete data block into the local cache directory, as shown in the `blockcache` write speed in the second stage of the above metrics figure. This is very good for sequential reads as all cached data is utilized, maximizing the object storage access efficiency. The dataflow is illustrated in the figure below:

![internals-read](../images/internals-read.png)

Although prefetching works well for sequential reads, it might not be so effective for random reads on large files. It can cause read amplification and frequent cache eviction. Consider disabling prefetching using `--prefetch=0`. It is always hard to design cache strategy for random read scenarios. Two possible solutions are increasing the cache size to store all data locally or completely disabling the cache (`--cache-size=0`) and relying on a high-performance object storage service.

Reading small files (smaller than the block size) is much easier because the entire file can be read in a single request. Since small files are cached locally during the write process, future reads are fast.


================================================
FILE: docs/en/reference/_common_options.mdx
================================================
#### Metadata related options {#mount-metadata-options}

|Items|Description|
|-|-|
|`--subdir=value`|mount a sub-directory as root (default: "")|
|`--backup-meta=3600`|interval (in seconds) to automatically backup metadata in the object storage (0 means disable backup) (default: "3600")|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|skip files and directories in trash when backup metadata.|
|`--heartbeat=12`|interval (in seconds) to send heartbeat; it's recommended that all clients use the same heartbeat value (default: "12")|
|`--read-only`|Read-only mode, i.e. allow only lookup/read operations. Note that this option implies `--no-bgjob`, so read-only clients do not execute background jobs.|
|`--no-bgjob`|Disable background jobs, default to false, which means clients by default carry out background jobs, including:<br/><ul><li>Clean up expired files in Trash (look for `cleanupDeletedFiles`, `cleanupTrash` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Delete slices that's not referenced (look for `cleanupSlices` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li><li>Clean up stale client sessions (look for `CleanStaleSessions` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go))</li></ul>Note that compaction isn't affected by this option, it happens automatically with file reads and writes, client will check if compaction is in need, and run in background (take Redis for example, look for `compactChunk` in [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go)).|
|`--atime-mode=noatime` <VersionAdd>1.1</VersionAdd> |Control atime (last time the file was accessed) behavior, support the following modes:<br/><ul><li>`noatime` (default): set when the file is created or when `SetAttr` is explicitly called. Accessing and modifying the file will not affect atime, tracking atime comes at a performance cost, so this is the default behavior</li><li>`relatime`: update inode access times relative to mtime (last time when the file data was modified) or ctime (last time when file metadata was changed). Only update atime if atime was earlier than the current mtime or ctime, or the file's atime is more than 1 day old</li><li>`strictatime`: always update atime on access</li></ul>|
|`--skip-dir-nlink=20` <VersionAdd>1.1</VersionAdd> |number of retries after which the update of directory nlink will be skipped (used for tkv only, 0 means never) (default: 20)|
|`--skip-dir-mtime=100ms` <VersionAdd>1.2</VersionAdd>|skip updating attribute of a directory if the mtime difference is smaller than this value (default: 100ms)|
|`--sort-dir` <VersionAdd>1.3</VersionAdd>|sort entries within a directory by name|
|`--fast-statfs` <VersionAdd>1.3</VersionAdd>|performance of `statfs` is improved by using local caching to reduce metadata access, but accuracy may decrease (default: false)|

#### Metadata cache related options {#mount-metadata-cache-options}

For metadata cache description and usage, refer to [Kernel metadata cache](../guide/cache.md#kernel-metadata-cache) and [Client memory metadata cache](../guide/cache.md#client-memory-metadata-cache).

|Items|Description|
|-|-|
|`--attr-cache=1`|attributes cache timeout in seconds (default: 1), read [Kernel metadata cache](../guide/cache.md#kernel-metadata-cache)|
|`--entry-cache=1`|file entry cache timeout in seconds (default: 1), read [Kernel metadata cache](../guide/cache.md#kernel-metadata-cache)|
|`--dir-entry-cache=1`|dir entry cache timeout in seconds (default: 1), read [Kernel metadata cache](../guide/cache.md#kernel-metadata-cache)|
|`--open-cache=0`|open file cache timeout in seconds (0 means disable this feature) (default: 0)|
|`--open-cache-limit value` <VersionAdd>1.1</VersionAdd> |max number of open files to cache (soft limit, 0 means unlimited) (default: 10000)|
|`--readdir-cache=false` <VersionAdd>1.3, only for mount</VersionAdd>|enable directory entry cache (default: false, disable this feature)|
|`--negative-entry-cache=0` <VersionAdd>1.3, only for mount</VersionAdd>|negative lookup (return ENOENT) cache timeout in seconds (default: 0, means disable this feature)|

#### Data storage related options {#mount-data-storage-options}

|Items|Description|
|-|-|
|`--storage=file`|Object storage type (e.g. `s3`, `gs`, `oss`, `cos`) (default: `"file"`, refer to [documentation](../reference/how_to_set_up_object_storage.md#supported-object-storage) for all supported object storage types).|
|`--bucket=value`|customized endpoint to access object storage|
|`--storage-class value` <VersionAdd>1.1</VersionAdd> |the storage class for data written by current client|
|`--get-timeout=60`|the max number of seconds to download an object (default: 60)|
|`--put-timeout=60`|the max number of seconds to upload an object (default: 60)|
|`--io-retries=10`|The number of retries when the network is abnormal and the number of retries for metadata requests are also controlled by this option. If the number of retries is exceeded, an `EIO Input/output error` error will be returned. (default: 10)|
|`--max-uploads=20`|Upload concurrency, defaults to 20. This is already a reasonably high value for 4M writes, with such write pattern, increasing upload concurrency usually demands higher `--buffer-size`, learn more at [Read/Write Buffer](../guide/cache.md#buffer-size). But for random writes around 100K, 20 might not be enough and can cause congestion at high load, consider using a larger upload concurrency, or try to consolidate small writes in the application end. |
|`--max-stage-write=0` <VersionAdd>1.2</VersionAdd>|The maximum number of concurrent writes of data blocks to the cache disk asynchronously. If the maximum number of concurrent writes is reached, the object storage will be uploaded directly (this option is only valid when ["Client write data cache"](../guide/cache.md#client-write-cache) is enabled) (default value: 0, that is, no concurrency limit)|
|`--max-deletes=10`|number of threads to delete objects (default: 10)|
|`--upload-limit=0`|bandwidth limit for upload in Mbps (default: 0)|
|`--download-limit=0`|bandwidth limit for download in Mbps (default: 0)|
|`--check-storage`<VersionAdd>1.3</VersionAdd>|test storage before mounting to expose access issues early|

#### Data cache related options {#mount-data-cache-options}

|Items|Description|
|-|-|
|`--buffer-size=300`|total read/write buffering in MiB (default: 300), see [Read/Write buffer](../guide/cache.md#buffer-size)|
|`--prefetch=1`|prefetch N blocks in parallel (default: 1), see [Client read data cache](../guide/cache.md#client-read-cache)|
|`--writeback`|upload objects in background (default: false), see [Client write data cache](../guide/cache.md#client-write-cache)|
|`--upload-delay=0`|When `--writeback` is enabled, you can use this option to add a delay to object storage upload, default to 0, meaning that upload will begin immediately after write. Different units are supported, including `s` (second), `m` (minute), `h` (hour). If files are deleted during this delay, upload will be skipped entirely, when using JuiceFS for temporary storage, use this option to reduce resource usage. Refer to [Client write data cache](../guide/cache.md#client-write-cache).|
|`--upload-hours` <VersionAdd>1.2</VersionAdd>|When `--writeback` is enabled, data blocks are only uploaded during the specified time of day. The format of the parameter is `<start hour>,<end hour>` (including "start hour", but not including "end hour", "start hour" must be less than or greater than "end hour"), where `<hour>` can range from 0 to 23. For example, `0,6` means that data blocks are only uploaded between 0:00 and 5:59 every day, and `23,3` means that data blocks are only uploaded between 23:00 every day and 2:59 the next day.|
|`--cache-dir=value`|directory paths of local cache, use `:` (Linux, macOS) or `;` (Windows) to separate multiple paths (default: `$HOME/.juicefs/cache` or `/var/jfsCache`), see [Client read data cache](../guide/cache.md#client-read-cache)|
|`--cache-mode value` <VersionAdd>1.1</VersionAdd> |file permissions for cached blocks (default: "0600")|
|`--cache-size=102400`|size of cached object for read in MiB (default: 102400), see [Client read data cache](../guide/cache.md#client-read-cache)|
|`--cache-items=0` <VersionAdd>1.3</VersionAdd> |max number of cached items (default is 0, which will be automatically calculated based on the `free‑space‑ratio`.)|
|`--free-space-ratio=0.1`|min free space ratio (default: 0.1), if [Client write data cache](../guide/cache.md#client-write-cache) is enabled, this option also controls write cache size, see [Client read data cache](../guide/cache.md#client-read-cache)|
|`--cache-partial-only`|cache random/small read only (default: false), see [Client read data cache](../guide/cache.md#client-read-cache)|
|`--cache-large-write` <VersionAdd>1.3</VersionAdd>|cache full blocks after uploading|
|`--verify-cache-checksum=extend` <VersionAdd>1.1</VersionAdd> |Checksum level for cache data. After enabled, checksum will be calculated on divided parts of the cache blocks and stored on disks, which are used for verification during reads. The following strategies are supported:<br/><ul><li>`none`: Disable checksum verification, if local cache data is tampered, bad data will be read;</li><li>`full` (default before 1.3): Perform verification when reading the full block, use this for sequential read scenarios;</li><li>`shrink`: Perform verification on parts that's fully included within the read range, use this for random read scenarios;</li><li>`extend`: Perform verification on parts that fully include the read range, this causes read amplifications and is only used for random read scenarios demanding absolute data integrity. (default since 1.3)</li></ul>|
|`--cache-eviction=2-random` <VersionAdd>1.1</VersionAdd> |cache eviction policy (`none` or `2-random`) (default: "2-random")|
|`--cache-scan-interval=1h` <VersionAdd>1.1</VersionAdd> |interval (in seconds) to scan cache-dir to rebuild in-memory index (default: "1h")|
|`--cache-expire=0` <VersionAdd>1.2</VersionAdd>|Cache blocks that have not been accessed for more than the set time, in seconds, will be automatically cleared (even if the value of `--cache-eviction` is `none`, these cache blocks will be deleted). A value of 0 means never expires (default: 0)|
|`--max-readahead` <VersionAdd>1.3</VersionAdd>|max buffering for read ahead in MiB|

#### Metrics related options {#mount-metrics-options}

||Items|Description|
|-|-|
|`--metrics=127.0.0.1:9567`|address to export metrics (default: `127.0.0.1:9567`)|
|`--custom-labels`|custom labels for metrics, format: `key1:value1;key2:value2` (default: "")|
|`--consul=127.0.0.1:8500`|Consul address to register (default: `127.0.0.1:8500`)|
|`--no-usage-report`|do not send usage report (default: false)|

#### Windows related options {#mount-windows-options}

|Items|Description|
|-|-|
|`--o=`|Used to specify additional FUSE mount options. The actual supported options are determined by WinFsp.|
|`--log=c:/juicefs.log` <VersionAdd>1.3</VersionAdd>|Path to save JuiceFS logs (only effective when running in background mode).|
|`-d` <VersionAdd>1.3</VersionAdd>|Run in background mode. On Windows, enabling this will run JuiceFS as a system service. (Note: This requires administrator privileges and only one file system can be mounted at a time in this mode.)|
|`--fuse-trace-log=c:/fuse.log` <VersionAdd>1.3</VersionAdd>|Specifies the trace log path for WinFsp's FUSE layer callbacks. (Default: "")|
|`--as-root`|A compatibility option that maps all file uid, gid, and write operations to the root user (uid=0).|
|`--show-dot-files` <VersionAdd>1.3</VersionAdd>|Show files that begin with a dot (.). By default, such files are treated as hidden.|
|`--winfsp-threads=16` <VersionAdd>1.3</VersionAdd>|Sets the number of threads WinFsp uses to handle kernel events. The default is min(CPU cores * 2, 16).|
|`--report-case` <VersionAdd>1.3</VersionAdd>|Configures whether JuiceFS should report the precise case of filenames when possible. For example, when opening aaa.txt that actually exists as AAA.txt, enabling this option allows JuiceFS to report the original case to the Windows kernel. (Note: Enabling this may affect performance.)|


================================================
FILE: docs/en/reference/command_reference.mdx
================================================
---
title: Command Reference
sidebar_position: 1
slug: /command_reference
description: Descriptions, usage and examples of all commands and options included in JuiceFS Client.
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<!-- Special note: Since there are many common options for mount, gateway and webdav commands, in order to simplify document maintenance, we have unified these common options in the "_common_options.mdx" file. If you need to update related content, please check this file. -->
import CommonOptions from './_common_options.mdx';

Running `juicefs` by itself and it will print all available commands. In addition, you can add `-h/--help` flag after each command to get more information, e.g., `juicefs format -h`.

```
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   1.2.0

COMMANDS:
   ADMIN:
     format   Format a volume
     config   Change configuration of a volume
     quota    Manage directory quotas
     destroy  Destroy an existing volume
     gc       Garbage collector of objects in data storage
     fsck     Check consistency of a volume
     restore  restore files from trash
     dump     Dump metadata into a JSON file
     load     Load metadata from a previously dumped JSON file
     version  Show version
   INSPECTOR:
     status   Show status of a volume
     stats    Show real time performance statistics of JuiceFS
     profile  Show profiling of operations completed in JuiceFS
     info     Show internal information of a path or inode
     debug    Collect and display system static and runtime information
     summary  Show tree summary of a directory
   SERVICE:
     mount    Mount a volume
     umount   Unmount a volume
     gateway  Start an S3-compatible gateway
     webdav   Start a WebDAV server
   TOOL:
     bench     Run benchmarks on a path
     objbench  Run benchmarks on an object storage
     warmup    Build cache for target directories/files
     rmr       Remove directories recursively
     sync      Sync between two storages
     clone     clone a file or directory without copying the underlying data
     compact   Trigger compaction of chunks

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             show warning and errors only (default: false)
   --trace                 enable trace log (default: false)
   --log-id value          append the given log id in log, use "random" to use random uuid
   --no-agent              disable pprof (:6060) agent (default: false)
   --pyroscope value       pyroscope address
   --no-color              disable colors (default: false)
   --help, -h              show help (default: false)
   --version, -V           print version only (default: false)

COPYRIGHT:
   Apache License 2.0
```

## Auto completion {#auto-completion}

To enable commands completion, simply source the script provided within [`hack/autocomplete`](https://github.com/juicedata/juicefs/tree/main/hack/autocomplete) directory. For example:

<Tabs groupId="juicefs-cli-autocomplete">
  <TabItem value="bash" label="Bash">

```shell
source hack/autocomplete/bash_autocomplete
```

  </TabItem>
  <TabItem value="zsh" label="Zsh">

```shell
source hack/autocomplete/zsh_autocomplete
```

  </TabItem>
</Tabs>

Please note the auto-completion is only enabled for the current session. If you want to apply it for all new sessions, add the `source` command to `.bashrc` or `.zshrc`:

<Tabs groupId="juicefs-cli-autocomplete">
  <TabItem value="bash" label="Bash">

```shell
echo "source path/to/bash_autocomplete" >> ~/.bashrc
```

  </TabItem>
  <TabItem value="zsh" label="Zsh">

```shell
echo "source path/to/zsh_autocomplete" >> ~/.zshrc
```

  </TabItem>
</Tabs>

Alternatively, if you are using bash on a Linux system, you may just copy the script to `/etc/bash_completion.d` and rename it to `juicefs`:

```shell
cp hack/autocomplete/bash_autocomplete /etc/bash_completion.d/juicefs
source /etc/bash_completion.d/juicefs
```

## Global options {#global-options}

|Items|Description|
|-|-|
|`-v` `--verbose` `--debug`|Enable debug logs.|
|`-q` `--quiet`|Show only warning and error logs.|
|`--trace`|Enable more detailed debug logs than the `--debug` option.|
|`--no-agent`|Disable pprof agent.|
|`--pyroscope`|Config [Pyroscope](https://github.com/pyroscope-io/pyroscope) address, e.g. `http://localhost:4040`.|
|`--no-color`|Disable log color.|


## Admin {#admin}

### `juicefs format` {#format}

Create and format a file system, if a volume already exists with the same `META-URL`, this command will skip the format step. To adjust configurations for existing volumes, use [`juicefs config`](#config).

#### Synopsis

```shell
juicefs format [command options] META-URL NAME

# Create a simple test volume (data will be stored in a local directory)
juicefs format sqlite3://myjfs.db myjfs

# Create a volume with Redis and S3
juicefs format redis://localhost myjfs --storage=s3 --bucket=https://mybucket.s3.us-east-2.amazonaws.com

# Create a volume with password protected MySQL
juicefs format mysql://jfs:mypassword@(127.0.0.1:3306)/juicefs myjfs
# A safer alternative
META_PASSWORD=mypassword juicefs format mysql://jfs:@(127.0.0.1:3306)/juicefs myjfs
# Provide password from file
META_PASSWORD_FILE=/secret/mypassword.txt juicefs format mysql://jfs:@(127.0.0.1:3306)/juicefs myjfs

# Create a volume with quota enabled
juicefs format sqlite3://myjfs.db myjfs --inodes=1000000 --capacity=102400

# Create a volume with trash disabled
juicefs format sqlite3://myjfs.db myjfs --trash-days=0
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`NAME`|Name of the file system|
|`--force`|overwrite existing format (default: false)|
|`--no-update`|don't update existing volume (default: false)|

#### Data storage options {#format-data-storage-options}

|Items|Description|
|-|-|
|`--storage=file`|Object storage type (e.g. `s3`, `gs`, `oss`, `cos`) (default: `file`, refer to [documentation](../reference/how_to_set_up_object_storage.md#supported-object-storage) for all supported object storage types)|
|`--bucket=/var/jfs`|A bucket URL to store data (default: `$HOME/.juicefs/local` or `/var/jfs`)|
|`--access-key=value`|Access Key for object storage (can also be set via the environment variable `ACCESS_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--secret-key value`|Secret Key for object storage (can also be set via the environment variable `SECRET_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--session-token=value`|session token for object storage, see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#session-token) for more.|
|`--storage-class value` <VersionAdd>1.1</VersionAdd> |the default storage class|

#### Data format options {#format-data-format-options}

|Items|Description|
|-|-|
|`--block-size=4M`|size of block in KiB (default: 4M). 4M is usually a better default value because many object storage services use 4M as their internal block size, thus using the same block size in JuiceFS usually yields better performance.|
|`--compress=none`|compression algorithm, choose from `lz4`, `zstd`, `none` (default). Enabling compression will inevitably affect performance. Among the two supported algorithms, `lz4` offers a better performance, while `zstd` comes with a higher compression ratio, Google for their detailed comparison.|
|`--encrypt-rsa-key=value`|A path to RSA private key (PEM)|
|`--encrypt-algo=aes256gcm-rsa`|encrypt algorithm (aes256gcm-rsa, chacha20-rsa) (default: "aes256gcm-rsa")|
|`--hash-prefix`|For most object storages, if object storage blocks are sequentially named, they will also be closely stored in the underlying physical regions. When loaded with intensive concurrent consecutive reads, this can cause hotspots and hinder object storage performance.<br/><br/>Enabling `--hash-prefix` will add a hash prefix to name of the blocks (slice ID mod 256, see [internal implementation](../development/internals.md#object-storage-naming-format)), this distributes data blocks evenly across actual object storage regions, offering more consistent performance. Obviously, this option dictates object naming pattern and **should be specified when a file system is created, and cannot be changed on-the-fly.**<br/><br/>Currently, [AWS S3](https://aws.amazon.com/about-aws/whats-new/2018/07/amazon-s3-announces-increased-request-rate-performance) had already made improvements and no longer require application side optimization, but for other types of object storages, this option still recommended for large scale scenarios.|
|`--shards=0`|If your object storage limit speed in a bucket level (or you're using a self-hosted object storage with limited performance), you can store the blocks into N buckets by hash of key (default: 0), when N is greater than 0, `bucket` should to be in the form of `%d`, e.g. `--bucket "juicefs-%d"`. `--shards` cannot be changed afterwards and must be planned carefully ahead.|

#### Management options {#format-management-options}

|Items|Description|
|-|-|
|`--capacity=0`|storage space limit in GiB, default to 0 which means no limit. Capacity will include trash files, if [trash](../security/trash.md) is enabled.|
|`--inodes=0`|Limit the number of inodes, default to 0 which means no limit.|
|`--trash-days=1`|By default, delete files are put into [trash](../security/trash.md), this option controls the number of days before trash files are expired, default to 1, set to 0 to disable trash.|
|`--enable-acl=true` <VersionAdd>1.2</VersionAdd>|enable [POSIX ACL](../security/posix_acl.md)，it is irreversible. |

### `juicefs config` {#config}

Change config of a volume. Note that after updating some settings, the client may not take effect immediately, and it needs to wait for a certain period of time. The specific waiting time can be controlled by the [`--heartbeat`](#mount-metadata-options) option.

#### Synopsis

```shell
juicefs config [command options] META-URL

# Show the current configurations
juicefs config redis://localhost

# Change volume "quota"
juicefs config redis://localhost --inodes 10000000 --capacity 1048576

# Change maximum days before files in trash are deleted
juicefs config redis://localhost --trash-days 7

# Limit client version that is allowed to connect
juicefs config redis://localhost --min-client-version 1.0.0 --max-client-version 1.1.0
```

#### Options

|Items|Description|
|-|-|
|`--yes, -y`|automatically answer 'yes' to all prompts and run non-interactively (default: false)|
|`--force`|skip sanity check and force update the configurations (default: false)|

#### Data storage options {#config-data-storage-options}

|Items|Description|
|-|-|
|`--storage=file` <VersionAdd>1.1</VersionAdd> |Object storage type (e.g. `s3`, `gs`, `oss`, `cos`) (default: `"file"`, refer to [documentation](../reference/how_to_set_up_object_storage.md#supported-object-storage) for all supported object storage types).|
|`--bucket=/var/jfs`|A bucket URL to store data (default: `$HOME/.juicefs/local` or `/var/jfs`)|
|`--access-key=value`|Access Key for object storage (can also be set via the environment variable `ACCESS_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--secret-key value`|Secret Key for object storage (can also be set via the environment variable `SECRET_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--session-token=value`|session token for object storage, see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#session-token) for more.|
|`--storage-class value` <VersionAdd>1.1</VersionAdd> |the default storage class|
|`--upload-limit=0`|bandwidth limit for upload in Mbps (default: 0)|
|`--download-limit=0`|bandwidth limit for download in Mbps (default: 0)|

#### Management options {#config-management-options}

|Items|Description|
|-|-|
|`--capacity value`|limit for space in GiB|
|`--inodes value`|limit for number of inodes|
|`--trash-days value`|number of days after which removed files will be permanently deleted|
|`--enable-acl` <VersionAdd>1.2</VersionAdd>|enable [POSIX ACL](../security/posix_acl.md) (irreversible), at the same time, the minimum client version allowed to connect will be upgraded to v1.2|
|`--encrypt-secret`|encrypt the secret key if it was previously stored in plain format (default: false)|
|`--min-client-version value` <VersionAdd>1.1</VersionAdd> |minimum client version allowed to connect|
|`--max-client-version value` <VersionAdd>1.1</VersionAdd> |maximum client version allowed to connect|
|`--dir-stats` <VersionAdd>1.1</VersionAdd> |enable dir stats, which is necessary for fast summary and dir quota (default: false)|

### `juicefs quota` <VersionAdd>1.1</VersionAdd> {#quota}

Manage directory quotas

#### Synopsis

```shell
juicefs quota command [command options] META-URL

# Set quota to a directory
juicefs quota set redis://localhost --path /dir1 --capacity 1 --inodes 100

# Get quota of a directory
juicefs quota get redis://localhost --path /dir1

# List all directory quotas
juicefs quota list redis://localhost

# Delete quota of a directory
juicefs quota delete redis://localhost --path /dir1

# Check quota consistency of a directory
juicefs quota check redis://localhost
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see "[JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md)" for details.|
|`--path value`|full path of the directory within the volume|
|`--capacity value`|hard quota of the directory limiting its usage of space in GiB (default: 0)|
|`--inodes value`|hard quota of the directory limiting its number of inodes (default: 0)|
|`--repair`|repair inconsistent quota (default: false)|
|`--strict`|calculate total usage of directory in strict mode (NOTE: may be slow for huge directory) (default: false)|

### `juicefs destroy` {#destroy}

Destroy an existing volume, will delete relevant data in metadata engine and object storage. See [How to destroy a file system](../administration/destroy.md).

#### Synopsis

```shell
juicefs destroy [command options] META-URL UUID

juicefs destroy redis://localhost e94d66a8-2339-4abd-b8d8-6812df737892
```

#### Options

|Items|Description|
|-|-|
|`--yes, -y` <VersionAdd>1.1</VersionAdd> |automatically answer 'yes' to all prompts and run non-interactively (default: false)|
|`--force`|skip sanity check and force destroy the volume (default: false)|

### `juicefs gc` {#gc}

If for some reason, a object storage block escape JuiceFS management completely, i.e. the metadata is gone, but the block still persists in the object storage, and cannot be released, this is called an "object leak". If this happens without any special file system manipulation, it could well indicate a bug within JuiceFS, file a [GitHub Issue](https://github.com/juicedata/juicefs/issues/new/choose) to let us know.

Meanwhile, you can run this command to deal with leaked objects. It also deletes stale slices produced by file overwrites. See [Status Check & Maintenance](../administration/status_check_and_maintenance.md#gc).

#### Synopsis

```shell
juicefs gc [command options] META-URL

# Check only, no writable change
juicefs gc redis://localhost

# Trigger compaction of all slices
juicefs gc redis://localhost --compact

# Delete leaked objects
juicefs gc redis://localhost --delete
```

#### Options

|Items|Description|
|-|-|
|`--compact`|compact all chunks with more than 1 slices (default: false).|
|`--delete`|delete leaked objects (default: false)|
|`--threads=10`|number of threads to delete leaked objects (default: 10)|

### `juicefs fsck` {#fsck}

Check consistency of file system.

#### Synopsis

```shell
juicefs fsck [command options] META-URL

juicefs fsck redis://localhost
```

#### Options

|Items|Description|
|-|-|
|`--path value` <VersionAdd>1.1</VersionAdd> |absolute path within JuiceFS to check|
|`--repair` <VersionAdd>1.1</VersionAdd> |repair specified path if it's broken (default: false)|
|`--recursive, -r` <VersionAdd>1.1</VersionAdd> |recursively check or repair (default: false)|
|`--sync-dir-stat` <VersionAdd>1.1</VersionAdd> |sync stat of all directories, even if they are existed and not broken (NOTE: it may take a long time for huge trees) (default: false)|

### `juicefs restore` <VersionAdd>1.1</VersionAdd> {#restore}

Rebuild the tree structure for trash files, and put them back to original directories.

#### Synopsis

```shell
juicefs restore [command options] META HOUR ...

juicefs restore redis://localhost/1 2023-05-10-01
```

#### Options

|Items|Description|
|-|-|
|`--put-back value`|move the recovered files into original directory (default: false)|
|`--threads value`|number of threads (default: 10)|

### `juicefs dump` {#dump}

Dump metadata into a JSON file. Refer to ["Metadata backup"](../administration/metadata_dump_load.md#backup) for more information.

#### Synopsis

```shell
juicefs dump [command options] META-URL [FILE]

# Export metadata to meta-dump.json
juicefs dump redis://localhost meta-dump.json

# Export metadata for only one subdirectory of the file system
juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`FILE`|Export file path, if not specified, it will be exported to standard output. If the filename ends with `.gz`, it will be automatically compressed.|
|`--subdir=path`|Only export metadata for the specified subdirectory.|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd> |Export object storage authentication information, the default is `false`. Since it is exported in plain text, pay attention to data security when using it. If the export file does not contain object storage authentication information, you need to use [`juicefs config`](#config) to reconfigure object storage authentication information after the subsequent import is completed.|
|`--threads=10` <VersionAdd>1.2</VersionAdd>|number of threads to dump metadata. (default: 10)|
|`--fast` <VersionAdd>1.2</VersionAdd>|Use more memory to speedup dump.|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|Skip files and directories in trash.|

### `juicefs load` {#load}

Load metadata from a previously dumped JSON file. Read ["Metadata recovery and migration"](../administration/metadata_dump_load.md#recovery-and-migration) to learn more.

#### Synopsis

```shell
juicefs load [command options] META-URL [FILE]

# Import the metadata backup file meta-dump.json to the database
juicefs load redis://127.0.0.1:6379/1 meta-dump.json
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`FILE`|Import file path, if not specified, it will be imported from standard input. If the filename ends with `.gz`, it will be automatically decompressed.|
|`--encrypt-rsa-key=path` <VersionAdd>1.0.4</VersionAdd> |The path to the RSA private key file used for encryption.|
|`--encrypt-alg=aes256gcm-rsa` <VersionAdd>1.0.4</VersionAdd> |Encryption algorithm, the default is `aes256gcm-rsa`.|

## Inspector {#inspector}

### `juicefs status` {#status}

Show status of JuiceFS.

#### Synopsis

```shell
juicefs status [command options] META-URL

juicefs status redis://localhost
```

#### Options

|Items|Description|
|-|-|
|`--session=0, -s 0`|show detailed information (sustained inodes, locks) of the specified session (SID) (default: 0)|
|`--more, -m` <VersionAdd>1.1</VersionAdd> |show more statistic information, may take a long time (default: false)|

### `juicefs stats` {#stats}

Show runtime statistics, read [Real-time performance monitoring](../administration/fault_diagnosis_and_analysis.md#performance-monitor) for more.

#### Synopsis

```shell
juicefs stats [command options] MOUNTPOINT

juicefs stats /mnt/jfs

# More metrics
juicefs stats /mnt/jfs -l 1
```

#### Options

|Items|Description|
|-|-|
|`--schema=ufmco`|schema string that controls the output sections (`u`: usage, `f`: FUSE, `m`: metadata, `c`: block cache, `o`: object storage, `g`: Go) (default: `ufmco`)|
|`--interval=1`|interval in seconds between each update (default: 1)|
|`--verbosity=0`|verbosity level, 0 or 1 is enough for most cases (default: 0)|

### `juicefs profile` {#profile}

Show profiling of operations completed in JuiceFS, based on [access log](../administration/fault_diagnosis_and_analysis.md#access-log). read [Real-time performance monitoring](../administration/fault_diagnosis_and_analysis.md#performance-monitor) for more.

#### Synopsis

```shell
juicefs profile [command options] MOUNTPOINT/LOGFILE

# Monitor real time operations
juicefs profile /mnt/jfs

# Replay an access log
cat /mnt/jfs/.accesslog > /tmp/jfs.alog
# Press Ctrl-C to stop the "cat" command after some time
juicefs profile /tmp/jfs.alog

# Analyze an access log and print the total statistics immediately
juicefs profile /tmp/jfs.alog --interval 0
```

#### Options

|Items|Description|
|-|-|
|`--uid=value, -u value`|only track specified UIDs (separated by comma)|
|`--gid=value, -g value`|only track specified GIDs (separated by comma)|
|`--pid=value, -p value`|only track specified PIDs (separated by comma)|
|`--interval=2`|flush interval in seconds; set it to 0 when replaying a log file to get an immediate result (default: 2)|

### `juicefs info` {#info}

Show internal information for given paths or inodes.

#### Synopsis

```shell
juicefs info [command options] PATH or INODE

# Check a path
juicefs info /mnt/jfs/foo

# Check an inode
cd /mnt/jfs
juicefs info -i 100
```

#### Options

|Items|Description|
|-|-|
|`--inode, -i`|use inode instead of path (current dir should be inside JuiceFS) (default: false)|
|`--recursive, -r`|get summary of directories recursively (NOTE: it may take a long time for huge trees) (default: false)|
|`--strict` <VersionAdd>1.1</VersionAdd> |get accurate summary of directories (NOTE: it may take a long time for huge trees) (default: false)|
|`--raw`|show internal raw information (default: false)|

### `juicefs debug` <VersionAdd>1.1</VersionAdd> {#debug}

It collects and displays information from multiple dimensions such as the operating environment and system logs to help better locate errors

#### Synopsis

```shell
juicefs debug [command options] MOUNTPOINT

# Collect and display information about the mount point /mnt/jfs
juicefs debug /mnt/jfs

# Specify the output directory as /var/log
juicefs debug --out-dir=/var/log /mnt/jfs

# Get the last up to 1000 log entries
juicefs debug --out-dir=/var/log --limit=1000 /mnt/jfs
```

#### Options

|Items|Description|
|-|-|
|`--out-dir=./debug/`|The output directory of the results, automatically created if the directory does not exist (default: `./debug/`)|
|`--limit=value`|The number of log entries collected, from newest to oldest, if not specified, all entries will be collected|
|`--stats-sec=5`|The number of seconds to sample .stats file (default: 5)|
|`--trace-sec=5`|The number of seconds to sample trace metrics (default: 5)|
|`--profile-sec=30`|The number of seconds to sample profile metrics (default: 30)|

### `juicefs summary` <VersionAdd>1.1</VersionAdd> {#summary}

It is used to show tree summary of target directory.

#### Synopsis

```shell
juicefs summary [command options] PATH

# Show with path
juicefs summary /mnt/jfs/foo

# Show max depth of 5
juicefs summary --depth 5 /mnt/jfs/foo

# Show top 20 entries
juicefs summary --entries 20 /mnt/jfs/foo

# Show accurate result
juicefs summary --strict /mnt/jfs/foo
```

#### Options

|Items|Description|
|-|-|
|`--depth value, -d value`|depth of tree to show (zero means only show root) (default: 2)|
|`--entries value, -e value`|show top N entries (sort by size) (default: 10)|
|`--strict`|show accurate summary, including directories and files (may be slow) (default: false)|
|`--csv`|print summary in csv format (default: false)|

## Service {#service}

### `juicefs mount` {#mount}

Mount a volume. The volume must be formatted in advance.

JuiceFS can be mounted by root or normal user, but due to their privilege differences, cache directory and log path will vary, read below descriptions for more.

#### Synopsis

```shell
juicefs mount [command options] META-URL MOUNTPOINT

# Mount in foreground
juicefs mount redis://localhost /mnt/jfs

# Mount in background with password protected Redis
juicefs mount redis://:mypassword@localhost /mnt/jfs -d
# A safer alternative
META_PASSWORD=mypassword juicefs mount redis://localhost /mnt/jfs -d

# Mount with a sub-directory as root
juicefs mount redis://localhost /mnt/jfs --subdir /dir/in/jfs

# Enable "writeback" mode, which improves performance at the risk of losing objects
juicefs mount redis://localhost /mnt/jfs -d --writeback

# Enable "read-only" mode
juicefs mount redis://localhost /mnt/jfs -d --read-only

# Disable metadata backup
juicefs mount redis://localhost /mnt/jfs --backup-meta 0
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`MOUNTPOINT`|file system mount point, e.g. `/mnt/jfs`, `Z:`.|
|`-d, --background`|run in background (default: false)|
|`--no-syslog`|disable syslog (default: false)|
|`--log=path`|path of log file when running in background (default: `$HOME/.juicefs/juicefs.log` or `/var/log/juicefs.log`)|
|`--force`|force to mount even if the mount point is already mounted by the same filesystem.|
|`--update-fstab` <VersionAdd>1.1</VersionAdd> |add / update entry in `/etc/fstab`, will create a symlink from `/sbin/mount.juicefs` to JuiceFS executable if not existing (default: false)|
|`--disable-transparent-hugepage` <VersionAdd>1.3</VersionAdd> |Disable the kernel’s Transparent Huge Page (THP). In situations like memory pressure, keeping THP enabled may cause processes to hang. (default: false)|

#### FUSE related options {#mount-fuse-options}

|Items|Description|
|-|-|
|`--enable-xattr`|enable extended attributes (xattr) (default: false)|
|`--enable-cap` <VersionAdd>1.3</VersionAdd>|enable security.capability xattr (default: false)|
|`--enable-selinux` <VersionAdd>1.3</VersionAdd>|enable security.selinux xattr (default: false)|
|`--enable-ioctl` <VersionAdd>1.1</VersionAdd> |enable ioctl (support GETFLAGS/SETFLAGS only) (default: false)|
|`--root-squash value` <VersionAdd>1.1</VersionAdd> |mapping local root user (UID = 0) to another one specified as UID:GID|
|`--all-squash value` <VersionAdd>1.3</VersionAdd> |mapping all users to another one specified as UID:GID|
|`--umask value` <VersionAdd>1.3</VersionAdd> |umask for new file and directory in octal|
|`--prefix-internal` <VersionAdd>1.1</VersionAdd> |add '.jfs' prefix to all internal files (default: false)|
|`--max-fuse-io=128K` <VersionAdd>1.3</VersionAdd>|maximum size for fuse request (default: 128K)|
|`-o value`|other FUSE options, see [FUSE Mount Options](../reference/fuse_mount_options.md)|

<CommonOptions />

<!-- Note: The purpose of the following HTML is only to avoid reporting errors when checking for broken links (because these headers are in the "_common_options.mdx" file), and will not be displayed on the actual page. Please do not delete or move it (must be placed below the "<CommonOptions />" line). -->
<div style={{ display: 'none' }}>

#### {#mount-metadata-options}
#### {#mount-metadata-cache-options}
#### {#mount-data-storage-options}
#### {#mount-data-cache-options}
#### {#mount-metrics-options}

</div>

### `juicefs umount` {#umount}

Unmount a volume.

#### Synopsis

```shell
juicefs umount [command options] MOUNTPOINT

juicefs umount /mnt/jfs
```

#### Options

|Items|Description|
|-|-|
|`-f, --force`|force unmount a busy mount point (default: false)|
|`--flush` <VersionAdd>1.1</VersionAdd> |wait for all staging chunks to be flushed (default: false)|

### `juicefs gateway` {#gateway}

Start an S3-compatible gateway, read [Deploy JuiceFS S3 Gateway](../guide/gateway.md) for more.

#### Synopsis

```shell
juicefs gateway [command options] META-URL ADDRESS

export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=12345678
juicefs gateway redis://localhost localhost:9000
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`ADDRESS`|S3 gateway address and listening port, for example: `localhost:9000`|
|`--log value` <VersionAdd>1.2</VersionAdd>|path for gateway log|
|`--access-log=path`|path for JuiceFS access log.|
|`--background, -d` <VersionAdd>1.2</VersionAdd>|run in background (default: false)|
|`--no-banner`|disable MinIO startup information (default: false)|
|`--multi-buckets`|use top level of directories as buckets (default: false)|
|`--keep-etag`|save the ETag for uploaded objects (default: false)|
|`--umask=022`|umask for new file and directory in octal (default: 022)|
|`--object-tag` <VersionAdd>1.2</VersionAdd>|enable object tagging API|
|`--domain value` <VersionAdd>1.2</VersionAdd>|domain for virtual-host-style requests|
|`--refresh-iam-interval=5m` <VersionAdd>1.2</VersionAdd>|interval to reload gateway IAM from configuration (default: 5m)|

<CommonOptions />

### `juicefs webdav` {#webdav}

Start a WebDAV server, refer to [Deploy WebDAV Server](../deployment/webdav.md) for more.

#### Synopsis

```shell
juicefs webdav [command options] META-URL ADDRESS

juicefs webdav redis://localhost localhost:9007
```

#### Options

|Items|Description|
|-|-|
|`META-URL`|Database URL for metadata storage, see [JuiceFS supported metadata engines](../reference/how_to_set_up_metadata_engine.md) for details.|
|`ADDRESS`|WebDAV address and listening port, for example: `localhost:9007`.|
|`--cert-file` <VersionAdd>1.1</VersionAdd>|certificate file for HTTPS|
|`--key-file` <VersionAdd>1.1</VersionAdd>|key file for HTTPS|
|`--gzip`|compress served files via gzip (default: false)|
|`--disallowList`|disallow list a directory (default: false)|
|`--enable-proppatch` <VersionAdd>1.3</VersionAdd>|enable proppatch method support|
|`--log value` <VersionAdd>1.2</VersionAdd>|path for WebDAV log|
|`--access-log=path`|path for JuiceFS access log|
|`--background, -d` <VersionAdd>1.2</VersionAdd>|run in background (default: false)|
|`--threads=50, -p 50` <VersionAdd>1.3</VersionAdd>|number of threads for delete jobs (max 255)|

<CommonOptions />

## Tool {#tool}

### `juicefs bench` {#bench}

Run benchmark, including read/write/stat for big and small files.
For a detailed introduction to the `bench` subcommand, refer to the [documentation](../benchmark/performance_evaluation_guide.md#juicefs-bench).

#### Synopsis

```shell
juicefs bench [command options] PATH

# Run benchmarks with 4 threads
juicefs bench /mnt/jfs -p 4

# Run benchmarks of only small files
juicefs bench /mnt/jfs --big-file-size 0
```

#### Options

|Items|Description|
|-|-|
|`--block-size=1`|block size in MiB (default: 1)|
|`--big-file-size=1024`|size of big file in MiB (default: 1024)|
|`--small-file-size=128`|size of small file in KiB (default: 128)|
|`--small-file-count=100`|number of small files (default: 100)|
|`--threads=1, -p 1`|number of concurrent threads (default: 1)|

### `juicefs objbench` {#objbench}

Run basic benchmarks on the target object storage to test if it works as expected. Read [documentation](../benchmark/performance_evaluation_guide.md#juicefs-objbench) for more.

#### Synopsis

```shell
juicefs objbench [command options] BUCKET

# Run benchmarks on S3
ACCESS_KEY=myAccessKey SECRET_KEY=mySecretKey juicefs objbench --storage=s3 https://mybucket.s3.us-east-2.amazonaws.com -p 6
```

#### Options

|Items|Description|
|-|-|
|`--storage=file`|Object storage type (e.g. `s3`, `gs`, `oss`, `cos`) (default: `file`, refer to [documentation](../reference/how_to_set_up_object_storage.md#supported-object-storage) for all supported object storage types)|
|`--access-key=value`|Access Key for object storage (can also be set via the environment variable `ACCESS_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--secret-key value`|Secret Key for object storage (can also be set via the environment variable `SECRET_KEY`), see [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md#aksk) for more.|
|`--session-token value` <VersionAdd>1.0</VersionAdd>|session token for object storage|
|`--shards`<VersionAdd>1.3</VersionAdd>|If your object storage limit speed in a bucket level (or you're using a self-hosted object storage with limited performance), you can store the blocks into N buckets by hash of key (default: 0), when N is greater than 0, `bucket` should to be in the form of `%d`, e.g. `--bucket "juicefs-%d"`. `--shards` cannot be changed afterwards and must be planned carefully ahead.|
|`--block-size=4096`|size of each IO block in KiB (default: 4096)|
|`--big-object-size=1024`|size of each big object in MiB (default: 1024)|
|`--small-object-size=128`|size of each small object in KiB (default: 128)|
|`--small-objects=100`|number of small objects (default: 100)|
|`--skip-functional-tests`|skip functional tests (default: false)|
|`--threads=4, -p 4`|number of concurrent threads (default: 4)|

### `juicefs warmup` {#warmup}

Download data to local cache in advance, to achieve better performance on application's first read. You can specify a mount point path to recursively warm-up all files under this path. You can also specify a file through the `--file` option to only warm-up the files contained in it.

If the files needing warming up resides in many different directories, you should specify their names in a text file, and pass to the `warmup` command using the `--file` option, allowing `juicefs warmup` to download concurrently, which is significantly faster than calling `juicefs warmup` multiple times, each with a single file.

#### Synopsis

```shell
juicefs warmup [command options] [PATH ...]

# Warm up all files in datadir
juicefs warmup /mnt/jfs/datadir

# Warm up selected files
echo '/jfs/f1
/jfs/f2
/jfs/f3' > /tmp/filelist.txt
juicefs warmup -f /tmp/filelist.txt
```

#### Options

|Items|Description|
|-|-|
|`--file=path, -f path`|file containing a list of paths (each line is a file path)|
|`--threads=50, -p 50`|number of concurrent workers, default to 50. Reduce this number in low bandwidth environment to avoid download timeouts|
|`--background, -b`|run in background (default: false)|
|`--evict` <VersionAdd>1.2</VersionAdd>|evict cached blocks|
|`--check` <VersionAdd>1.2</VersionAdd>|check whether the data blocks are cached or not|

### `juicefs rmr` {#rmr}

Remove all the files and subdirectories, similar to `rm -rf`, except this command deals with metadata directly (bypassing kernel), thus is much faster.

If trash is enabled, deleted files are moved into trash. Read more at [Trash](../security/trash.md).

#### Synopsis

```shell
juicefs rmr PATH ...

juicefs rmr /mnt/jfs/foo
```

#### Options

|Items|Description|
|-|-|
|`--skip-trash`<VersionAdd>1.3</VersionAdd>|skip trash and delete files directly (requires root)|
|`--threads=50, -p 50`<VersionAdd>1.3</VersionAdd>|number of threads for delete jobs (max 255)|

### `juicefs sync` {#sync}

Sync between two storage, read [Data migration](../guide/sync.md) for more.

#### Synopsis

```shell
juicefs sync [command options] SRC DST

# Sync object from OSS to S3
juicefs sync oss://mybucket.oss-cn-shanghai.aliyuncs.com s3://mybucket.s3.us-east-2.amazonaws.com

# Sync objects from S3 to JuiceFS
juicefs sync s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# SRC: a1/b1,a2/b2,aaa/b1   DST: empty   sync result: aaa/b1
juicefs sync --exclude='a?/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# SRC: a1/b1,a2/b2,aaa/b1   DST: empty   sync result: a1/b1,aaa/b1
juicefs sync --include='a1/b1' --exclude='a[1-9]/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# SRC: a1/b1,a2/b2,aaa/b1,b1,b2  DST: empty   sync result: b2
juicefs sync --include='a1/b1' --exclude='a*' --include='b2' --exclude='b?' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/
```

As shown in the examples, the format of both source (`SRC`) and destination (`DST`) paths is:

```
[NAME://][ACCESS_KEY:SECRET_KEY[:TOKEN]@]BUCKET[.ENDPOINT][/PREFIX]
```

In which:

- `NAME`: JuiceFS supported data storage types like `s3`, `oss`, refer to [this document](../reference/how_to_set_up_object_storage.md#supported-object-storage) for a full list.
- `ACCESS_KEY` and `SECRET_KEY`: The credential required to access the data storage, special characters need to be [URL encoded](https://www.w3schools.com/tags/ref_urlencode.ASP), e.g. `/` must be substituted with `%2F`. If you are not familiar with AKSK management, refer to [this document](../reference/how_to_set_up_object_storage.md#aksk).
- `TOKEN` token used to access the object storage, as some object storage supports the use of temporary token to obtain permission for a limited time.
- `BUCKET[.ENDPOINT]`: The access address of the data storage service. The format may be different for different storage types, and refer to [the document](../reference/how_to_set_up_object_storage.md#supported-object-storage).
- `[/PREFIX]`: Optional, a prefix for the source and destination paths that can be used to limit synchronization of data only in certain paths.

#### Selection related options {#sync-selection-related-options}

|Items|Description|
|-|-|
|`--files-from` <VersionAdd>1.3</VersionAdd>|Only synchronize the objects recorded in the given file, where each line is the relative path of the object to be synchronized. If the object is a directory, it is recommended to end with `/`.|
|`--start=KEY, -s KEY, --end=KEY, -e KEY`|Provide object storage key range for syncing.|
|`--end KEY, -e KEY`|the last `KEY` to sync|
|`--exclude=PATTERN`|Exclude keys matching `PATTERN`. Refer to the ["Filtering"](../guide/sync.md#filtering) document to learn how to use it.|
|`--include=PATTERN`|Include keys matching `PATTERN`, need to be used with `--exclude`. Refer to the ["Filtering"](../guide/sync.md#filtering) document to learn how to use it.|
|`--match-full-path` <VersionAdd>1.2</VersionAdd>|Use "Full path filtering mode", default is false. Refer to the ["Filtering modes"](../guide/sync.md#filtering-mode) document to learn how to use it.|
|`--max-size-SIZE` <VersionAdd>1.2</VersionAdd>|skip files larger than `SIZE`|
|`--min-size-SIZE` <VersionAdd>1.2</VersionAdd>|skip files smaller than `SIZE`|
|`--max-age=DURATION` <VersionAdd>1.2</VersionAdd>|Skip files whose last modification time exceeds `DURATION`, in seconds. For example, `--max-age=3600` means to synchronize only files that have been modified within 1 hour.|
|`--min-age=DURATION` <VersionAdd>1.2</VersionAdd>|Skip files whose last modification time is no more than `DURATION`, in seconds. For example, `--min-age=3600` means to synchronize only files whose last modification time is more than 1 hour from the current time.|
|`--start-time` <VersionAdd>1.3</VersionAdd>|skip files modified before start-time. example: 2006-01-02 15:04:05|
|`--end-time` <VersionAdd>1.3</VersionAdd>|skip files modified after end-time. example: 2006-01-02 15:04:05|
|`--limit=-1`|Limit the number of objects that will be processed, default to -1 which means unlimited.|
|`--update, -u`|Update existing files if the source files' `mtime` is newer, default to false.|
|`--force-update, -f`|Always update existing file, default to false.|
|`--existing, --ignore-non-existing` <VersionAdd>1.1</VersionAdd> |Skip creating new files on destination, default to false.|
|`--ignore-existing` <VersionAdd>1.1</VersionAdd> |Skip updating files that already exist on destination, default to false.|

#### Action related options {#sync-action-related-options}

|Items|Description|
|-|-|
|`--dirs`|Sync empty directories as well.|
|`--perms`|Preserve permissions, default to false.|
|`--links, -l`|Copy symlinks as symlinks default to false.|
|`--inplace` <VersionAdd>1.2</VersionAdd>|When a file in the source path is modified, directly modify the file with the same name in the destination path instead of first writing a temporary file in the destination path and then atomically renaming the temporary file to the real file name. This option only makes sense when the `--update` option is enabled and the storage system of the destination path supports in-place modification of files (such as JuiceFS, HDFS, NFS). That is to say, if the storage system of the destination path is object storage, enable this option is invalid. (default: false)|
|`--delete-src, --deleteSrc`|Delete objects that already exist in destination. Different from rsync, files won't be deleted at the first run, instead they will be deleted at the next run, after files are successfully copied to the destination.|
|`--delete-dst, --deleteDst`|Delete extraneous objects from destination.|
|`--check-all`|Verify the integrity of all files in source and destination, default to false. Comparison is done on byte streams, which comes at a performance cost.|
|`--check-new`|Verify the integrity of newly copied files, default to false. Comparison is done on byte streams, which comes at a performance cost.|
|`--check-change` <VersionAdd>1.3</VersionAdd>|Verify whether the data has changed before and after synchronization, default is false. Based on file size and mtime, which is lightweight.|
|`--max-failure`<VersionAdd>1.3</VersionAdd> |max number of allowed failed files (-1 for unlimited)|
|`--dry`|Don't actually copy any file.|

#### Storage related options {#sync-storage-related-options}

|Items|Description|
|-|-|
|`--threads=10, -p 10`|Number of concurrent threads, default to 10.|
|`--list-threads=1` <VersionAdd>1.1</VersionAdd> |Number of `list` threads, default to 1. Read [concurrent `list`](../guide/sync.md#concurrent-list) to learn its usage.|
|`--list-depth=1` <VersionAdd>1.1</VersionAdd> |Depth of concurrent `list` operation, default to 1. Read [concurrent `list`](../guide/sync.md#concurrent-list) to learn its usage.|
|`--no-https`|Do not use HTTPS, default to false.|
|`--storage-class value` <VersionAdd>1.1</VersionAdd> |the storage class for destination|
|`--bwlimit=0`|Limit bandwidth in Mbps default to 0 which means unlimited.|

#### Cluster related options {#sync-cluster-related-options}

|Items| Description|
|-|-|
|`--manager-addr=ADDR`| The listening address of the Manager node in distributed synchronization mode in the format: `<IP>:[port]`. If not specified, it listens on a random port. If this option is omitted, it listens on a random local IPv4 address and a random port. |
|`--worker=ADDR,ADDR`| Worker node addresses used in distributed syncing, comma separated. |

#### Metrics related options {#sync-metircs-related-options}

|Items|Description|
|-|-|
|`--metrics value` <VersionAdd>1.2</VersionAdd>|address to export metrics (default: "127.0.0.1:9567")|
|`--consul value` <VersionAdd>1.2</VersionAdd>|Consul address to register (default: "127.0.0.1:8500")|

### `juicefs clone` <VersionAdd>1.1</VersionAdd> {#clone}

Quickly clone directories or files within a single JuiceFS mount point. The cloning process involves copying only the metadata without copying the data blocks, making it extremely fast. Read [Clone Files or Directories](../guide/clone.md) for more.

#### Synopsis

```shell
juicefs clone [command options] SRC DST

# Clone a file
juicefs clone /mnt/jfs/file1 /mnt/jfs/file2

# Clone a directory
juicefs clone /mnt/jfs/dir1 /mnt/jfs/dir2

# Preserve the UID, GID, and mode of the file
juicefs clone -p /mnt/jfs/file1 /mnt/jfs/file2
```

#### Options

|Items|Description|
|-|-|
|`--preserve, -p`|By default, the executor's UID and GID are used for the clone result, and the mode is recalculated based on the user's umask. Use this option to preserve the UID, GID, and mode of the file.|

### `juicefs compact` <VersionAdd>1.2</VersionAdd> {#compact}

Performs fragmentation optimization, merging, or cleaning of non-contiguous slices in the given directory to improve read performance. For detailed information, refer to [「Status Check and Maintenance」](../administration/status_check_and_maintenance.md).

#### Overview

```shell
juicefs compact [command options] PATH

# Perform fragmentation optimization on the specified directory
juicefs compact /mnt/jfs
```

#### Parameters

| Item | Description |
|-|-|
| `--threads, -p` | Number of threads to concurrently execute tasks (default: 10) |


================================================
FILE: docs/en/reference/fuse_mount_options.md
================================================
---
title: FUSE Mount Options
sidebar_position: 5
slug: /fuse_mount_options
---

JuiceFS provides several access methods, FUSE is the common one, which is the way to mount the file system locally using the `juicefs mount` command. Users can add FUSE mount options for more granular control.

This guide describes the common FUSE mount options for JuiceFS, with two ways to add mount options:

1. Run [`juicefs mount`](../reference/command_reference.mdx#mount), and use `-o` to specify multiple options separated by commas.

   ```bash
   juicefs mount -d -o allow_other,writeback_cache sqlite3://myjfs.db ~/jfs
   ```

2. When writing `/etc/fstab` items, add FUSE options directly to the `options` field, with multiple options separated by commas.

   ```
   # <file system>       <mount point>   <type>      <options>           <dump>  <pass>
   redis://localhost:6379/1    /jfs      juicefs     _netdev,writeback_cache   0       0
   ```

## default_permissions

This option is automatically enabled when JuiceFS is mounted and does not need to be explicitly specified. It will enable the kernel's file access checks, which are performed outside the filesystem. When enabled, both the kernel checks and the file system checks must succeed before further operations.

:::tip
The kernel performs standard Unix permission checks based on mode bits, UID/GID, and directory entry ownership.
:::

## allow_other

By default FUSE only allows access to the user mounting the file system. `allow_other` option overrides this behavior to allow access for other users. When mounting JuiceFS using root, `allow_other` is automatically assumed (search for `AllowOther` in [`fuse.go`](https://github.com/juicedata/juicefs/blob/main/pkg/fuse/fuse.go)). When mounting by non-root users, you'll need to first modify `/etc/fuse.conf` and enable `user_allow_other`, and then add `allow_other` to the mount command.

## writeback_cache

:::note
This mount option requires at least version 3.15 Linux kernel
:::

FUSE supports ["writeback-cache mode"](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt), which means the `write()` syscall can often complete rapidly. It's recommended to enable this mount option when write small data (e.g. 100 bytes) frequently.

## user_id and group_id

These options are used to specify the owner ID and owner group ID of the file system (as distinct from the UID and GID of a file or directory) for higher-level permission validation. If the allow_other option is specified, this option will not work. e.g. `sudo juicefs mount -o user_id=100,group_id=100`.

## debug

This option will output Debug information from the low-level library (`go-fuse`) to `juicefs.log`.

:::note
This option will output debug information for the low-level library (`go-fuse`) to `juicefs.log`. Note that this option is different from the global `-debug` option for the JuiceFS client, where the former outputs debug information for the `go-fuse` library and the latter outputs debug information for the JuiceFS client. see the documentation [Fault Diagnosis and Analysis](../administration/fault_diagnosis_and_analysis.md).
:::


================================================
FILE: docs/en/reference/how_to_set_up_metadata_engine.md
================================================
---
title: How to Set Up Metadata Engine
sidebar_position: 2
slug: /databases_for_metadata
description: JuiceFS supports Redis, TiKV, PostgreSQL, MySQL and other databases as metadata engines, and this article describes how to set up and use them.
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

:::tip
`META_PASSWORD` is supported from JuiceFS v1.0. You should [upgrade](../administration/upgrade.md) if you're still using older versions.
:::

JuiceFS is a decoupled structure that separates data and metadata. Metadata can be stored in any supported database (called Metadata Engine). Many databases are supported and they all comes with different performance and intended scenarios, refer to [our docs](../benchmark/metadata_engines_benchmark.md) for comparison.

## The storage usage of metadata {#storage-usage}

The storage space required for metadata is related to the length of the file name, the type and length of the file, and extended attributes. It is difficult to accurately estimate the metadata storage space requirements of a file system. For simplicity, we can approximate based on the storage space required for a single small file without extended attributes.

- **Key-Value Database** (e.g. Redis, TiKV): 300 bytes/file
- **Relational Database** (e.g. SQLite, MySQL, PostgreSQL): 600 bytes/file

When the average file is larger (over 64MB), or the file is frequently modified and has a lot of fragments, or there are many extended attributes, or the average file name is long (over 50 bytes), more storage space is needed.

When you need to migrate between two types of metadata engines, you can use this method to estimate the required storage space. For example, if you want to migrate the metadata engine from a relational database (MySQL) to a key-value database (Redis), and the current usage of MySQL is 30GB, then the target Redis needs to prepare at least 15GB or more of memory. The reverse is also true.

## Redis Compatible Database

### Redis

JuiceFS requires Redis version 4.0 and above. Redis Cluster is also supported, but in order to avoid transactions across different Redis instances, JuiceFS puts all metadata for one file system on a single Redis instance.

:::tip Redis Cluster Key Prefix
When using Redis Cluster, the database number in the URL is used as a **key prefix** rather than for actual database selection (since Redis Cluster only supports database 0). The prefix format is `{N}` (e.g., `{1}`, `{2}`), which uses Redis hash tags to ensure all keys for one volume are routed to the same slot. This allows multiple JuiceFS file systems to share a single Redis Cluster:

```shell
# Different volumes use different DB numbers as key prefixes
juicefs format redis://cluster:6379/1 volume1   # keys prefixed with {1}
juicefs format redis://cluster:6379/2 volume2   # keys prefixed with {2}
```

You can verify the keys in Redis Cluster using:

```shell
redis-cli -c -h <host> -p 6379 keys '{1}*'   # list all keys for volume with prefix {1}
```

:::

To ensure metadata security, JuiceFS requires [`maxmemory-policy noeviction`](https://redis.io/docs/reference/eviction/), otherwise it will try to set it to `noeviction` when starting JuiceFS, and will print a warning log if it fails. Refer to [Redis Best practices](../administration/metadata/redis_best_practices.md) for more.

#### Create a file system

When using Redis as the metadata storage engine, the following format is usually used to access the database:

<Tabs>
  <TabItem value="tcp" label="TCP">

```
redis[s]://[<username>:<password>@]<host>[:<port>]/<db>
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
unix://[<username>:<password>@]<socket-file-path>?db=<db>
```

  </TabItem>
</Tabs>

Where `[]` enclosed are optional and the rest are mandatory.

- If the [TLS](https://redis.io/docs/manual/security/encryption) feature of Redis is enabled, the protocol header needs to use `rediss://`, otherwise use `redis://`.
- `<username>` is introduced after Redis 6.0 and can be ignored if there is no username, but the `:` colon in front of the password needs to be kept, e.g. `redis://:<password>@<host>:6379/1`.
- The default port number on which Redis listens is `6379`, which can be left blank if the default port number is not changed, e.g. `redis://:<password>@<host>/1`.
- Redis supports multiple [logical databases](https://redis.io/commands/select), please replace `<db>` with the actual database number used.
- If you need to connect to Redis Sentinel, the format will be slightly different, refer to [Redis Best Practices](../administration/metadata/redis_best_practices.md#high-availability) for details.
- If username / password contains special characters, use single quote to avoid unexpected shell interpretations, or use the `REDIS_PASSWORD` environment.

:::tip
A Redis instance can, by default, create a total of 16 logical databases, with each of these databases eligible for the creation of a singular JuiceFS file system. Thus, under ordinary circumstances, a single Redis instance may be utilized to form up to 16 JuiceFS file systems. However, it is crucial to note that the logical databases intended for use with JuiceFS must not be shared with other applications, as doing so could lead to data inconsistencies.
:::

For example, the following command will create a JuiceFS file system named `pics`, using the database No. `1` in Redis to store metadata:

```shell
juicefs format \
    --storage s3 \
    ... \
    "redis://:mypassword@192.168.1.6:6379/1" \
    pics
```

For security purposes, it is recommended to pass the password using the environment variable `META_PASSWORD` or `REDIS_PASSWORD`, e.g.

```shell
export META_PASSWORD=mypassword
```

Similarly, the password can be provided from a file using:

```shell
export META_PASSWORD_FILE=/secret/mypassword.txt
```

Then there is no need to set a password in the metadata URL.

```shell
juicefs format \
    --storage s3 \
    ... \
    "redis://192.168.1.6:6379/1" \
    pics
```

#### Mount a file system

If you need to share the same file system across multiple nodes, ensure that all nodes has access to the Metadata Engine.

```shell
juicefs mount -d "redis://:mypassword@192.168.1.6:6379/1" /mnt/jfs
```

Passing passwords with the `META_PASSWORD` or `REDIS_PASSWORD` environment variables is also supported.

```shell
export META_PASSWORD=mypassword
juicefs mount -d "redis://192.168.1.6:6379/1" /mnt/jfs
```

Similarly, the password can be provided from a file using as follows:

```shell
export META_PASSWORD_FILE=/secret/mypassword.txt
juicefs mount -d "redis://192.168.1.6:6379/1" /mnt/jfs
```

#### Set up TLS

JuiceFS supports both TLS server-side encryption authentication and mTLS mutual encryption authentication connections to Redis. When connecting to Redis via TLS or mTLS, use the `rediss://` protocol header. However, when using TLS server-side encryption authentication, it is not necessary to specify the client certificate and private key.

:::note
Using Redis mTLS requires JuiceFS version 1.1.0 and above
:::

If Redis server has enabled mTLS feature, it is necessary to provide client certificate, private key, and CA certificate that issued the client certificate to connect. In JuiceFS, mTLS can be used in the following way:

```shell
juicefs format --storage s3 \
    ... \
    "rediss://192.168.1.6:6379/1?tls-cert-file=/etc/certs/client.crt&tls-key-file=/etc/certs/client.key&tls-ca-cert-file=/etc/certs/ca.crt"
    pics
```

In the code mentioned above, we use the `rediss://` protocol header to enable mTLS functionality, and then use the following options to specify the path of the client certificate:

- `tls-cert-file=<path>`: The path of the client certificate.
- `tls-key-file=<path>`: The path of the private key.
- `tls-ca-cert-file=<path>`: The path of the CA certificate. It is optional. If it is not specified, the system CA certificate will be used.
- `insecure-skip-verify=true` It can skip verifying the server certificate.

When specifying options in a URL, start with the `?` symbol and use the `&` symbol to separate multiple options, for example: `?tls-cert-file=client.crt&tls-key-file=client.key`.

In the above example, `/etc/certs` is just a directory name. Replace it with your actual certificate directory when using it, which can be a relative or absolute path.

### KeyDB

[KeyDB](https://keydb.dev) is an open source fork of Redis, developed to stay aligned with the Redis community. KeyDB implements multi-threading support, better memory utilization, and greater throughput on top of Redis, and also supports [Active Replication](https://github.com/JohnSully/KeyDB/wiki/Active-Replication), i.e., the Active Active feature.

:::note
Same as Redis, the Active Replication is asynchronous, which may cause consistency issues. So use with caution!
:::

When being used as metadata storage engine for Juice, KeyDB is used exactly in the same way as Redis. So please refer to the [Redis](#redis) section for usage.

## Key-Value Database

### BadgerDB

[BadgerDB](https://github.com/dgraph-io/badger) is an embedded, persistent, and standalone Key-Value database developed in pure Go. The database files are stored locally in the specified directory.

When using BadgerDB as the JuiceFS metadata storage engine, use `badger://` to specify the database path.

#### Create a file system

You only need to create a file system for use, and there is no need to create a BadgerDB database in advance.

```shell
juicefs format badger://$HOME/badger-data myjfs
```

This command creates `badger-data` as a database directory in the `home` directory of the current user, which is used as metadata storage for JuiceFS.

#### Mount a file system

The database path needs to be specified when mounting the file system.

```shell
juicefs mount -d badger://$HOME/badger-data /mnt/jfs
```

:::tip
BadgerDB only allows single-process access. If you need to perform operations like `gc`, `fsck`, `dump`, and `load`, you need to unmount the file system first.
:::

### TiKV

[TiKV](https://tikv.org) is a distributed transactional Key-Value database. It is originally developed by PingCAP as the storage layer for their flagship product TiDB. Now TiKV is an independent open source project, and is also a granduated project of CNCF.

By using the official tool TiUP, you can easily build a local playground for testing (refer [here](https://tikv.org/docs/latest/concepts/tikv-in-5-minutes) for details). Production environment generally requires at least three hosts to store three data replicas (refer to the [official document](https://tikv.org/docs/latest/deploy/install/install) for all deployment steps).

:::note
It's recommended to use dedicated TiKV 5.0+ cluster as the metadata engine for JuiceFS.
:::

#### Create a file system

When using TiKV as the metadata storage engine, parameters needs to be specified as the following format:

```shell
tikv://<pd_addr>[,<pd_addr>...]/<prefix>
```

The `prefix` is a user-defined string, which can be used to distinguish multiple file systems or applications when they share the same TiKV cluster. For example:

```shell
juicefs format \
    --storage s3 \
    ... \
    "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" \
    pics
```

#### Set up TLS

If you need to enable TLS, you can set the TLS configuration item by adding the query parameter after the metadata URL. Currently supported configuration items:

| Name        | Value                                                                                                                                                      |
|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ca`        | CA root certificate, used to connect TiKV/PD with TLS                                                                                                      |
| `cert`      | certificate file path, used to connect TiKV/PD with TLS                                                                                                    |
| `key`       | private key file path, used to connect TiKV/PD with TLS                                                                                                    |
| `verify-cn` | verify component caller's identity, [reference link](https://docs.pingcap.com/tidb/stable/enable-tls-between-components#verify-component-callers-identity) |

For example:

```shell
juicefs format \
    --storage s3 \
    ... \
    "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs?ca=/path/to/ca.pem&cert=/path/to/tikv-server.pem&key=/path/to/tikv-server-key.pem&verify-cn=CN1,CN2" \
    pics
```

#### Mount a file system

```shell
juicefs mount -d "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" /mnt/jfs
```

### etcd

[etcd](https://etcd.io) is a small-scale key-value database with high availability and reliability, which can be used as metadata storage for JuiceFS.

#### Create a file system

When using etcd as the metadata engine, the `Meta-URL` parameter needs to be specified in the following format:

```
etcd://[user:password@]<addr>[,<addr>...]/<prefix>
```

Where `user` and `password` are required when etcd enables user authentication. The `prefix` is a user-defined string. When multiple file systems or applications share an etcd cluster, setting the prefix can avoid confusion and conflict. An example is as follows:

```shell
juicefs format etcd://user:password@192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs pics
```

#### Set up TLS

If you need to enable TLS, set the TLS configuration item by adding the query parameter after the metadata URL, use absolute path for certificate files to avoid file not found error.

| Name                   | Value                 |
|------------------------|-----------------------|
| `cacert`               | CA root certificate   |
| `cert`                 | certificate file path |
| `key`                  | private key file path |
| `server-name`          | name of server        |
| `insecure-skip-verify` | 1                     |

For example:

```shell
juicefs format \
    --storage s3 \
    ... \
    "etcd://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs?cert=/path/to/ca.pem&cacert=/path/to/etcd-server.pem&key=/path/to/etcd-key.pem&server-name=etcd" \
    pics
```

#### Mount a file system

```shell
juicefs mount -d "etcd://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" /mnt/jfs
```

:::note
When mounting to the background, the path to the certificate needs to use an absolute path.
:::

### FoundationDB <VersionAdd>1.1</VersionAdd>

[FoundationDB](https://www.foundationdb.org) is a distributed database that can hold large-scale structured data on multiple clustered servers. The database system focuses on high performance, high scalability, and good fault tolerance. Using FoundationDB as the metadata engine requires its client library, so by default it is not enabled in the JuiceFS released binaries. If you need to use it, please compile it yourself.

#### Compile JuiceFS

First, you need to install the FoundationDB client library (refer to the [official documentation](https://apple.github.io/foundationdb/api-general.html#installing-client-binaries) for more details):

<Tabs>
  <TabItem value="debian" label="Debian and derivatives">

```shell
curl -O https://github.com/apple/foundationdb/releases/download/6.3.25/foundationdb-clients_6.3.25-1_amd64.deb
sudo dpkg -i foundationdb-clients_6.3.25-1_amd64.deb
```

  </TabItem>
  <TabItem value="centos" label="RHEL and derivatives">

```shell
curl -O https://github.com/apple/foundationdb/releases/download/6.3.25/foundationdb-clients-6.3.25-1.el7.x86_64.rpm
sudo rpm -Uvh foundationdb-clients-6.3.25-1.el7.x86_64.rpm
```

  </TabItem>
</Tabs>

Then, compile JuiceFS supporting FoundationDB:

```shell
make juicefs.fdb
```

#### Create a file system

When using FoundationDB as the metadata engine, the `Meta-URL` parameter needs to be specified in the following format:

```uri
fdb://[config file address]?prefix=<prefix>
```

The `<cluster_file_path>` is the FoundationDB configuration file path, which is used to connect to the FoundationDB server. The `<prefix>` is a user-defined string, which can be used to distinguish multiple file systems or applications when they share the same FoundationDB cluster. For example:

```shell
juicefs.fdb format \
    --storage s3 \
    ... \
    "fdb:///etc/foundationdb/fdb.cluster?prefix=jfs" \
    pics
```

#### Set up TLS

If you need to enable TLS, the general steps are as follows. For details, please refer to [official documentation](https://apple.github.io/foundationdb/tls.html).

##### Use OpenSSL to generate a CA certificate

```shell
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout private.key -out cert.crt
cat cert.crt private.key > fdb.pem
```

##### Configure TLS

| Command-line Option    | Client Option      | Environment Variable       | Purpose                                                                    |
|------------------------|--------------------|----------------------------|----------------------------------------------------------------------------|
| `tls_certificate_file` | `TLS_cert_path`    | `FDB_TLS_CERTIFICATE_FILE` | Path to the file from which the local certificates can be loaded           |
| `tls_key_file`         | `TLS_key_path`     | `FDB_TLS_KEY_FILE`         | Path to the file from which to load the private key                        |
| `tls_verify_peers`     | `tls_verify_peers` | `FDB_TLS_VERIFY_PEERS`     | The byte-string for the verification of peer certificates and sessions     |
| `tls_password`         | `tls_password`     | `FDB_TLS_PASSWORD`         | The byte-string representing the passcode for unencrypting the private key |
| `tls_ca_file`          | `TLS_ca_path`      | `FDB_TLS_CA_FILE`          | Path to the file containing the CA certificates to trust                   |

##### Configure the server

The TLS parameters can be configured in `foundationdb.conf` or environment variables, as shown in the following configuration files (emphasis on the `[foundationdb.4500]` configuration).

```ini title="foundationdb.conf"
[fdbmonitor]
user = foundationdb
group = foundationdb

[general]
restart-delay = 60
## by default, restart-backoff = restart-delay-reset-interval = restart-delay
# initial-restart-delay = 0
# restart-backoff = 60
# restart-delay-reset-interval = 60
cluster-file = /etc/foundationdb/fdb.cluster
# delete-envvars =
# kill-on-configuration-change = true
## Default parameters for individual fdbserver processes

[fdbserver]
command = /usr/sbin/fdbserver
#public-address = auto:$ID
#listen-address = public
datadir = /var/lib/foundationdb/data/$ID
logdir = /var/log/foundationdb
# logsize = 10MiB
# maxlogssize = 100MiB
# machine-id =
# datacenter-id =
# class =
# memory = 8GiB
# storage-memory = 1GiB
# cache-memory = 2GiB
# metrics-cluster =
# metrics-prefix =

[fdbserver.4500]
Public - address = 127.0.0.1:4500: TLS
listen-address = public
tls_certificate_file = /etc/foundationdb/fdb.pem
tls_ca_file = /etc/foundationdb/cert.crt
tls_key_file = /etc/foundationdb/private.key
tls_verify_peers= Check.Valid=0

[backup_agent]
command = /usr/lib/foundationdb/backup_agent/backup_agent
logdir = /var/log/foundationdb

[backup_agent.1]
```

In addition, you need to add the suffix `:tls` after the address in `fdb.cluster`, `fdb.cluster` is as follows:

```uri title="fdb.cluster"
U6pT9Jhl:ClZfjAWM@127.0.0.1:4500:tls
```

##### Configure the client

You need to configure TLS parameters and `fdb.cluster` on the client machine, `fdbcli` is the same.

Connected by `fdbcli`:

```shell
fdbcli --tls_certificate_file=/etc/foundationdb/fdb.pem \
       --tls_ca_file=/etc/foundationdb/cert.crt \
       --tls_key_file=/etc/foundationdb/private.key \
       --tls_verify_peers=Check.Valid=0
```

Connected by API (`fdbcli` also applies):

```shell
export FDB_TLS_CERTIFICATE_FILE=/etc/foundationdb/fdb.pem \
export FDB_TLS_CA_FILE=/etc/foundationdb/cert.crt \
export FDB_TLS_KEY_FILE=/etc/foundationdb/private.key \
export FDB_TLS_VERIFY_PEERS=Check.Valid=0
```

#### Mount a file system

```shell
juicefs.fdb mount -d \
    "fdb:///etc/foundationdb/fdb.cluster?prefix=jfs" \
    /mnt/jfs
```

## SQL Database

Each database can only be used by one JuiceFS file system by default. If you want multiple file systems to share a database, you can achieve this by adding a `table_prefix` <VersionAdd>1.3</VersionAdd> query parameter in the META-URL to add different table prefixes for different file systems.
For example: `mysql://user:mypassword@(192.168.1.6:3306)/juicefs?table_prefix=volume1`

### MySQL

[MySQL](https://www.mysql.com) is one of the most popular open source relational databases, and is often preferred for web applications.

>[MariaDB](https://mariadb.org) is an open source branch of MySQL, maintained by the original developers of MySQL. With its high compatibility with MySQL, setting up the Meta engine in MariaDB uses the same parameters and configurations as MySQL.
>
>[OceanBase](https://en.oceanbase.com) is a self-developed distributed relational database designed for processing massive data and high-concurrency transactions. It features high performance, strong consistency, and high availability. OceanBase is also highly compatible with MySQL, allowing the metadata engine to be configured in the same way.

#### Create a file system

When using MySQL as the metadata storage engine, you need to create a database manually before create the file system. The command with the following format is usually used to access the database:

<Tabs>
  <TabItem value="tcp" label="TCP">

```
mysql://<username>[:<password>]@(<host>:3306)/<database-name>
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
mysql://<username>[:<password>]@unix(<socket-file-path>)/<database-name>
```

  </TabItem>
</Tabs>

:::note

1. Don't leave out the `()` brackets on either side of the URL.
2. Special characters in passwords do not require url encoding

:::

For example:

```shell
juicefs format \
    --storage s3 \
    ... \
    "mysql://user:mypassword@(192.168.1.6:3306)/juicefs" \
    pics
```

A more secure approach would be to pass the database password through the environment variable `META_PASSWORD`:

```shell
export META_PASSWORD="mypassword"
juicefs format \
    --storage s3 \
    ... \
    "mysql://user@(192.168.1.6:3306)/juicefs" \
    pics
```

Or equivalently:

```shell
export META_PASSWORD_FILE="/secret/mypassword.txt"
juicefs format \
    --storage s3 \
    ... \
    "mysql://user@(192.168.1.6:3306)/juicefs" \
    pics
```

To connect to a TLS enabled MySQL server, pass the `tls=true` parameter (or `tls=skip-verify` if using a self-signed certificate):

```shell
juicefs format \
    --storage s3 \
    ... \
    "mysql://user:mypassword@(192.168.1.6:3306)/juicefs?tls=true" \
    pics
```

#### Mount a file system

```shell
juicefs mount -d "mysql://user:mypassword@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

Passing password with the `META_PASSWORD` environment variable is also supported when mounting a file system.

```shell
export META_PASSWORD="mypassword"
juicefs mount -d "mysql://user@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

Passing the password using a file is also supported as follows:

```shell
export META_PASSWORD_FILE="/secret/mypassword.txt"
juicefs mount -d "mysql://user@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

To connect to a TLS enabled MySQL server, pass the `tls=true` parameter (or `tls=skip-verify` if using a self-signed certificate):

```shell
juicefs mount -d "mysql://user:mypassword@(192.168.1.6:3306)/juicefs?tls=true" /mnt/jfs
```

For more examples of MySQL database address format, please refer to [Go-MySQL-Driver](https://github.com/Go-SQL-Driver/MySQL/#examples).

### PostgreSQL

[PostgreSQL](https://www.postgresql.org) is a powerful open source relational database with a perfect ecosystem and rich application scenarios, and it also works as the metadata engine of JuiceFS.

Many cloud computing platforms offer hosted PostgreSQL database services, or you can deploy one yourself by following the [Usage Wizard](https://www.postgresqltutorial.com/postgresql-getting-started).

Other PostgreSQL-compatible databases (such as CockroachDB) can also be used as metadata engine.

#### Create a file system

When using PostgreSQL as the metadata storage engine, you need to create a database manually before creating the file system by following the format below:

<Tabs>
  <TabItem value="tcp" label="TCP">

```
postgres://[username][:<password>]@<host>[:5432]/<database-name>[?parameters]
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
postgres://[username][:<password>]@/<database-name>?host=<socket-directories-path>[&parameters]
```

  </TabItem>
</Tabs>

Where `[]` enclosed are optional and the rest are mandatory.

For example:

```shell
juicefs format \
    --storage s3 \
    ... \
    "postgres://user:mypassword@192.168.1.6:5432/juicefs" \
    pics
```

A more secure approach would be to pass the database password through the environment variable `META_PASSWORD`:

```shell
export META_PASSWORD="mypassword"
juicefs format \
    --storage s3 \
    ... \
    "postgres://user@192.168.1.6:5432/juicefs" \
    pics
```

The password can also be passed using a file as follows:

```shell
export META_PASSWORD_FILE="/secret/mypassword.txt"
juicefs format \
    --storage s3 \
    ... \
    "postgres://user@192.168.1.6:5432/juicefs" \
    pics
```

:::note

1. JuiceFS uses public [schema](https://www.postgresql.org/docs/current/ddl-schemas.html) by default, if you want to use a `non-public schema`,  you need to specify `search_path` in the connection string parameter. e.g `postgres://user:mypassword@192.168.1.6:5432/juicefs?search_path=pguser1`
2. If the `public schema` is not the first hit in the `search_path` configured on the PostgreSQL server, the `search_path` parameter must be explicitly set in the connection string.
3. The `search_path` connection parameter can be set to multiple schemas natively, but currently JuiceFS only supports setting one. `postgres://user:mypassword@192.168.1.6:5432/juicefs?search_path=pguser1,public` will be considered illegal.
4. Special characters in the password need to be replaced by url encoding. For example, `|` needs to be replaced with `%7C`.

:::

#### Mount a file system

```shell
juicefs mount -d "postgres://user:mypassword@192.168.1.6:5432/juicefs" /mnt/jfs
```

Passing password with the `META_PASSWORD` environment variable is also supported when mounting a file system.

```shell
export META_PASSWORD="mypassword"
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs" /mnt/jfs
```

Passing a password using a file is also supported as follows:

```shell
export META_PASSWORD_FILE="/secret/mypassword.txt"
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs" /mnt/jfs
```

#### Troubleshooting

The JuiceFS client connects to PostgreSQL via SSL encryption by default. If you encountered an error saying `pq: SSL is not enabled on the server`, you need to enable SSL encryption for PostgreSQL according to your own business scenario, or you can disable it by adding a parameter to the metadata URL Validation.

```shell
juicefs format \
    --storage s3 \
    ... \
    "postgres://user@192.168.1.6:5432/juicefs?sslmode=disable" \
    pics
```

Additional parameters can be appended to the metadata URL. More details can be seen [here](https://pkg.go.dev/github.com/lib/pq#hdr-Connection_String_Parameters).

### SQLite

[SQLite](https://sqlite.org) is a widely used small, fast, single-file, reliable and full-featured SQL database engine.

The SQLite database has only one file, which is very flexible to create and use. When using SQLite as the JuiceFS metadata storage engine, there is no need to create a database file in advance, and you can directly create a file system:

```shell
juicefs format \
    --storage s3 \
    ... \
    "sqlite3://my-jfs.db" \
    pics
```

Executing the above command will automatically create a database file named `my-jfs.db` in the current directory. **Please keep this file properly**!

Mount the file system:

```shell
juicefs mount -d "sqlite3://my-jfs.db" /mnt/jfs/
```

Please note the location of the database file, if it is not in the current directory, you need to specify the absolute path to the database file, e.g.

```shell
juicefs mount -d "sqlite3:///home/herald/my-jfs.db" /mnt/jfs/
```

One can also add driver supported [PRAGMA Statements](https://www.sqlite.org/pragma.html) to the connection string like:

```shell
"sqlite3://my-jfs.db?cache=shared&_busy_timeout=5000"
```

For more examples of SQLite database address format, please refer to [Go-SQLite3 Driver](https://github.com/mattn/go-sqlite3#connection-string).

:::note
Since SQLite is a single-file database, usually only the host where the database is located can access it. Therefore, SQLite database is more suitable for standalone use. For multiple servers sharing the same file system, it is recommended to use databases such as Redis or MySQL.
:::


================================================
FILE: docs/en/reference/how_to_set_up_object_storage.md
================================================
---
title: How to Set Up Object Storage
sidebar_position: 3
description: This article introduces the object storages supported by JuiceFS and how to configure and use it.
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

As you can learn from [JuiceFS Technical Architecture](../introduction/architecture.md), JuiceFS is a distributed file system with data and metadata stored separately. JuiceFS uses object storage as the main data storage and uses databases such as Redis, PostgreSQL and MySQL as metadata storage.

## Storage options {#storage-options}

When creating a JuiceFS file system, there are following options to set up the storage:

- `--storage`: Specify the type of storage to be used by the file system, e.g. `--storage s3`
- `--bucket`: Specify the storage access address, e.g. `--bucket https://myjuicefs.s3.us-east-2.amazonaws.com`
- `--access-key` and `--secret-key`: Specify the authentication information when accessing the storage

For example, the following command uses Amazon S3 object storage to create a file system:

```shell
juicefs format --storage s3 \
    --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \
    --access-key abcdefghijklmn \
    --secret-key nmlkjihgfedAcBdEfg \
    redis://192.168.1.6/1 \
    myjfs
```

## Other options {#other-options}

When executing the `juicefs format` or `juicefs mount` command, you can set some special options in the form of URL parameters in the `--bucket` option, such as `tls-insecure-skip-verify=true` in `https://myjuicefs.s3.us-east-2.amazonaws.com?tls-insecure-skip-verify=true` is to skip the certificate verification of HTTPS requests.

Client certificates are also supported as they are commonly used for mTLS connections, for example:
`https://myjuicefs.s3.us-east-2.amazonaws.com?ca-certs=./path/to/ca&ssl-cert=./path/to/cert&ssl-key=./path/to/privatekey`

## Enable data sharding {#enable-data-sharding}

When creating a file system, multiple buckets can be defined as the underlying storage of the file system through the [`--shards`](../reference/command_reference.mdx#format-data-format-options) option. In this way, the system will distribute the files to multiple buckets based on the hashed value of the file name. Data sharding technology can distribute the load of concurrent writing of large-scale data to multiple buckets, thereby improving the writing performance.

The following are points to note when using the data sharding function:

- The `--shards` option accepts an integer between 0 and 256, indicating how many Buckets the files will be scattered into. The default value is 0, indicating that the data sharding function is not enabled.
- Only multiple buckets under the same object storage can be used.
- The integer wildcard `%d` needs to be used to specify the buckets, for example, `"http://192.168.1.18:9000/myjfs-%d"`. Buckets can be created in advance in this format, or automatically created by the JuiceFS client when creating a file system.
- The data sharding is set at the time of creation and cannot be modified after creation. You cannot increase or decrease the number of buckets, nor cancel the shards function.

For example, the following command creates a file system with 4 shards.

```shell
juicefs format --storage s3 \
    --shards 4 \
    --bucket "https://myjfs-%d.s3.us-east-2.amazonaws.com" \
    ...
```

After executing the above command, the JuiceFS client will create 4 buckets named `myjfs-0`, `myjfs-1`, `myjfs-2`, and `myjfs-3`.

## Access Key and Secret Key {#aksk}

In general, object storages are authenticated with Access Key ID and Access Key Secret. For JuiceFS file system, they are provided by options `--access-key` and `--secret-key` (or AK, SK for short).

It is more secure to pass credentials via environment variables `ACCESS_KEY` and `SECRET_KEY` instead of explicitly specifying the options `--access-key` and `--secret-key` in the command line when creating a filesystem, e.g.,

```shell
export ACCESS_KEY=abcdefghijklmn
export SECRET_KEY=nmlkjihgfedAcBdEfg
juicefs format --storage s3 \
    --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \
    redis://192.168.1.6/1 \
    myjfs
```

Public clouds typically allow users to create IAM (Identity and Access Management) roles, such as [AWS IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) or [Alibaba Cloud RAM role](https://www.alibabacloud.com/help/doc-detail/110376.htm), which can be assigned to VM instances. If the cloud server instance already has read and write access to the object storage, there is no need to specify `--access-key` and `--secret-key`.

## Use temporary access credentials {#session-token}

Permanent access credentials generally have two parts, Access Key, Secret Key, while temporary access credentials generally include three parts, Access Key, Secret Key and token, and temporary access credentials have an expiration time, usually between a few minutes and a few hours.

### How to get temporary credentials {#how-to-get-temporary-credentials}

Different cloud vendors have different acquisition methods. Generally, the Access Key, Secret Key and ARN representing the permission boundary of the temporary access credential are required as parameters to request access to the STS server of the cloud service vendor to obtain the temporary access credential. This process can generally be simplified by the SDK provided by the cloud vendor. For example, Amazon S3 can refer to this [link](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html) to obtain temporary credentials, and Alibaba Cloud OSS can refer to this [link](https://www.alibabacloud.com/help/en/object-storage-service/latest/use-a-temporary-credential-provided-by-sts-to-access-oss).

### How to set up object storage with temporary access credentials {#how-to-set-up-object-storage-with-temporary-access-credentials}

The way of using temporary credentials is not much different from using permanent credentials. When formatting the file system, pass the Access Key, Secret Key, and token of the temporary credentials through `--access-key`, `--secret-key`, `--session-token` can set the value. E.g:

```bash
juicefs format \
    --storage oss \
    --access-key xxxx \
    --secret-key xxxx \
    --session-token xxxx \
    --bucket https://bucketName.oss-cn-hangzhou.aliyuncs.com \
    redis://localhost:6379/1 \
    test1
```

Since temporary credentials expire quickly, the key is how to update the temporary credentials that JuiceFS uses after `format` the file system before the temporary credentials expire. The credential update process is divided into two steps:

1. Before the temporary certificate expires, apply for a new temporary certificate;
2. Without stopping the running JuiceFS, use the `juicefs config Meta-URL --access-key xxxx --secret-key xxxx --session-token xxxx` command to hot update the access credentials.

Newly mounted clients will use the new credentials directly, and all clients already running will also update their credentials within a minute. The entire update process will not affect the running business. Due to the short expiration time of the temporary credentials, the above steps need to **be executed in a long-term loop** to ensure that the JuiceFS service can access the object storage normally.

## Internal and public endpoint {#internal-and-public-endpoint}

Typically, object storage services provide a unified URL for access, but the cloud platform usually provides both internal and external endpoints. For example, the platform cloud services that meet the criteria will automatically resolve requests to the internal endpoint of the object storage. This offers you a lower latency, and internal network traffic is free.

Some cloud computing platforms also distinguish between internal and public networks, but instead of providing a unified access URL, they provide separate internal Endpoint and public Endpoint addresses.

JuiceFS also provides flexible support for this object storage service that distinguishes between internal and public addresses. For scenarios where the same file system is shared, the object storage is accessed through internal Endpoint on the servers that meet the criteria, and other computers are accessed through public Endpoint, which can be used as follows:

- **When creating a file system**: It is recommended to use internal Endpoint address for `--bucket`
- **When mounting a file system**: For clients that do not satisfy the internal line, you can specify a public Endpoint address to `--bucket`.

Creating a file system using an internal Endpoint ensures better performance and lower latency, and for clients that cannot be accessed through an internal address, you can specify a public Endpoint to mount with the option `--bucket`.

## Storage class <VersionAdd>1.1</VersionAdd> {#storage-class}

Object storage usually supports multiple storage classes, such as standard storage, infrequent access storage, and archive storage. Different storage classes will have different prices and availability, you can set the default storage class with the [`--storage-class`](../reference/command_reference.mdx#format-data-storage-options) option when creating the JuiceFS file system, or set a new storage class with the [`--storage-class`](../reference/command_reference.mdx#mount-data-storage-options) option when mounting the JuiceFS file system. Please refer to the user manual of the object storage you are using to see how to set the value of the `--storage-class` option (such as [Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass)).

:::note
When using certain storage classes (such as archive and deep archive), the data cannot be accessed immediately, and the data needs to be restored in advance and accessed after a period of time.
:::

:::note
When using certain storage classes (such as infrequent access), there are minimum bill units, and additional charges may be incurred for reading data. Please refer to the user manual of the object storage you are using for details.
:::

## Using proxy {#using-proxy}

If the network environment where the client is located is affected by firewall policies or other factors that require access to external object storage services through a proxy, the corresponding proxy settings are different for different operating systems. Please refer to the corresponding user manual for settings.

On Linux, for example, the proxy can be set by creating `http_proxy` and `https_proxy` environment variables.

```shell
export http_proxy=http://localhost:8035/
export https_proxy=http://localhost:8035/
juicefs format \
    --storage s3 \
    ... \
    myjfs
```

## Supported object storage {#supported-object-storage}

If you wish to use a storage system that is not listed, feel free to submit a requirement [issue](https://github.com/juicedata/juicefs/issues).

| Name                                                        | Value      |
|:-----------------------------------------------------------:|:----------:|
| [Amazon S3](#amazon-s3)                                     | `s3`       |
| [Google Cloud Storage](#google-cloud)                       | `gs`       |
| [Azure Blob Storage](#azure-blob-storage)                   | `wasb`     |
| [Backblaze B2](#backblaze-b2)                               | `b2`       |
| [IBM Cloud Object Storage](#ibm-cloud-object-storage)       | `ibmcos`   |
| [Oracle Cloud Object Storage](#oracle-cloud-object-storage) | `s3`       |
| [Scaleway Object Storage](#scaleway-object-storage)         | `scw`      |
| [DigitalOcean Spaces](#digitalocean-spaces)                 | `space`    |
| [Wasabi](#wasabi)                                           | `wasabi`   |
| [Telnyx Cloud Storage](#telnyx)                             | `s3`       |
| [Storj DCS](#storj-dcs)                                     | `s3`       |
| [Vultr Object Storage](#vultr-object-storage)               | `s3`       |
| [Cloudflare R2](#r2)                                        | `s3`       |
| [Bunny Storage](#bunny)                                     | `bunny`    |
| [Alibaba Cloud OSS](#alibaba-cloud-oss)                     | `oss`      |
| [Tencent Cloud COS](#tencent-cloud-cos)                     | `cos`      |
| [Huawei Cloud OBS](#huawei-cloud-obs)                       | `obs`      |
| [Baidu Object Storage](#baidu-object-storage)               | `bos`      |
| [Volcano Engine TOS](#volcano-engine-tos)                   | `tos`      |
| [Kingsoft Cloud KS3](#kingsoft-cloud-ks3)                   | `ks3`      |
| [QingStor](#qingstor)                                       | `qingstor` |
| [Qiniu](#qiniu)                                             | `qiniu`    |
| [CTYun OOS](#ctyun-oos)                                     | `oos`      |
| [ECloud Object Storage](#ecloud-object-storage)             | `eos`      |
| [JD Cloud OSS](#jd-cloud-oss)                               | `s3`       |
| [UCloud US3](#ucloud-us3)                                   | `ufile`    |
| [Ceph RADOS](#ceph-rados)                                   | `ceph`     |
| [Ceph RGW](#ceph-rgw)                                       | `s3`       |
| [Gluster](#gluster)                                         | `gluster`  |
| [Swift](#swift)                                             | `swift`    |
| [MinIO](#minio)                                             | `minio`    |
| [WebDAV](#webdav)                                           | `webdav`   |
| [HDFS](#hdfs)                                               | `hdfs`     |
| [Apache Ozone](#apache-ozone)                               | `s3`       |
| [Redis](#redis)                                             | `redis`    |
| [TiKV](#tikv)                                               | `tikv`     |
| [etcd](#etcd)                                               | `etcd`     |
| [SQLite](#sqlite)                                           | `sqlite3`  |
| [MySQL](#mysql)                                             | `mysql`    |
| [PostgreSQL](#postgresql)                                   | `postgres` |
| [Local disk](#local-disk)                                   | `file`     |
| [SFTP/SSH](#sftp)                                           | `sftp`     |

### Amazon S3

S3 supports [two styles of endpoint URI](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html): virtual hosted-style and path-style. The difference is:

- Virtual-hosted-style: `https://<bucket>.s3.<region>.amazonaws.com`
- Path-style: `https://s3.<region>.amazonaws.com/<bucket>`

The `<region>` should be replaced with specific region code, e.g. the region code of US East (N. Virginia) is `us-east-1`. All the available region codes can be found [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions).

:::note
For AWS users in China, you need add `.cn` to the host, i.e. `amazonaws.com.cn`, and check [this document](https://docs.amazonaws.cn/en_us/aws/latest/userguide/endpoints-arns.html) for region code.
:::

:::note
If the S3 bucket has public access (anonymous access is supported), please set `--access-key` to `anonymous`.
:::

In JuiceFS both the two styles are supported to specify the bucket address, for example:

<Tabs groupId="amazon-s3-endpoint">
  <TabItem value="virtual-hosted-style" label="Virtual-hosted-style">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.s3.<region>.amazonaws.com \
    ... \
    myjfs
```

  </TabItem>
  <TabItem value="path-style" label="Path-style">

```bash
juicefs format \
    --storage s3 \
    --bucket https://s3.<region>.amazonaws.com/<bucket> \
    ... \
    myjfs
```

  </TabItem>
</Tabs>

You can also set `--storage` to `s3` to connect to S3-compatible object storage, e.g.:

<Tabs groupId="amazon-s3-endpoint">
  <TabItem value="virtual-hosted-style" label="Virtual-hosted-style">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

  </TabItem>
  <TabItem value="path-style" label="Path-style">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<endpoint>/<bucket> \
    ... \
    myjfs
```

  </TabItem>
</Tabs>

:::tip
The format of the option `--bucket` for all S3 compatible object storage services is `https://<bucket>.<endpoint>` or `https://<endpoint>/<bucket>`. The default `region` is `us-east-1`. When a different `region` is required, it can be set manually via the environment variable `AWS_REGION` or `AWS_DEFAULT_REGION`.
:::

### Google Cloud Storage {#google-cloud}

Google Cloud uses [IAM](https://cloud.google.com/iam/docs/overview) to manage permissions for accessing resources. Through authorizing [service accounts](https://cloud.google.com/iam/docs/creating-managing-service-accounts#iam-service-accounts-create-gcloud), you can have a fine-grained control of the access rights of cloud servers and object storage.

For cloud servers and object storage that belong to the same service account, as long as the account grants access to the relevant resources, there is no need to provide authentication information when creating a JuiceFS file system, and the cloud platform will automatically complete authentication.

For cases where you want to access the object storage from outside the Google Cloud Platform, for example, to create a JuiceFS file system on your local computer using Google Cloud Storage, you need to configure authentication information. Since Google Cloud Storage does not use Access Key ID and Access Key Secret, but rather the JSON key file of the service account to authenticate the identity.

Please refer to ["Authentication as a service account"](https://cloud.google.com/docs/authentication/production) to create JSON key file for the service account and download it to the local computer, and define the path to the key file via the environment variable `GOOGLE_APPLICATION_ CREDENTIALS`, e.g.:

```shell
export GOOGLE_APPLICATION_CREDENTIALS="$HOME/service-account-file.json"
```

You can write the command to create environment variables to `~/.bashrc` or `~/.profile` and have the shell set it automatically every time you start.

Once you have configured the environment variables for passing key information, the commands to create a file system locally and on Google Cloud Server are identical. For example,

```bash
juicefs format \
    --storage gs \
    --bucket <bucket>[.region] \
    ... \
    myjfs
```

As you can see, there is no need to include authentication information in the command, and the client will authenticate the access to the object storage through the JSON key file set in the previous environment variable. Also, since the bucket name is [globally unique](https://cloud.google.com/storage/docs/naming-buckets#considerations), when creating a file system, you only need to specify the bucket name in the option `--bucket`.

### Azure Blob Storage

To use Azure Blob Storage as data storage of JuiceFS, please [check the documentation](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage) to learn how to view the storage account name and access key, which correspond to the values ​​of the `--access-key` and `--secret-key` options, respectively.

The `--bucket` option is set in the format `https://<container>.<endpoint>`, please replace `<container>` with the name of the actual blob container and `<endpoint>` with `core.windows.net` (Azure Global) or `core.chinacloudapi.cn` (Azure China). For example:

```bash
juicefs format \
    --storage wasb \
    --bucket https://<container>.<endpoint> \
    --access-key <storage-account-name> \
    --secret-key <storage-account-access-key> \
    ... \
    myjfs
```

In addition to providing authorization information through the options `--access-key` and `--secret-key`, you could also create a [connection string](https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string) and set the environment variable `AZURE_STORAGE_CONNECTION_STRING`. For example:

```bash
# Use connection string
export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=XXX;AccountKey=XXX;EndpointSuffix=core.windows.net"
juicefs format \
    --storage wasb \
    --bucket https://<container> \
    ... \
    myjfs
```

:::note
For Azure users in China, the value of `EndpointSuffix` is `core.chinacloudapi.cn`.
:::

### Backblaze B2

To use Backblaze B2 as a data storage for JuiceFS, you need to create [application key](https://www.backblaze.com/b2/docs/application_keys.html) first. **Application Key ID** and **Application Key** corresponds to Access Key and Secret Key, respectively.

Backblaze B2 supports two access interfaces: the B2 native API and the S3-compatible API.

#### B2 native API

The storage type should be set to `b2`, and only the bucket name needs to be set in the option `--bucket`. For example:

```bash
juicefs format \
    --storage b2 \
    --bucket <bucket> \
    --access-key <application-key-ID> \
    --secret-key <application-key> \
    ... \
    myjfs
```

#### S3-compatible API

The storage type should be set to `s3`, and the full bucket address in the option `bucket` needs to be specified. For example:

```bash
juicefs format \
    --storage s3 \
    --bucket https://s3.eu-central-003.backblazeb2.com/<bucket> \
    --access-key <application-key-ID> \
    --secret-key <application-key> \
    ... \
    myjfs
```

### IBM Cloud Object Storage

When creating JuiceFS file system using IBM Cloud Object Storage, you first need to create an [API key](https://cloud.ibm.com/docs/account?topic=account-manapikey) and an [instance ID](https://cloud.ibm.com/docs/key-protect?topic=key-protect-retrieve-instance-ID). The "API key" and "instance ID" are the equivalent of access key and secret key, respectively.

IBM Cloud Object Storage provides [multiple endpoints](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints) for each region, depending on your network (e.g. public or private). Thus, please choose an appropriate endpoint. For example:

```bash
juicefs format \
    --storage ibmcos \
    --bucket https://<bucket>.<endpoint> \
    --access-key <API-key> \
    --secret-key <instance-ID> \
    ... \
    myjfs
```

### Oracle Cloud Object Storage

Oracle Cloud Object Storage supports S3 compatible access. Please refer to [official documentation](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm) for more information.

The `endpoint` format for this object storage is: `${namespace}.compat.objectstorage.${region}.oraclecloud.com`, for example:

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<endpoint> \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

### Scaleway Object Storage

Please follow [this document](https://www.scaleway.com/en/docs/generate-api-keys) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.s3.<region>.scw.cloud`. Remember to replace `<region>` with specific region code, e.g. the region code of "Amsterdam, The Netherlands" is `nl-ams`. All available region codes can be found [here](https://www.scaleway.com/en/docs/object-storage-feature/#-Core-Concepts). For example:

```bash
juicefs format \
    --storage scw \
    --bucket https://<bucket>.s3.<region>.scw.cloud \
    ... \
    myjfs
```

### DigitalOcean Spaces

Please follow [this document](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) to learn how to get access key and secret key.

The `--bucket` option format is `https://<space-name>.<region>.digitaloceanspaces.com`. Please replace `<region>` with specific region code, e.g. `nyc3`. All available region codes can be found [here](https://www.digitalocean.com/docs/spaces/#regional-availability). For example:

```bash
juicefs format \
    --storage space \
    --bucket https://<space-name>.<region>.digitaloceanspaces.com \
    ... \
    myjfs
```

### Wasabi

Please follow [this document](https://wasabi-support.zendesk.com/hc/en-us/articles/360019677192-Creating-a-Root-Access-Key-and-Secret-Key) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.s3.<region>.wasabisys.com`, replace `<region>` with specific region code, e.g. the region code of US East 1 (N. Virginia) is `us-east-1`. All available region codes can be found [here](https://wasabi-support.zendesk.com/hc/en-us/articles/360.15.26031-What-are-the-service-URLs-for-Wasabi-s-different-regions-). For example:

```bash
juicefs format \
    --storage wasabi \
    --bucket https://<bucket>.s3.<region>.wasabisys.com \
    ... \
    myjfs
```

:::note
For users in Tokyo (ap-northeast-1) region, please refer to [this document](https://wasabi-support.zendesk.com/hc/en-us/articles/360039372392-How-do-I-access-the-Wasabi-Tokyo-ap-northeast-1-storage-region-) to learn how to get appropriate endpoint URI.***
:::

### Telnyx

Prerequisites

- A [Telnyx account](https://telnyx.com/sign-up)
- [API key](https://portal.telnyx.com/#/app/api-keys) – this will be used as both `access-key` and `secret-key`

Set up JuiceFS:

```bash
juicefs format \
    --storage s3 \
    --bucket https://<regional-endpoint>.telnyxstorage.com/<bucket> \
    --access-key <api-key> \
    --secret-key <api-key> \
    ... \
    myjfs
```

Available regional endpoints are [here](https://developers.telnyx.com/docs/cloud-storage/api-endpoints).

### Storj DCS

Please refer to [this document](https://docs.storj.io/api-reference/s3-compatible-gateway) to learn how to create access key and secret key.

Storj DCS is an S3-compatible storage, using `s3` for option `--storage`. The setting format of the option `--bucket` is `https://gateway.<region>.storjshare.io/<bucket>`, and please replace `<region>` with the corresponding region code you need. There are currently three available regions: `us1`, `ap1` and `eu1`. For example:

```shell
juicefs format \
    --storage s3 \
    --bucket https://gateway.<region>.storjshare.io/<bucket> \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

:::caution
Storj DCS [ListObjects](https://github.com/storj/gateway-st/blob/main/docs/s3-compatibility.md#listobjects) API is not fully S3 compatible (result list is not sorted), so some features of JuiceFS do not work. For example, `juicefs gc`, `juicefs fsck`, `juicefs sync`, `juicefs destroy`. And when using `juicefs mount`, you need to disable [automatic-backup](../administration/metadata_dump_load.md#backup-automatically) function by adding `--backup-meta 0`.
:::

### Vultr Object Storage

Vultr Object Storage is an S3-compatible storage, using `s3` for `--storage` option. The format of the option `--bucket` is `https://<bucket>.<region>.vultrobjects.com/`. For example:

```shell
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.ewr1.vultrobjects.com/ \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

Please find the access and secret keys for object storage [in the customer portal](https://my.vultr.com/objectstorage).

### Cloudflare R2 {#r2}

R2 is Cloudflare's object storage service and provides an S3-compatible API, so usage is the same as Amazon S3. Please refer to [Documentation](https://developers.cloudflare.com/r2/data-access/s3-api/tokens) to learn how to create Access Key and Secret Key.

```shell
juicefs format \
    --storage s3 \
    --bucket https://<ACCOUNT_ID>.r2.cloudflarestorage.com/myjfs \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

For production, it is recommended to pass key information via the `ACCESS_KEY` and `SECRET_KEY` environment variables, e.g.

```shell
export ACCESS_KEY=<your-access-key>
export SECRET_KEY=<your-sceret-key>
juicefs format \
    --storage s3 \
    --bucket https://<ACCOUNT_ID>.r2.cloudflarestorage.com/myjfs \
    ... \
    myjfs
```

:::caution
Cloudflare R2 `ListObjects` API is not fully S3 compatible (result list is not sorted), so some features of JuiceFS do not work. For example, `juicefs gc`, `juicefs fsck`, `juicefs sync`, `juicefs destroy`. And when using `juicefs mount`, you need to disable [automatic-backup](../administration/metadata_dump_load.md#backup-automatically) function by adding `--backup-meta 0`.
:::

### Bunny Storage {#bunny}

Bunny Storage offers a non-S3 compatible object storage with multiple performance tiers and many storage regions. It uses [it uses a custom API](https://docs.bunny.net/reference/storage-api).

This is not included by default, please build it with tag `bunny`

#### Usage

Create a Storage Zone and use the Zone Name with the Hostname of the Location separated by a dot as Bucket name and the `Write Password` as Secret Key.

```shell
juicefs format \
    --storage bunny \
    --secret-key "write-password" \
    --bucket "https://uk.storage.bunnycdn.com/myzone" \ # https://<Endpoint>/<Zonename>
    myjfs
```

### Alibaba Cloud OSS

Please follow [this document](https://www.alibabacloud.com/help/doc-detail/125558.htm) to learn how to get access key and secret key. If you have already created [RAM role](https://www.alibabacloud.com/help/doc-detail/110376.htm) and assigned it to a VM instance, you could omit the options `--access-key` and `--secret-key`.

Alibaba Cloud also supports using [Security Token Service (STS)](https://www.alibabacloud.com/help/doc-detail/100624.htm) to authorize temporary access to OSS. If you wanna use STS, you should omit the options `--access-key` and `--secret-key` and set environment variables `ALICLOUD_ACCESS_KEY_ID`, `ALICLOUD_ACCESS_KEY_SECRET` and `SECURITY_TOKEN`instead, for example:

```bash
# Use Security Token Service (STS)
export ALICLOUD_ACCESS_KEY_ID=XXX
export ALICLOUD_ACCESS_KEY_SECRET=XXX
export SECURITY_TOKEN=XXX
juicefs format \
    --storage oss \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

OSS provides [multiple endpoints](https://www.alibabacloud.com/help/doc-detail/31834.htm) for each region, depending on your network (e.g. public or internal network). Please choose an appropriate endpoint.

If you are creating a file system on AliCloud's server, you can specify the bucket name directly in the option `--bucket`. For example.

```bash
# Running within Alibaba Cloud
juicefs format \
    --storage oss \
    --bucket <bucket> \
    ... \
    myjfs
```

### Tencent Cloud COS

The naming rule of bucket in Tencent Cloud is `<bucket>-<APPID>`, so you must append `APPID` to the bucket name. Please follow [this document](https://intl.cloud.tencent.com/document/product/436/13312) to learn how to get `APPID`.

The full format of `--bucket` option is `https://<bucket>-<APPID>.cos.<region>.myqcloud.com`, and please replace `<region>` with specific region code. E.g. the region code of Shanghai is `ap-shanghai`. You could find all available region codes [here](https://intl.cloud.tencent.com/document/product/436/6224). For example:

```bash
juicefs format \
    --storage cos \
    --bucket https://<bucket>-<APPID>.cos.<region>.myqcloud.com \
    ... \
    myjfs
```

If you are creating a file system on Tencent Cloud's server, you can specify the bucket name directly in the option `--bucket`. For example.

```bash
# Running within Tencent Cloud
juicefs format \
    --storage cos \
    --bucket <bucket>-<APPID> \
    ... \
    myjfs
```

### Huawei Cloud OBS

Please follow [this document](https://support.huaweicloud.com/usermanual-ca/zh-cn_topic_0046606340.html) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.obs.<region>.myhuaweicloud.com`, and please replace `<region>` with specific region code. E.g. the region code of Beijing 1 is `cn-north-1`. You could find all available region codes [here](https://developer.huaweicloud.com/endpoint?OBS). For example:

```bash
juicefs format \
    --storage obs \
    --bucket https://<bucket>.obs.<region>.myhuaweicloud.com \
    ... \
    myjfs
```

If you are creating a file system on Huawei Cloud's server, you can specify the bucket name directly in the option `--bucket`. For example,

```bash
# Running within Huawei Cloud
juicefs format \
    --storage obs \
    --bucket <bucket> \
    ... \
    myjfs
```

### Baidu Object Storage

Please follow [this document](https://cloud.baidu.com/doc/Reference/s/9jwvz2egb) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.<region>.bcebos.com`, and please replace `<region>` with specific region code. E.g. the region code of Beijing is `bj`. You could find all available region codes [here](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D%EF%BC%88endpoint%EF%BC%89). For example:

```bash
juicefs format \
    --storage bos \
    --bucket https://<bucket>.<region>.bcebos.com \
    ... \
    myjfs
```

If you are creating a file system on Baidu Cloud's server, you can specify the bucket name directly in the option `--bucket`. For example,

```bash
# Running within Baidu Cloud
juicefs format \
    --storage bos \
    --bucket <bucket> \
    ... \
    myjfs
```

### Volcano Engine TOS <VersionAdd>1.0.3</VersionAdd> {#volcano-engine-tos}

Please follow [this document](https://www.volcengine.com/docs/6291/65568) to learn how to get access key and secret key.

The TOS provides [multiple endpoints](https://www.volcengine.com/docs/6349/107356) for each region, depending on your network (e.g. public or internal). Please choose an appropriate endpoint. For example:

```bash
juicefs format \
    --storage tos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### Kingsoft Cloud KS3

Please follow [this document](https://docs.ksyun.com/documents/1386) to learn how to get access key and secret key.

KS3 provides [multiple endpoints](https://docs.ksyun.com/documents/6761) for each region, depending on your network (e.g. public or internal). Please choose an appropriate endpoint. For example:

```bash
juicefs format \
    --storage ks3 \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### QingStor

Please follow [this document](https://docsv3.qingcloud.com/storage/object-storage/api/practices/signature/#%E8%8E%B7%E5%8F%96-access-key) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.<region>.qingstor.com`, replace `<region>` with specific region code. E.g. the region code of Beijing 3-A is `pek3a`. You could find all available region codes [here](https://docs.qingcloud.com/qingstor/#%E5%8C%BA%E5%9F%9F%E5%8F%8A%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D). For example:

```bash
juicefs format \
    --storage qingstor \
    --bucket https://<bucket>.<region>.qingstor.com \
    ... \
    myjfs
```

:::note
The format of `--bucket` option for all QingStor compatible object storage services is `http://<bucket>.<endpoint>`.
:::

### Qiniu

Please follow [this document](https://developer.qiniu.com/af/kb/1479/how-to-access-or-locate-the-access-key-and-secret-key) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.s3-<region>.qiniucs.com`, replace `<region>` with specific region code. E.g. the region code of China East is `cn-east-1`. You could find all available region codes [here](https://developer.qiniu.com/kodo/4088/s3-access-domainname). For example:

```bash
juicefs format \
    --storage qiniu \
    --bucket https://<bucket>.s3-<region>.qiniucs.com \
    ... \
    myjfs
```

### CTYun OOS

Please follow [this document](https://www.ctyun.cn/help2/10000101/10473683) to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.<endpoint>`,  For example:

```bash
juicefs format \
    --storage oos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### ECloud Object Storage

Please follow [this document](https://ecloud.10086.cn/op-help-center/doc/article/24501) to learn how to get access key and secret key.

ECloud Object Storage provides [multiple endpoints](https://ecloud.10086.cn/op-help-center/doc/article/40956) for each region, depending on your network (e.g. public or internal). Please choose an appropriate endpoint. For example:

```bash
juicefs format \
    --storage eos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### JD Cloud OSS

Please follow [this document](https://docs.jdcloud.com/cn/account-management/accesskey-management)  to learn how to get access key and secret key.

The `--bucket` option format is `https://<bucket>.<region>.jdcloud-oss.com`，and please replace `<region>` with specific region code. You could find all available region codes [here](https://docs.jdcloud.com/cn/object-storage-service/oss-endpont-list). For example:

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<region>.jdcloud-oss.com \
    ... \
    myjfs
```

### UCloud US3

Please follow [this document](https://docs.ucloud.cn/uai-censor/access/key) to learn how to get access key and secret key.

US3 (formerly UFile) provides [multiple endpoints](https://docs.ucloud.cn/ufile/introduction/region) for each region, depending on your network (e.g. public or internal). Please choose an appropriate endpoint. For example:

```bash
juicefs format \
    --storage ufile \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### Ceph RADOS

:::note
JuiceFS v1.0 uses `go-ceph` v0.4.0, which supports Ceph Luminous (v12.2.x) and above.
JuiceFS v1.1 uses `go-ceph` v0.18.0, which supports Ceph Octopus (v15.2.x) and above.
Make sure that JuiceFS matches your Ceph and `librados` version, see [`go-ceph`](https://github.com/ceph/go-ceph#supported-ceph-versions).
:::

The [Ceph Storage Cluster](https://docs.ceph.com/en/latest/rados) has a messaging layer protocol that enables clients to interact with a Ceph Monitor and a Ceph OSD Daemon. The [`librados`](https://docs.ceph.com/en/latest/rados/api/librados-intro) API enables you to interact with the two types of daemons:

- The [Ceph Monitor](https://docs.ceph.com/en/latest/rados/configuration/common/#monitors), which maintains a master copy of the cluster map.
- The [Ceph OSD Daemon (OSD)](https://docs.ceph.com/en/latest/rados/configuration/common/#osds), which stores data as objects on a storage node.

JuiceFS supports the use of native Ceph APIs based on `librados`. You need to install `librados` library and build `juicefs` binary separately.

First, install a `librados` that matches the version of your Ceph installation, For example, if Ceph version is Octopus (v15.2.x), then it is recommended to use `librados` v15.2.x.

<Tabs>
  <TabItem value="debian" label="Debian and derivatives">

```bash
sudo apt-get install librados-dev
```

  </TabItem>
  <TabItem value="centos" label="RHEL and derivatives">

```bash
sudo yum install librados2-devel
```

  </TabItem>
</Tabs>

Then compile JuiceFS for Ceph (make sure you have Go 1.20+ and GCC 5.4+ installed):

```bash
make juicefs.ceph
```

When using with Ceph, the JuiceFS Client object storage related options are interpreted differently:

* `--bucket` stands for the Ceph storage pool, the format is `ceph://<pool-name>`. A [pool](https://docs.ceph.com/en/latest/rados/operations/pools) is a logical partition for storing objects. Create a pool before use.
* `--access-key` stands for the Ceph cluster name, the default value is `ceph`.
* `--secret-key` option is [Ceph client user name](https://docs.ceph.com/en/latest/rados/operations/user-management), the default user name is `client.admin`.

In order to reach Ceph Monitor, `librados` reads Ceph configuration file by searching default locations and the first found will be used. The locations are:

- `CEPH_CONF` environment variable
- `/etc/ceph/ceph.conf`
- `~/.ceph/config`
- `ceph.conf` in the current working directory

Since these additional Ceph configuration files are needed during the mount, CSI Driver users need to [upload them to Kubernetes, and map to the mount pod](https://juicefs.com/docs/csi/guide/pv/#mount-pod-extra-files).

To format a volume, run:

```bash
juicefs.ceph format \
    --storage ceph \
    --bucket ceph://<pool-name> \
    --access-key <cluster-name> \
    --secret-key <user-name> \
    ... \
    myjfs
```

### Ceph RGW

[Ceph Object Gateway](https://ceph.io/ceph-storage/object-storage) is an object storage interface built on top of `librados` to provide applications with a RESTful gateway to Ceph Storage Clusters. Ceph Object Gateway supports S3-compatible interface, so we could set `--storage` to `s3` directly.

The `--bucket` option format is `http://<bucket>.<endpoint>` (virtual hosted-style). For example:

```bash
juicefs format \
    --storage s3 \
    --bucket http://<bucket>.<endpoint> \
    ... \
    myjfs
```

### Gluster

[Gluster](https://github.com/gluster/glusterfs) is a software defined distributed storage that can scale to several petabytes. JuiceFS communicates with Gluster via the `libgfapi` library, so it needs to be built separately before used.

First, install `libgfapi` (version 6.0 - 10.1, [10.4+ is not supported yet](https://github.com/juicedata/juicefs/issues/4043))

<Tabs>
  <TabItem value="debian" label="Debian and derivatives">

```bash
sudo apt-get install uuid-dev libglusterfs-dev glusterfs-common
```

  </TabItem>
  <TabItem value="centos" label="RHEL and derivatives">

```bash
sudo yum install glusterfs glusterfs-api-devel glusterfs-libs
```

  </TabItem>
</Tabs>

Then compile JuiceFS supporting Gluster:

```bash
make juicefs.gluster
```

Now we can create a JuiceFS volume on Gluster:

```bash
juicefs format \
    --storage gluster \
    --bucket host1,host2,host3/gv0 \
    ... \
    myjfs
```

The format of `--bucket` option is `<host[,host...]>/<volume_name>`. Please note the `volume_name` here is the name of Gluster volume, and has nothing to do with the name of JuiceFS volume.

### Swift

[OpenStack Swift](https://github.com/openstack/swift) is a distributed object storage system designed to scale from a single machine to thousands of servers. Swift is optimized for multi-tenancy and high concurrency. Swift is ideal for backups, web and mobile content, and any other unstructured data that can grow without bound.

The `--bucket` option format is `http://<container>.<endpoint>`. A container defines a namespace for objects.

**Currently, JuiceFS only supports [Swift V1 authentication](https://www.swiftstack.com/docs/cookbooks/swift_usage/auth.html).**

The value of `--access-key` option is username. The value of `--secret-key` option is password. For example:

```bash
juicefs format \
    --storage swift \
    --bucket http://<container>.<endpoint> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### MinIO

[MinIO](https://min.io) is an open source lightweight object storage, compatible with Amazon S3 API.

It is easy to run a MinIO instance locally using Docker. For example, the following command sets and maps port `9900` for the console with `--console-address ":9900"` and also maps the data path for the MinIO to the `minio-data` folder in the current directory, which can be modified if needed.

```shell
sudo docker run -d --name minio \
    -p 9000:9000 \
    -p 9900:9900 \
    -e "MINIO_ROOT_USER=minioadmin" \
    -e "MINIO_ROOT_PASSWORD=minioadmin" \
    -v $PWD/minio-data:/data \
    --restart unless-stopped \
    minio/minio server /data --console-address ":9900"
```

After container is up and running, you can access:

- **MinIO API**: [http://127.0.0.1:9000](http://127.0.0.1:9000), this is the object storage service address used by JuiceFS
- **MinIO UI**: [http://127.0.0.1:9900](http://127.0.0.1:9900), this is used to manage the object storage itself, not related to JuiceFS

The initial Access Key and Secret Key of the object storage are both `minioadmin`.

When using MinIO as data storage for JuiceFS, set the option `--storage` to `minio`.

```bash
juicefs format \
    --storage minio \
    --bucket http://127.0.0.1:9000/<bucket> \
    --access-key minioadmin \
    --secret-key minioadmin \
    ... \
    myjfs
```

:::note

1. Currently, JuiceFS only supports path-style MinIO URI addresses, e.g., `http://127.0.0.1:9000/myjfs`.
1. The `MINIO_REGION` environment variable can be used to set the region of MinIO, if not set, the default is `us-east-1`.
1. When using Multi-Node MinIO deployment, consider setting using a DNS address in the service endpoint, resolving to all MinIO Node IPs, as a simple load-balancer, e.g. `http://minio.example.com:9000/myjfs`

:::

### WebDAV

[WebDAV](https://en.wikipedia.org/wiki/WebDAV) is an extension of the Hypertext Transfer Protocol (HTTP)
that facilitates collaborative editing and management of documents stored on the WWW server among users.
From JuiceFS v0.15+, JuiceFS can use a storage that speaks WebDAV as a data storage.

You need to set `--storage` to `webdav`, and `--bucket` to the endpoint of WebDAV. If basic authorization is enabled, username and password should be provided as `--access-key` and `--secret-key`, for example:

```bash
juicefs format \
    --storage webdav \
    --bucket http://<endpoint>/ \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### HDFS

[HDFS](https://hadoop.apache.org) is the file system for Hadoop, which can be used as the object storage for JuiceFS.

When HDFS is used, `--access-key` can be used to specify the `username`, and `hdfs` is usually the default superuser. For example:

```bash
juicefs format \
    --storage hdfs \
    --bucket namenode1:8020 \
    --access-key hdfs \
    ... \
    myjfs
```

When `--access-key` is not specified on formatting, JuiceFS will use the current user of `juicefs mount` or Hadoop SDK to access HDFS. It will hang and fail with IO error eventually, if the current user don't have enough permission to read/write the blocks in HDFS.

JuiceFS will try to load configurations for HDFS client based on `$HADOOP_CONF_DIR` or `$HADOOP_HOME`. If an empty value is provided to `--bucket`, the default HDFS found in Hadoop configurations will be used.

bucket format:

- `[hdfs://]namenode:port[/path]`

for HA cluster:

- `[hdfs://]namenode1:port,namenode2:port[/path]`
- `[hdfs://]nameservice[/path]`

For HDFS which enable Kerberos, `KRB5KEYTAB` and `KRB5PRINCIPAL` environment var can be used to set keytab and principal.

### Apache Ozone

Apache Ozone is a scalable, redundant, and distributed object storage for Hadoop. It supports S3-compatible interface, so we could set `--storage` to `s3` directly.

```bash
juicefs format \
    --storage s3 \
    --bucket http://<endpoint>/<bucket>\
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

### Redis

[Redis](https://redis.io) can be used as both metadata storage for JuiceFS and as data storage, but when using Redis as a data storage, it is recommended not to store large-scale data.

#### Standalone

The `--bucket` option format is `redis://<host>:<port>/<db>`. The value of `--access-key` option is username. The value of `--secret-key` option is password. For example:

```bash
juicefs format \
    --storage redis \
    --bucket redis://<host>:<port>/<db> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

#### Redis Sentinel

In Redis Sentinel mode, the format of the `--bucket` option is `redis[s]://MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`. Sentinel's password needs to be declared through the `SENTINEL_PASSWORD_FOR_OBJ` environment variable. For example:

```bash
export SENTINEL_PASSWORD_FOR_OBJ=sentinel_password
juicefs format \
    --storage redis \
    --bucket redis://masterName,1.2.3.4,1.2.5.6:26379/2  \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

#### Redis Cluster

In Redis Cluster mode, the format of `--bucket` option is `redis[s]://ADDR:PORT,[ADDR:PORT],[ADDR:PORT]`. For example:

```bash
juicefs format \
    --storage redis \
    --bucket redis://127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002  \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### TiKV

[TiKV](https://tikv.org) is a highly scalable, low latency, and easy to use key-value database. It provides both raw and ACID-compliant transactional key-value API.

TiKV can be used as both metadata storage and data storage for JuiceFS.

:::note
It's recommended to use dedicated TiKV 5.0+ cluster as the data storage for JuiceFS.
:::

The `--bucket` option format is `<host>:<port>,<host>:<port>,<host>:<port>`, and `<host>` is the address of Placement Driver (PD). The options `--access-key` and `--secret-key` have no effect and can be omitted. For example:

```bash
juicefs format \
    --storage tikv \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>" \
    ... \
    myjfs
```

:::note
Don't use the same TiKV cluster for both metadata and data, because JuiceFS uses non-transactional protocol (RawKV) for objects and transactional protocol (TnxKV) for metadata. The TxnKV protocol has special encoding for keys, so they may overlap with keys even they has different prefixes. BTW, it's recommended to enable [Titan](https://tikv.org/docs/latest/deploy/configure/titan) in TiKV for data cluster.
:::

#### Set up TLS

If you need to enable TLS, you can set the TLS configuration item by adding the query parameter after the bucket URL. Currently supported configuration items:

| Name        | Value                                                                                                                                                   |
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ca`        | CA root certificate, used to connect TiKV/PD with TLS                                                                                                   |
| `cert`      | certificate file path, used to connect TiKV/PD with TLS                                                                                                 |
| `key`       | private key file path, used to connect TiKV/PD with TLS                                                                                                 |
| `verify-cn` | verify component caller's identity, [reference link](https://docs.pingcap.com/tidb/dev/enable-tls-between-components#verify-component-callers-identity) |

For example:

```bash
juicefs format \
    --storage tikv \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>?ca=/path/to/ca.pem&cert=/path/to/tikv-server.pem&key=/path/to/tikv-server-key.pem&verify-cn=CN1,CN2" \
    ... \
    myjfs
```

### etcd

[etcd](https://etcd.io) is a small-scale key-value database with high availability and reliability, which can be used as both the metadata storage of JuiceFS and the data storage of JuiceFS.

etcd will [limit](https://etcd.io/docs/latest/dev-guide/limit) a single request to no more than 1.5MB by default, you need to change the block size (`--block-size` option) of JuiceFS to 1MB or even lower.

The `--bucket` option needs to fill in the etcd address, the format is similar to `<host1>:<port>,<host2>:<port>,<host3>:<port>`. The `--access-key` and `--secret-key` options are filled with username and password, which can be omitted when etcd does not enable user authentication. E.g:

```bash
juicefs format \
    --storage etcd \
    --block-size 1024 \  # This option is very important
    --bucket "<host1>:<port>,<host2>:<port>,<host3>:<port>/prefix" \
    --access-key myname \
    --secret-key mypass \
    ... \
    myjfs
```

#### Set up TLS

If you need to enable TLS, you can set the TLS configuration item by adding the query parameter after the bucket URL. Currently supported configuration items:

| Name                   | Value                 |
|------------------------|-----------------------|
| `cacert`               | CA root certificate   |
| `cert`                 | certificate file path |
| `key`                  | private key file path |
| `server-name`          | name of server        |
| `insecure-skip-verify` | 1                     |

For example:

```bash
juicefs format \
    --storage etcd \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>?cacert=/path/to/ca.pem&cert=/path/to/server.pem&key=/path/to/key.pem&server-name=etcd" \
    ... \
    myjfs
```

:::note
The path to the certificate needs to be an absolute path, and make sure that all machines that need to mount can use this path to access them.
:::

### SQLite

[SQLite](https://sqlite.org) is a small, fast, single-file, reliable, full-featured single-file SQL database engine widely used around the world.

When using SQLite as a data store, you only need to specify its absolute path.

```shell
juicefs format \
    --storage sqlite3 \
    --bucket /path/to/sqlite3.db \
    ... \
    myjfs
```

:::note
Since SQLite is an embedded database, only the host where the database is located can access it, and cannot be used in multi-machine sharing scenarios. If a relative path is used when formatting, it will cause problems when mounting, please use an absolute path.
:::

### MySQL

[MySQL](https://www.mysql.com) is one of the popular open source relational databases, often used as the database of choice for web applications, both as a metadata engine for JuiceFS and for storing files data. MySQL-compatible [MariaDB](https://mariadb.org), [TiDB](https://github.com/pingcap/tidb), etc. can be used as data storage.

When using MySQL as a data storage, you need to create a database in advance and add the desired permissions, specify the access address through the `--bucket` option, specify the user name through the `--access-key` option, and specify the password through the `--secret-key` option. An example is as follows:

```shell
juicefs format \
    --storage mysql \
    --bucket (<host>:3306)/<database-name> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

After the file system is created, JuiceFS creates a table named `jfs_blob` in the database to store the data.

:::note
Don't miss the parentheses `()` in the `--bucket` parameter.
:::

### PostgreSQL

[PostgreSQL](https://www.postgresql.org) is a powerful open source relational database with a complete ecology and rich application scenarios. It can be used as both the metadata engine of JuiceFS and the data storage. Other databases compatible with the PostgreSQL protocol (such as [CockroachDB](https://github.com/cockroachdb/cockroach), etc.) can also be used as data storage.

When creating a file system, you need to create a database and add the corresponding read and write permissions. Use the `--bucket` option to specify the address of the data, use the `--access-key` option to specify the username, and use the `--secret-key` option to specify the password. An example is as follows:

```shell
juicefs format \
    --storage postgres \
    --bucket <host>:<port>/<db>[?parameters] \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

After the file system is created, JuiceFS creates a table named `jfs_blob` in the database to store the data.

#### Troubleshooting

The JuiceFS client uses SSL encryption to connect to PostgreSQL by default. If the connection error `pq: SSL is not enabled on the server` indicates that the database does not have SSL enabled. You can enable SSL encryption for PostgreSQL according to your business scenario, or you can add the parameter `sslmode=disable` to the bucket URL to disable encryption verification.

### Local disk

When creating JuiceFS storage, if no storage type is specified, the local disk will be used to store data by default. The default storage path for root user is `/var/jfs`, and `~/.juicefs/local` is for ordinary users.

For example, using the local Redis database and local disk to create a JuiceFS storage named `test`:

```shell
juicefs format redis://localhost:6379/1 test
```

Local storage is usually only used to help users understand how JuiceFS works and to give users an experience on the basic features of JuiceFS. The created JuiceFS storage cannot be mounted by other clients within the network and can only be used on a single machine.

### SFTP/SSH {#sftp}

SFTP - Secure File Transfer Protocol, It is not a type of storage. To be precise, JuiceFS reads and writes to disks on remote hosts via SFTP/SSH, thus allowing any SSH-enabled operating system to be used as a data storage for JuiceFS.

For example, the following command uses the SFTP protocol to connect to the remote server `192.168.1.11` and creates the `myjfs/` folder in the `$HOME` directory of user `tom` as the data storage of JuiceFS.

```shell
juicefs format  \
    --storage sftp \
    --bucket 192.168.1.11:myjfs/ \
    --access-key tom \
    --secret-key 123456 \
    ...
    redis://localhost:6379/1 myjfs
```

#### Notes

- `--bucket` is used to set the server address and storage path in the format `[sftp://]<IP/Domain>:[port]:<Path>`. Note that the directory name should end with `/`, and the port number is optionally defaulted to `22`, e.g. `192.168.1.11:22:myjfs/`.
- `--access-key` set the username of the remote server
- `--secret-key` set the password of the remote server

### NFS {#nfs}

NFS - Network File System, is a commonly used file-sharing service in Unix-like operating systems. It allows computers within a network to access remote files as if they were local files.

JuiceFS supports using NFS as the underlying storage to build a file system, offering two usage methods: local mount and direct mode.

#### Local Mount

JuiceFS v1.1 and earlier versions only support using NFS as underlying storage via local mount. This method requires mounting the directory on the NFS server locally first, and then using it as a local disk to create the JuiceFS file system.

For example, first mount the `/srv/data` directory from the remote NFS server `192.168.1.11` to the local `/mnt/data` directory, and then access it in `file` mode.

```shell
$ sudo mount -t nfs 192.168.1.11:/srv/data /mnt/data
$ sudo juicefs format \
    --storage file \
    --bucket /mnt/data \
    ... \
    redis://localhost:6379/1 myjfs
```

From JuiceFS's perspective, the locally mounted NFS is still a local disk, so the `--storage` option is set to `file`.

Similarly, because the underlying storage can only be accessed on the mounted device, to share access across multiple devices, you need to mount the NFS share on each device separately, or provide external access through network-based methods such as WebDAV or S3 Gateway.

#### Direct Mode

JuiceFS v1.2 and later versions support using NFS as the underlying storage in direct mode. This method does not require pre-mounting the NFS directory locally but accesses the shared directory directly through the built-in NFS protocol in the JuiceFS client.

For example, the remote server's `/etc/exports` configuration file exports the following NFS share:

```
/srv/data    192.168.1.0/24(rw,sync,no_subtree_check)
```

You can directly use the JuiceFS client to connect to the `/srv/data` directory on the NFS server to create the file system:

```shell
$ sudo juicefs format  \
    --storage nfs \
    --bucket 192.168.1.11:/srv/data \
    ... \
    redis://localhost:6379/1 myjfs
```

In direct mode, the `--storage` option is set to `nfs`, and the `--bucket` option is set to the NFS server address and shared directory. The JuiceFS client will directly connect to the directory on the NFS server to read and write data.

**A few considerations:**

1. JuiceFS direct mode currently only supports the NFSv3 protocol.
2. The JuiceFS client needs permission to access the NFS shared directory.
3. NFS by default enables the `root_squash` feature, which maps root access to the NFS share to the `nobody` user by default. To avoid permission issues with NFS shares, you can set the owner of the shared directory to `nobody:nogroup` or configure the NFS share with the `no_root_squash` option to disable permission squashing.


================================================
FILE: docs/en/reference/p8s_metrics.md
================================================
---
title: JuiceFS Metrics
sidebar_position: 4
---

If you haven't yet set up monitoring for JuiceFS, read [monitoring and data visualization"](../administration/monitoring.md) to learn how.

## Global labels {#global-labels}

| Name       | Description      |
| ----       | -----------      |
| `vol_name` | Volume name      |
| `instance` | Client host name in format `<host>:<port>`. Refer to [official document](https://prometheus.io/docs/concepts/jobs_instances) for more information |
| `mp`       | Mount point path, if metrics are reported through [Prometheus Pushgateway](https://github.com/prometheus/pushgateway), for example, [JuiceFS Hadoop Java SDK](../administration/monitoring.md#hadoop), `mp` will be `sdk-<PID>` |

## File system {#file-system}

### Metrics

| Name                          | Description                            | Unit |
|-------------------------------|----------------------------------------|------|
| `juicefs_used_space`          | Total used space                       | byte |
| `juicefs_used_inodes`         | Total number of inodes                 |      |

## Operating system {#operating-system}

### Metrics

| Name                | Description           | Unit   |
| ----                | -----------           | ----   |
| `juicefs_uptime`    | Total running time    | second |
| `juicefs_cpu_usage` | Accumulated CPU usage | second |
| `juicefs_memory`    | Used memory           | byte   |

## Metadata engine {#metadata-engine}

### Metrics

| Name                                              | Description                                | Unit   |
| ----                                              | -----------                                | ----   |
| `juicefs_transaction_durations_histogram_seconds` | Transactions latency distributions         | second |
| `juicefs_transaction_restart`                     | Number of times a transaction restarted |        |

## FUSE {#fuse}

### Metrics

| Name                                           | Description                          | Unit   |
| ----                                           | -----------                          | ----   |
| `juicefs_fuse_read_size_bytes`                 | Size distributions of read request   | byte   |
| `juicefs_fuse_written_size_bytes`              | Size distributions of write request  | byte   |
| `juicefs_fuse_ops_durations_histogram_seconds` | Operations latency distributions     | second |
| `juicefs_fuse_open_handlers`                   | Number of open files and directories |        |

## SDK {#sdk}

### Metrics

| Name                                          | Description                         | Unit   |
| ----                                          | -----------                         | ----   |
| `juicefs_sdk_read_size_bytes`                 | Size distributions of read request  | byte   |
| `juicefs_sdk_written_size_bytes`              | Size distributions of write request | byte   |
| `juicefs_sdk_ops_durations_histogram_seconds` | Operations latency distributions    | second |

## Cache {#cache}

### Metrics

| Name                                    | Description                                 | Unit   |
|:----------------------------------------|---------------------------------------------|--------|
| `juicefs_blockcache_blocks`             | Number of cached blocks                     |        |
| `juicefs_blockcache_bytes`              | Size of cached blocks                       | byte   |
| `juicefs_blockcache_hits`               | Count of cached block hits                  |        |
| `juicefs_blockcache_miss`               | Count of cached block miss                  |        |
| `juicefs_blockcache_writes`             | Count of cached block writes                |        |
| `juicefs_blockcache_drops`              | Count of cached block drops                 |        |
| `juicefs_blockcache_evicts`             | Count of cached block evicts                |        |
| `juicefs_blockcache_hit_bytes`          | Size of cached block hits                   | byte   |
| `juicefs_blockcache_miss_bytes`         | Size of cached block miss                   | byte   |
| `juicefs_blockcache_write_bytes`        | Size of cached block writes                 | byte   |
| `juicefs_blockcache_read_hist_seconds`  | Latency distributions of read cached block  | second |
| `juicefs_blockcache_write_hist_seconds` | Latency distributions of write cached block | second |
| `juicefs_staging_blocks`                | Number of blocks in the staging path        |        |
| `juicefs_staging_block_bytes`           | Total bytes of blocks in the staging path   | byte   |
| `juicefs_staging_block_delay_seconds`   | Total seconds of delay for staging blocks   | second |

## Object storage {#object-storage}

### Labels

| Name     | Description                                                    |
| ----     | -----------                                                    |
| `method` | Method to request object storage (e.g. GET, PUT, HEAD, DELETE) |

### Metrics

| Name                                                 | Description                                  | Unit   |
| ----                                                 | -----------                                  | ----   |
| `juicefs_object_request_durations_histogram_seconds` | Object storage request latency distributions | second |
| `juicefs_object_request_errors`                      | Count of failed requests to object storage   |        |
| `juicefs_object_request_data_bytes`                  | Size of requests to object storage           | byte   |

## Internal {#internal}

### Metrics

| Name                                   | Description                          | Unit |
|----------------------------------------| -----------                          | ---- |
| `juicefs_compact_size_histogram_bytes` | Size distributions of compacted data | byte |
| `juicefs_used_read_buffer_size_bytes`  | size of currently used buffer for read |      |

## Data synchronization {#sync}

### Metrics

| Name | Description | Unit |
|-|-|-|
| `juicefs_sync_scanned` | Number of all objects scanned from the source | |
| `juicefs_sync_handled` | Number of objects from the source that have been processed | |
| `juicefs_sync_pending` | Number of objects waiting to be synchronized | |
| `juicefs_sync_copied` | Number of objects that have been synchronized | |
| `juicefs_sync_copied_bytes` | Total size of data that has been synchronized | byte |
| `juicefs_sync_skipped` | Number of objects that skipped during synchronization | |
| `juicefs_sync_failed` | Number of objects that failed during synchronization | |
| `juicefs_sync_deleted` | Number of objects that deleted during synchronization | |
| `juicefs_sync_checked` | Number of objects that have been verified by checksum during synchronization | |
| `juicefs_sync_checked_bytes` | Total size of data that has been verified by checksum during synchronization | byte |


================================================
FILE: docs/en/reference/posix_compatibility.md
================================================
---
title: POSIX Compatibility
sidebar_position: 6
slug: /posix_compatibility
description: Learn how JuiceFS ensures POSIX compatibility through testing with pjdfstest and LTP.
---

JuiceFS ensures POSIX compatibility by using [pjdfstest](https://github.com/pjd/pjdfstest) and [Linux Test Project (LTP)](https://github.com/linux-test-project/ltp) for testing.

## Pjdfstest

Pjdfstest is a test suite that helps to test POSIX system calls. JuiceFS passed all of its latest 8,813 tests:

```
All tests successful.

Test Summary Report
-------------------
/root/soft/pjdfstest/tests/chown/00.t          (Wstat: 0 Tests: 1323 Failed: 0)
  TODO passed:   693, 697, 708-709, 714-715, 729, 733
Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr  0.38 sys +  2.57 cusr  3.93 csys =  9.65 CPU)
Result: PASS
```

:::note
When running pjdfstest, you must disable the JuiceFS trash, because the test deletes files directly rather than moving them to the trash. The JuiceFS trash is enabled by default. To disable it, run `juicefs config <meta-url> --trash-days 0`.
:::

Besides the features covered by pjdfstest, JuiceFS provides:

- Close-to-open consistency. It ensures that once a file is written and closed, the written data is accessible in the following open and read operations. Within the same mount point, all written data can be read immediately.
- Rename and all other metadata operations are atomic, guaranteed by the transactional nature of metadata engines.
- Open files remain accessible after being unlinked from the same mount point.
- Mmap (tested with FSx).
- Fallocate with punch hole support.
- Extended attributes (xattr).
- BSD locks (flock).
- POSIX traditional record locks (fcntl).

:::note
POSIX record locks are classified as **traditional locks** ("process-associated") and **OFD locks** (open file description locks). Their locking operation commands are `F_SETLK` and `F_OFD_SETLK` respectively. Due to the implementation of the FUSE kernel module, JuiceFS currently only supports traditional record locks. More details can be found at: [https://man7.org/linux/man-pages/man2/fcntl.2.html](https://man7.org/linux/man-pages/man2/fcntl.2.html).
:::

## LTP

LTP is a joint project developed and maintained by IBM, Cisco, Fujitsu, and others.

> The project goal is to deliver tests to the open source community that validates the reliability, robustness, and stability of Linux.
>
> The LTP testsuite contains a collection of tools for testing the Linux kernel and related features. Our goal is to improve the Linux kernel and system libraries by bringing test automation to the testing effort.

JuiceFS passed most of the file system related tests.

### Test environment

- Host: Amazon EC2: c5d.xlarge (4C 8G)
- OS: Ubuntu 20.04.1 LTS (Kernel `5.4.0-1029-aws`)
- Object storage: Amazon S3
- JuiceFS version: 0.17-dev (2021-09-16 292f2b65)

### Test steps

1. Download the LTP [release](https://github.com/linux-test-project/ltp/releases/download/20210524/ltp-full-20210524.tar.bz2) from GitHub.
2. Unarchive, compile, and install LTP:

   ```bash
   tar -jvxf ltp-full-20210524.tar.bz2
   cd ltp-full-20210524
   ./configure
   make all
   make install
   ```

3. Change the directory to `/opt/ltp` where the test tools are installed:

   ```bash
   cd /opt/ltp
   ```

   The test definition files are located under `runtest`. To speed up testing, we delete some pressure cases and unrelated cases in `fs` and `syscalls` (refer to [Appendix](#appendix), modified files are saved as `fs-jfs` and `syscalls-jfs`), then execute:

   ```bash
   ./runltp -d /mnt/jfs -f fs_bind,fs_perms_simple,fsx,io,smoketest,fs-jfs,syscalls-jfs
   ```

### Test result

```bash
Testcase                                           Result     Exit Value
--------                                           ------     ----------
fcntl17                                            FAIL       7
fcntl17_64                                         FAIL       7
getxattr05                                         CONF       32
ioctl_loop05                                       FAIL       4
ioctl_ns07                                         FAIL       1
lseek11                                            CONF       32
open14                                             CONF       32
openat03                                           CONF       32
setxattr03                                         FAIL       6

-----------------------------------------------
Total Tests: 1270
Total Skipped Tests: 4
Total Failures: 5
Kernel Version: 5.4.0-1029-aws
Machine Architecture: x86_64
```

Here are causes of the skipped and failed tests:

- fcntl17, fcntl17_64: These tests require the file system to automatically detect deadlocks when trying to add POSIX locks. JuiceFS does not support it yet.
- getxattr05: This test requires extended ACLs, which are not yet supported by JuiceFS.
- ioctl_loop05, ioctl_ns07, setxattr03: These tests require `ioctl`, which is not yet supported by JuiceFS.
- lseek11: This test requires `lseek` to handle `SEEK_DATA` and `SEEK_HOLE` flags. JuiceFS uses a kernel general function, which does not support these two flags.
- open14, openat03: These tests require `open` to handle the `O_TMPFILE` flag. It is not supported by FUSE and thus not by JuiceFS.

### Appendix

Here are deleted cases in `fs` and `syscalls`:

```bash
# fs --> fs-jfs
gf01 growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 -f glseek20 -S 2 -d $TMPDIR
gf02 growfiles -W gf02 -b -e 1 -L 10 -i 100 -I p -S 2 -u -f gf03_ -d $TMPDIR
gf03 growfiles -W gf03 -b -e 1 -g 1 -i 1 -S 150 -u -f gf05_ -d $TMPDIR
gf04 growfiles -W gf04 -b -e 1 -g 4090 -i 500 -t 39000 -u -f gf06_ -d $TMPDIR
gf05 growfiles -W gf05 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -u -f gf07_ -d $TMPDIR
gf06 growfiles -W gf06 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -f g_rand10 -S 2 -d $TMPDIR
gf07 growfiles -W gf07 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -I p -f g_rand13 -S 2 -d $TMPDIR
gf08 growfiles -W gf08 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -f g_rand11 -S 2 -d $TMPDIR
gf09 growfiles -W gf09 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -I p -f g_rand12 -S 2 -d $TMPDIR
gf10 growfiles -W gf10 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I l -f g_lio14 -S 2 -d $TMPDIR
gf11 growfiles -W gf11 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I L -f g_lio15 -S 2 -d $TMPDIR
gf12 mkfifo $TMPDIR/gffifo17; growfiles -b -W gf12 -e 1 -u -i 0 -L 30 $TMPDIR/gffifo17
gf13 mkfifo $TMPDIR/gffifo18; growfiles -b -W gf13 -e 1 -u -i 0 -L 30 -I r -r 1-4096 $TMPDIR/gffifo18
gf14 growfiles -W gf14 -b -e 1 -u -i 0 -L 20 -w -l -C 1 -T 10 -f glseek19 -S 2 -d $TMPDIR
gf15 growfiles -W gf15 -b -e 1 -u -r 1-49600 -I r -u -i 0 -L 120 -f Lgfile1 -d $TMPDIR
gf16 growfiles -W gf16 -b -e 1 -i 0 -L 120 -u -g 4090 -T 101 -t 408990 -l -C 10 -c 1000 -S 10 -f Lgf02_ -d $TMPDIR
gf17 growfiles -W gf17 -b -e 1 -i 0 -L 120 -u -g 5000 -T 101 -t 499990 -l -C 10 -c 1000 -S 10 -f Lgf03_ -d $TMPDIR
gf18 growfiles -W gf18 -b -e 1 -i 0 -L 120 -w -u -r 10-5000 -I r -l -S 2 -f Lgf04_ -d $TMPDIR
gf19 growfiles -W gf19 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -o O_RDWR,O_CREAT,O_TRUNC -u -f gf08i_ -d $TMPDIR
gf20 growfiles -W gf20 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1-256000:512 -R 512-256000 -T 4 -f gfbigio-$$ -d $TMPDIR
gf21 growfiles -W gf21 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bld-$$ -d $TMPDIR
gf22 growfiles -W gf22 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bldf-$$ -d $TMPDIR
gf23 growfiles -W gf23 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 512-64000:1024 -R 1-384000 -T 4 -f gf-inf-$$ -d $TMPDIR
gf24 growfiles -W gf24 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -f gf-jbld-$$ -d $TMPDIR
gf25 growfiles -W gf25 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1024000-2048000:2048 -R 4095-2048000 -T 1 -f gf-large-gs-$$ -d $TMPDIR
gf26 growfiles -W gf26 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 128-32768:128 -R 512-64000 -T 4 -f gfsmallio-$$ -d $TMPDIR
gf27 growfiles -W gf27 -b -D 0 -w -g 8b -C 1 -b -i 1000 -u -f gfsparse-1-$$ -d $TMPDIR
gf28 growfiles -W gf28 -b -D 0 -w -g 16b -C 1 -b -i 1000 -u -f gfsparse-2-$$ -d $TMPDIR
gf29 growfiles -W gf29 -b -D 0 -r 1-4096 -R 0-33554432 -i 0 -L 60 -C 1 -u -f gfsparse-3-$$ -d $TMPDIR
gf30 growfiles -W gf30 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -o O_RDWR,O_CREAT,O_SYNC -g 20480 -T 10 -t 20480 -f gf-sync-$$ -d $TMPDIR
rwtest01 export LTPROOT; rwtest -N rwtest01 -c -q -i 60s  -f sync 10%25000:$TMPDIR/rw-sync-$$
rwtest02 export LTPROOT; rwtest -N rwtest02 -c -q -i 60s  -f buffered 10%25000:$TMPDIR/rw-buffered-$$
rwtest03 export LTPROOT; rwtest -N rwtest03 -c -q -i 60s -n 2  -f buffered -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-buff-$$
rwtest04 export LTPROOT; rwtest -N rwtest04 -c -q -i 60s -n 2  -f sync -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-sync-$$
rwtest05 export LTPROOT; rwtest -N rwtest05 -c -q -i 50 -T 64b 500b:$TMPDIR/rwtest01%f
iogen01 export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$
quota_remount_test01 quota_remount_test01.sh
isofs isofs.sh

# syscalls --> syscalls-jfs
bpf_prog05 bpf_prog05
cacheflush01 cacheflush01
chown01_16 chown01_16
chown02_16 chown02_16
chown03_16 chown03_16
chown04_16 chown04_16
chown05_16 chown05_16
clock_nanosleep03 clock_nanosleep03
clock_gettime03 clock_gettime03
leapsec01 leapsec01
close_range01 close_range01
close_range02 close_range02
fallocate06 fallocate06
fchown01_16 fchown01_16
fchown02_16 fchown02_16
fchown03_16 fchown03_16
fchown04_16 fchown04_16
fchown05_16 fchown05_16
fcntl06 fcntl06
fcntl06_64 fcntl06_64
getegid01_16 getegid01_16
getegid02_16 getegid02_16
geteuid01_16 geteuid01_16
geteuid02_16 geteuid02_16
getgid01_16 getgid01_16
getgid03_16 getgid03_16
getgroups01_16 getgroups01_16
getgroups03_16 getgroups03_16
getresgid01_16 getresgid01_16
getresgid02_16 getresgid02_16
getresgid03_16 getresgid03_16
getresuid01_16 getresuid01_16
getresuid02_16 getresuid02_16
getresuid03_16 getresuid03_16
getrusage04 getrusage04
getuid01_16 getuid01_16
getuid03_16 getuid03_16
ioctl_sg01 ioctl_sg01
fanotify16 fanotify16
fanotify18 fanotify18
fanotify19 fanotify19
lchown01_16 lchown01_16
lchown02_16 lchown02_16
lchown03_16 lchown03_16
mbind02 mbind02
mbind03 mbind03
mbind04 mbind04
migrate_pages02 migrate_pages02
migrate_pages03 migrate_pages03
modify_ldt01 modify_ldt01
modify_ldt02 modify_ldt02
modify_ldt03 modify_ldt03
move_pages01 move_pages01
move_pages02 move_pages02
move_pages03 move_pages03
move_pages04 move_pages04
move_pages05 move_pages05
move_pages06 move_pages06
move_pages07 move_pages07
move_pages09 move_pages09
move_pages10 move_pages10
move_pages11 move_pages11
move_pages12 move_pages12
msgctl05 msgctl05
msgstress04 msgstress04
openat201 openat201
openat202 openat202
openat203 openat203
madvise06 madvise06
madvise09 madvise09
ptrace04 ptrace04
quotactl01 quotactl01
quotactl04 quotactl04
quotactl06 quotactl06
readdir21 readdir21
recvmsg03 recvmsg03
sbrk03 sbrk03
semctl08 semctl08
semctl09 semctl09
set_mempolicy01 set_mempolicy01
set_mempolicy02 set_mempolicy02
set_mempolicy03 set_mempolicy03
set_mempolicy04 set_mempolicy04
set_thread_area01 set_thread_area01
setfsgid01_16 setfsgid01_16
setfsgid02_16 setfsgid02_16
setfsgid03_16 setfsgid03_16
setfsuid01_16 setfsuid01_16
setfsuid02_16 setfsuid02_16
setfsuid03_16 setfsuid03_16
setfsuid04_16 setfsuid04_16
setgid01_16 setgid01_16
setgid02_16 setgid02_16
setgid03_16 setgid03_16
sgetmask01 sgetmask01
setgroups01_16 setgroups01_16
setgroups02_16 setgroups02_16
setgroups03_16 setgroups03_16
setgroups04_16 setgroups04_16
setregid01_16 setregid01_16
setregid02_16 setregid02_16
setregid03_16 setregid03_16
setregid04_16 setregid04_16
setresgid01_16 setresgid01_16
setresgid02_16 setresgid02_16
setresgid03_16 setresgid03_16
setresgid04_16 setresgid04_16
setresuid01_16 setresuid01_16
setresuid02_16 setresuid02_16
setresuid03_16 setresuid03_16
setresuid04_16 setresuid04_16
setresuid05_16 setresuid05_16
setreuid01_16 setreuid01_16
setreuid02_16 setreuid02_16
setreuid03_16 setreuid03_16
setreuid04_16 setreuid04_16
setreuid05_16 setreuid05_16
setreuid06_16 setreuid06_16
setreuid07_16 setreuid07_16
setuid01_16 setuid01_16
setuid03_16 setuid03_16
setuid04_16 setuid04_16
shmctl06 shmctl06
socketcall01 socketcall01
socketcall02 socketcall02
socketcall03 socketcall03
ssetmask01 ssetmask01
swapoff01 swapoff01
swapoff02 swapoff02
swapon01 swapon01
swapon02 swapon02
swapon03 swapon03
switch01 endian_switch01
sysinfo03 sysinfo03
timerfd04 timerfd04
perf_event_open02 perf_event_open02
statx07 statx07
io_uring02 io_uring02
```


================================================
FILE: docs/en/reference/redis-csc.md
================================================
# Redis Client-Side Caching Support in JuiceFS

Starting with version 6.0, Redis provides [Client-Side Caching](https://redis.io/docs/latest/develop/reference/client-side-caching) which allows clients to maintain local caches of data in a faster and more efficient way. JuiceFS includes full support for this feature, offering significant performance improvements for metadata operations.

## How it works

Redis Client-Side Caching (CSC) works by:

1. The client enables tracking mode with `CLIENT TRACKING ON BCAST`
2. The client caches data locally after reading it from Redis
3. Redis notifies the client when cached keys are modified by any client
4. The client invalidates those keys in its local cache

This results in reduced network traffic, lower latency, and higher throughput.

## Configuration

JuiceFS supports Redis CSC through the following options in the metadata URL:

```shell
--meta-url="redis://localhost/1?client-cache=true" # Enable client-side caching (always BCAST mode) 
--meta-url="redis://localhost/1?client-cache=true&client-cache-size=500" # Set cache size (default 12800) 
--meta-url="redis://localhost/1?client-cache=true&client-cache-expire=60s" # Set cache expiration (default: 60s)
```

### Options

- `client-cache`: Enables client-side caching in BCAST mode (set to any value except "false")
- `client-cache-size`: Maximum cache size (default: 12800)
- `client-cache-expire`: Cache expiration time (default: 60s)
- `client-cache-preload`: Number of file objects under the root directory preloaded after mounting. (default: 0)

When client-side caching is enabled, JuiceFS caches:

1. **Inode attributes**: File/directory metadata like permissions, size, timestamps
2. **Directory entries**: Name to inode mappings for faster lookups

> **Note:** Redis Client Side Cache requires Redis server version 6.0 or higher. Using this feature with older Redis versions will result in errors.

### Preloading Cache

When client-side caching is enabled and `client-cache-preload` is set, JuiceFS will preload the file-object attributes and entries under the root directory after mounting. This lazy preloading happens in the background and helps to:

1. Warm up the cache for common operations
2. Reduce latency for initial file system operations
3. Provide better performance from the moment the file system is mounted

The preloading process intelligently prioritizes the most important inodes by:

1. Starting with the root directory
2. Loading the most frequently accessed top-level directories and files
3. Recursively exploring important subdirectories

The preloading process runs in a background goroutine with fail-safe mechanisms and won't block or affect normal file system operations.

## Modes

JuiceFS uses BCAST mode for simplicity and reliability:

- **BCAST mode**: All keys accessed by the client are tracked and notifications are sent for any changes.

BCAST mode provides the simplest implementation while ensuring cache coherence across all clients.

## Requirements

- Redis server version 6.0 or higher
- JuiceFS with CSC support enabled

## Performance Considerations

1. The default 12800 cache size should be sufficient for most workloads
2. For very large filesystems with millions of files, you may benefit from increasing the cache size
3. The cache is most effective for metadata-heavy workloads with many repeated operations
4. For very write-heavy workloads, consider disabling CSC as invalidation traffic may offset benefits

## Troubleshooting

If you experience crashes or instability with CSC enabled:

1. Update to the latest JuiceFS version which contains important fixes for CSC
2. Try reducing the cache size with `client-cache-size`
3. Check Redis server logs for any memory or client tracking issues
4. Make sure your Redis server version is 6.0 or higher
5. If problems persist, disable CSC by removing the `client-cache` parameter

JuiceFS includes robust error handling for various Redis CSC-specific responses to ensure stable operation even when Redis sends unexpected response formats due to client tracking.

## References

- [Redis Client-Side Caching Documentation](https://redis.io/docs/latest/develop/reference/client-side-caching)


================================================
FILE: docs/en/reference/spec-limits.md
================================================
---
sidebar_position: 7
---

# Specification Limits

## File System Limits

Below are theoretical limits for JuiceFS, in real use, performance and file system size will be limited by the metadata engine and object storage of your choice.

* Directory tree depth: unlimited
* File name length: 255 Bytes
* Symbolic link length: 4096 Bytes
* Number of hard links: 2^31
* Number of files in single directory: 2^31
* Number of files in a single volume: unlimited
* Single file size: 2^(26+31)
* Total file size: 4EiB


================================================
FILE: docs/en/release_notes.md
================================================
# Release Notes

:::tip
For all versions, please see [GitHub Releases](https://github.com/juicedata/juicefs/releases).
:::

## Version number {#version-number}

JuiceFS Community Edition uses [semantic versioning](https://semver.org) to label its releases. Each version number consists of three numbers in the format `x.y.z`, representing the major version number (x), the minor version number (y), and the patch number (z).

1. **Major version number (x)**: When the major version number is greater than or equal to `1`, it indicates that the version is suitable for production environments. When the major version number changes, it indicates that this version may have added major features, architectural changes, or data format changes that are not backward compatible. For example, `v0.8.3` → `v1.0.0` means production-ready, `v1.0.0` → `v2.0.0` represents an architectural or functional change.
2. **Minor version number (y)**: The minor version number indicates that the version adds some new features, performance optimizations, bug fixes, etc. that can be backward compatible. For example, `v1.0.0` → `v1.1.0`.
3. **Patch version number (z)**: The patch version number indicates a minor update or bug fix for the software, which is only some minor changes or fixes to existing features and will not affect the compatibility of the softwares. For example, `v1.0.3` → `v1.0.4`.

## Upgrade {#changes}

JuiceFS client has only one binary file, so usually you only need to replace the old binary with the new one when upgrading JuiceFS.

### JuiceFS v1.1

:::tip
If you are using JuiceFS version prior to v1.0, please [upgrade to v1.0](#juicefs-v10) first.
:::

In v1.1 (specifically, v1.1.0-beta2) JuiceFS added [**Directory Statistics**](https://juicefs.com/docs/community/guide/dir-stats) and [**Directory Quota**](https://juicefs.com/docs/community/guide/quota#directory-quota). These two features were not available in older versions of the client, and writing with the old client when they were turned on would result in large deviations in the statistics. When upgrading to v1.1, if you do not intend to enable these two new features, you can simply replace the client without additional action. If you do, it is recommended that you read the following content before upgrading.

#### Default configuration

The default configurations for these two features are:

- For newly created filesystems they are automatically enabled.

- For existing filesystems, they are disabled.
  - Directory statistics can be enabled independently by `juicefs config` command.
  - When setting directory quotas the directory statistics will be enabled automatically.

#### Recommended Upgrade Steps

1. Upgrade all client binaries to v1.1 version.
2. Deny re-connections from versions prior to v1.1: `juicefs config META-URL --min-client-version 1.1.0-A`.
3. Restart the service at a proper time (remount, restart gateway, etc.)
4. Make sure that all online clients are version v1.1 or higher: `juicefs status META-URL | grep -w Version`
5. Enable the new features, see [Directory Statistics](https://juicefs.com/docs/community/guide/dir-stats) and [Directory Quota](https://juicefs.com/docs/community/guide/quota#directory-quota).

### JuiceFS v1.0

JuiceFS has two compatibility changes in version v1.0 (specifically, v1.0.0-beta3). If you are using an older version of the client, it is recommended that you read the following content before upgrading.

#### SQL: Update table schema to support encoding other than UTF-8

JuiceFS v1.0 has changed the table schema to support encoding other than UTF-8. For existing file systems, you need to upgrade the table schema manually to support that. It's recommended to upgrade all clients first and then the table schema.

:::note
Table schema upgrades are optional, and they are required only if you need to use non-UTF-8 characters. In addition, database performance may degrade when upgrading SQL table schemas, affecting running services.
:::

##### MySQL/MariaDB

```sql
alter table jfs_edge
    modify name varbinary(255) not null;
alter table jfs_symlink
    modify target varbinary(4096) not null;
```

##### PostgreSQL

```sql
alter table jfs_edge
    alter column name type bytea using name::bytea;
alter table jfs_symlink
    alter column target type bytea using target::bytea;
```

##### SQLite

SQLite does not support modifying columns, but you can migrate columns by `dump` and `load` commands, refer to [JuiceFS Metadata Backup and Recovery](administration/metadata_dump_load.md) for details.

#### New session management format

JuiceFS v1.0 uses a new session management format. The previous versions of clients cannot see the sessions generated by v1.0 clients via `juicefs status` or `juicefs destroy`, whereas the new versions are able to see all the sessions.


================================================
FILE: docs/en/security/encryption.md
================================================
---
sidebar_position: 1
---
# Data Encryption

JuiceFS provides data encryption from two aspects:

1. Data Encryption In Transit
2. Data Encryption At Rest

## Data Encryption In Transit {#in-transit}

Running JuiceFS generally involves the network connection between database and object storage, which is determined by the architecture of JuiceFS. As long as the servers support encryption connections, JuiceFS can be accessed through the encrypted channel.

### Connect to object storage via HTTPS

Public cloud object storage generally supports both HTTP and HTTPS. If no scheme is specified, JuiceFS uses HTTPS by default. For example, the client will identify the bucket in following command as `https://myjfs.s3.ap-southeast-1.amazonaws.com`.

```shell {2}
juicefs format --storage s3 \
  --bucket myjfs.s3.ap-southeast-1.amazonaws.com \
  ...
```

With the above command, the client will recognize the bucket as `https://myjfs.s3.ap-southeast-1.amazonaws.com`.

In the case where server and object storage run on the same VPC network, explicitly set the URL scheme to `http` if you don't need an encrypted connection, e.g., `--bucket http://myjfs.s3.ap-southeast-1.amazonaws.com`.

### Connect to database via TLS/SSL

For [all the supported metadata engines](../reference/how_to_set_up_metadata_engine.md), as long as the database supports encryption and has been configured with encryption such as TLS/SSL, JuiceFS can connect to the database through its encrypted channel. For instance, a Redis database configured with TLS can use `rediss://` for connecting.

```shell {3}
juicefs format --storage s3 \
  --bucket myjfs.s3.ap-southeast-1.amazonaws.com \
  "rediss://myredis.ap-southeast-1.amazonaws.com:6379/1" myjfs
```

## Data Encryption At Rest {#at-rest}

JuiceFS provides Data Encryption At Rest support, which encrypts first, then uploads. All files stored in JuiceFS will be encrypted locally and then uploaded to object storage, effectively preventing data leakage when the object storage itself is compromised.

JuiceFS Data Encryption At Rest adopts a hybrid encryption architecture: symmetric encryption handles data encryption, while asymmetric encryption handles key protection. You only need to provide an private key when creating the file system to enable data encryption functionality, and provide the private key password through the `JFS_RSA_PASSPHRASE` environment variable. In usage, the mount point is completely transparent to applications, meaning the encryption and decryption processes will not affect file system access.

:::caution
The cached data on the client-side is **NOT** encrypted. Only the root user or owner can access this data. To encrypt the cached data, you can put the cached directory in an encrypted file system or block storage.
:::

### Encryption Principles

#### Encryption Architecture Design

JuiceFS adopts a **hybrid encryption architecture** with two encryption layers:

1. **Data Encryption Layer** (Symmetric Encryption - AES-256-GCM or ChaCha20-Poly1305 or SM4-GCM)
   - **Purpose**: Actually encrypts user data content
   - **Mechanism**: Each block generates a unique symmetric key `S` + random seed `N` (both use 256-bit keys)
   - **Advantage**: Both AES-256-GCM and ChaCha20-Poly1305 provide high-speed encryption and integrity verification (AEAD)
   - **Standard**: 256-bit key strength complies with NIST security standards, ChaCha20-Poly1305 is an RFC 8439 standard algorithm

2. **Key Protection Layer** (Asymmetric Encryption)
   - **Purpose**: Protects the secure distribution and storage of symmetric keys
   - **Mechanism**: Uses private key `M` to encrypt each data block's symmetric key `S`
   - **Advantage**: Solves key distribution challenges and avoids key reuse risks
   - **Scheme**: Supports private keys in PKCS#1 or PKCS#8 formats.

Users need to create a global private key `M` for the file system in advance. Each object stored in the object storage will have its own random symmetric key `S`.

Symbol explanation:

- `M` represents private key created by user
- `S` represents 256-bit symmetric key generated by the JuiceFS for each file object
- `N` represents random seed generated by the JuiceFS for each file object
- `K` represents the cipher text of `S` encrypted with private key `M`

![Encryption At-rest](../images/encryption.png)

#### Data Encryption Process

- Before writing to object storage, data blocks are compressed using LZ4 or Zstandard.
- A random 256-bit symmetric key `S` and a random seed `N` are generated for each data block.
- Each data block is encrypted into `encrypted_data` using AES-256-GCM or ChaCha20-Poly1305 or SM4-GCM algorithm with key `S` and seed `N`.
- To avoid the symmetric key `S` from being transmitted in clear text over the network, the symmetric key `S` is encrypted into the cipher text `K` with the RSA private key `M`.
- The encrypted data `encrypted_data`, the ciphertext `K`, and the random seed `N` are combined into an object and then written to the object storage.

#### Data Decryption Process

- Read the entire encrypted object (it may be a bit larger than 4MB).
- Parse the object data to get the ciphertext `K`, the random seed `N`, and the encrypted data `encrypted_data`.
- Decrypt `K` with private key to get symmetric key `S`.
- Decrypt the data `encrypted_data` based on AES-256-GCM or ChaCha20-Poly1305 or SM4-GCM using `S` and `N` to get the data block plaintext.
- Decompress the data block.

### Enable Data Encryption At Rest

:::note
Data Encryption At Rest must be enabled when creating file system. The file system that was created without Data Encryption At Rest enabled cannot enable it later.
:::

The steps to enable Data Encryption At Rest are:

1. Create a private key
2. Create an encrypted file system using the private key
3. Mount the file system

#### Step 1: Create a private key

The private key is crucial for Data Encryption At Rest and is generally manually generated using OpenSSL. The following command will generate a 2048-bit RSA private key named `my-priv-key.pem` in the current directory using the aes256 algorithm:

```shell
openssl genrsa -out my-priv-key.pem -aes256 2048
```

Since the `aes256` encryption algorithm is used, the command line will require you to provide a `Passphrase` of at least 4 characters for this private key. You can simply think of it as a password used to encrypt the RSA private key file itself, which is also the last security safeguard for the RSA private key file.

:::caution Special Attention
The security of the private key is extremely important, and special attention needs to be paid to the following points:

- **Passphrase Leakage Risk**: If the private key's passphrase is leaked, attackers may decrypt the private key stored in the metadata engine, thereby jeopardizing the security of all encrypted data
- **Private Key File Leakage**: If the encrypted private key file itself is leaked along with the passphrase, it will lead to serious security risks
- **Data Irrecoverability**: If the correct passphrase cannot be provided to access the private key stored in the metadata engine, **all encrypted data will be permanently lost and unrecoverable**

It is recommended to focus on protecting the security of the passphrase and pass it through environment variables to avoid leakage in command line history.
:::

#### Step 2: Create an encrypted file system

Creating an encrypted file system requires using the `--encrypt-rsa-key` option to specify the private key. The provided private key content will be written to the metadata engine. You need to use the environment variable `JFS_RSA_PASSPHRASE` to specify the private key's passphrase.

JuiceFS supports two encryption algorithm combinations, which can be specified via the `--encrypt-algo` option:

- `aes256gcm-rsa` (default): Uses AES-256-GCM + RSA (or other private key)
- `chacha20-rsa`: Uses ChaCha20-Poly1305 + RSA (or other private key)
- `sm4gcm`: Uses SM4-GCM + SM2 (or other private key)

1. Set passphrase using environment variable

    ```shell
    export JFS_RSA_PASSPHRASE=the-passwd-for-rsa
    ```

2. Create file system (using default AES-256-GCM encryption)

    ```shell {2}
    juicefs format --storage s3 \
      --encrypt-rsa-key my-priv-key.pem \
      ...
    ```

    Or explicitly specify ChaCha20-Poly1305 encryption:

    ```shell {2,3}
    juicefs format --storage s3 \
      --encrypt-rsa-key my-priv-key.pem \
      --encrypt-algo chacha20-rsa \
      ...
    ```

3. (Optional) Delete local private key file

   JuiceFS securely stores the private key content in the metadata engine during file system formatting. Therefore, after completing file system creation (unless there are specific compliance requirements), we recommend deleting your local private key file:

   ```shell
   rm my-priv-key.pem
   ```

   This way, you only need to ensure the security of the `JFS_RSA_PASSPHRASE` environment variable, and subsequent file system mounting and access only require providing the correct passphrase.

   If you need to retain the private key file due to compliance requirements or other reasons, please ensure the private key file is stored in a secure location with strict access permissions, and keep the private key file and passphrase separately.

#### Step 3: Mount file system

There is no need to specify extra options while mounting an encrypted file system. However, the passphrase of the private key needs to be set before mounting using environment variable.

1. Set passphrase using environment variable

    ```shell
    export JFS_RSA_PASSPHRASE=the-passwd-for-rsa
    ```

2. Mount file system

    ```shell
    juicefs mount redis://127.0.0.1:6379/1 /mnt/myjfs
    ```

### Performance Considerations

Enabling encryption does introduce some performance overhead, but modern hardware technologies have made this impact quite manageable. The specific performance impact depends on workload type, hardware configuration (particularly CPU encryption instruction set support), and data access patterns.

Modern CPUs have specialized hardware optimizations for TLS, HTTPS, and AES-256 encryption technologies. In particular, modern Intel and AMD processors include AES-NI instruction sets that can perform AES encryption operations at near-native speeds, significantly reducing the performance impact of data encryption.

#### Encryption Algorithm Selection Recommendations

**AES-256-GCM** (default choice):

- Excellent performance on modern CPUs with AES-NI instruction set support
- Widely supported and validated industry standard
- Suitable for most production environments

**ChaCha20-Poly1305**:

- May provide better performance on CPUs without AES-NI support
- Suitable for ARM architectures or older x86 processors
- Better resistance against timing attacks
- Preferred algorithm by companies like Google for mobile devices and certain server environments

When selecting encryption keys, we recommend using RSA-2048 keys, which provide a good balance between security strength and performance. RSA-4096 provides higher security, but its decryption operations are slower and may impact performance in high-concurrency read scenarios.

It's worth mentioning that encrypted data will be slightly larger than the original data, primarily because both AES-256-GCM and ChaCha20-Poly1305 encryption algorithms require adding authentication tags (16 bytes) and other encryption metadata.

### Security Best Practices

The security of an encryption scheme depends not only on the algorithms themselves but also on how encryption keys are properly managed and used. Here are some important security practice recommendations:

**Key management is at the core of security**. The passphrase you set for your private key should be strong enough—we recommend using at least 16 characters with a combination of uppercase and lowercase letters, numbers, and special symbols. We recommend passing the passphrase through environment variables to avoid leakage in command line history.

While regularly rotating keys is a good practice, it's important to note that changing private keys requires reformatting the entire file system. Therefore, when planning key rotation strategies, you need to balance security requirements with business continuity.

**Access control is equally important**. Ensure your metadata engine (whether Redis, MySQL, or another database) is configured with appropriate authentication and authorization mechanisms. Object storage access permissions should also follow the principle of least privilege, granting only necessary operational permissions.

At the network level, try to use VPC or private networks to isolate communication traffic between the metadata engine and object storage, reducing the risk of man-in-the-middle attacks.

**Monitoring and auditing** can help you detect abnormal situations promptly. We recommend logging all encryption-related operations, regularly checking key usage patterns, and establishing abnormal access detection mechanisms. This way, even if a security incident occurs, you can respond quickly and take appropriate measures.

### Important Considerations

When using JuiceFS encryption features, there are several important technical limitations to be aware of:

First, client-side local cached data is **NOT encrypted**. Although only root users or file owners can access this cached data, if your use case requires end-to-end full encryption, you'll need to consider additional protection measures, such as placing the cache directory on an encrypted file system or block storage.

Secondly, encryption functionality has some inherent limitations. File metadata (such as filenames, sizes, permissions, etc.) is not encrypted, and decrypted data exists in plaintext in memory. Most importantly, once encryption is enabled for a file system, it cannot be turned off—encryption is an irreversible operation.

In deployment planning, please consider that encryption brings additional CPU and memory overhead. To ensure optimal compatibility and stability, we recommend that all clients accessing encrypted file systems use the same or compatible versions of JuiceFS.

### Usage Scenario Analysis

JuiceFS encryption features are particularly suitable for these scenarios: protecting sensitive data in cloud object storage, meeting compliance requirements such as GDPR and HIPAA, long-term secure storage of important business data, and achieving data isolation in multi-tenant environments.

However, if you need client-side local cache encryption, or want to add encryption functionality to existing file systems later, this solution may not be suitable. Similarly, for applications with extremely demanding performance requirements, or scenarios that require frequent key rotation but cannot accept reformatting, careful consideration is needed.


================================================
FILE: docs/en/security/posix_acl.md
================================================
---
title: POSIX ACLs
description: Learn about POSIX ACL support in JuiceFS and how to enable and use ACL permissions.
sidebar_position: 1
---

POSIX ACLs (Portable Operating System Interface for Unix - Access Control Lists) are an access control mechanism in Unix-like operating systems that allows for finer-grained control over file and directory access permissions.

This document introduces how to enable and use POSIX ACL permissions in JuiceFS.

## Versions and compatibility requirements

* Since version 1.2, JuiceFS has supported POSIX ACLs.
* All client versions can mount volumes without ACLs enabled, regardless of their creation by new or old client versions.
* Once ACLs are enabled, they cannot be disabled. Therefore, the `--enable-acl` option is tied to the volume.

:::caution
If you plan to use ACL functionality, it is recommended to upgrade all clients to the latest version to avoid potential issues with older versions affecting the accuracy of ACLs.
:::

## Enable ACLs

As mentioned earlier, you can enable ACLs when creating a new volume or on an existing volume using a new version of the client.

### Create a new volume and enable ACLs

Execute the following command to create a new volume and enable ACLs:

```shell
juicefs format --enable-acl sqlite3://myjfs.db myjfs
```

### Enable ACLs on an existing volume

Use the `config` command to enable ACL functionality on an existing volume:

```
juicefs config --enable-acl sqlite3://myjfs.db
```

## Usage

To set ACL permissions for a file or directory, you can use the `setfacl` command, for example:

```
setfacl -m u:alice:rw- /mnt/jfs/file
```

For detailed rules, guidelines, and implementation of POSIX ACLs, see:

* [POSIX Access Control Lists on Linux](https://www.usenix.org/legacy/publications/library/proceedings/usenix03/tech/freenix03/full_papers/gruenbacher/gruenbacher_html/main.html)
* [setfacl](https://linux.die.net/man/1/setfacl)
* [How We Optimized ACL Implementation for Minimal Performance Impact](https://juicefs.com/en/blog/engineering/access-control-list)

## Notes

* ACL permission checks require [Linux kernel 4.9](https://lkml.iu.edu/hypermail/linux/kernel/1610.0/01531.html) or later.
* Enabling ACLs may impact performance. However, due to memory cache optimization, most usage scenarios experience minimal performance degradation.


================================================
FILE: docs/en/security/trash.md
================================================
---
sidebar_position: 2
---
# Trash

:::note
This feature requires at least JuiceFS v1.0.0, for previous versions, you need to upgrade all JuiceFS clients, and then enable trash using the `config` subcommand, introduced in below sections.
:::

JuiceFS enables the trash feature by default, files deleted will be moved in a hidden directory named `.trash` under the file system root, and kept for specified period of time before expiration. Until actual expiration, file system usage (check using `df -h`) will not change, this is also true with the corresponding object storage data.

When using `juicefs format` command to initialize JuiceFS volume, users are allowed to specify `--trash-days <val>` to set the number of days which files are kept in the `.trash` directory. Within this period, user-removed files are not actually purged, so the file system usage shown in the output of `df` command will not decrease, and the blocks in the object storage will still exist.

To control the expiration settings, use the [`--trash-days`](../reference/command_reference.mdx#format) option which is available for both `juicefs format` and `juicefs config`:

```shell
# Creating a new file system
juicefs format META-URL myjfs --trash-days=7

# Modify an existing file system
juicefs config META-URL --trash-days=7

# Set to 0 to disable Trash
juicefs config META-URL --trash-days=0
```

In addition, the automatic cleaning of the trash relies on the background job of the JuiceFS client. To ensure that the background job can be executed properly, at least one online mount point is required, and the [`--no-bgjob`](../reference/command_reference.mdx#mount-metadata-options) parameter should not be used when mounting the file system.

## Recover files {#recover}

When files are deleted, they will be moved to a directory that takes up the format of `.trash/YYYY-MM-DD-HH/[parent inode]-[file inode]-[file name]`, where `YYYY-MM-DD-HH` is the UTC time of the deletion. You can locate the deleted files and recover them if you remember when they are deleted.

If you have found the desired files in Trash, you can recover them using `mv`:

```shells
mv .trash/2022-11-30-10/[parent inode]-[file inode]-[file name] .
```

Files within the Trash directory lost all their directory structure information, and are stored in a "flatten" style, however the parent directory inode is preserved in the file name, if you have forgotten the file name, look for parent directory inode using [`juicefs info`](../reference/command_reference.mdx#info), and then track down the desired files.

Assuming the mount point being `/jfs`, and you've accidentally deleted `/jfs/data/config.json`, but you cannot directly recover this `config.json` because you've forgotten its name, use the following procedure to locate the parent directory inode, and then locate the corresponding trash files.

```shell
# Use the info subcommand to locate the parent directory inode
juicefs info /jfs/data

# Note the "inode" field in above output, assuming the inode of /jfs/data is 3
# Find all its files within the Trash directory using the find command
find /jfs/.trash -name '3-*'

# Recover all files under that directory
mv /jfs/.trash/2022-11-30-10/3-* /jfs/data
```

Keep in mind that only the root user have write access to the Trash directory, so the method introduced above is only available to the root user. If a normal user happens to have read permission to these deleted files, they can also recover them via a read-only method like `cp`, although this obviously wastes storage capacity.

If you accidentally delete a complicated structured directory, using solely `mv` to recover can be a disaster, for example:

```shell
$ tree data
data
├── app1
│   └── config
│       └── config.json
└── app2
    └── config
        └── config.json

# Delete the above complicated data directory
$ juicefs rmr data

# Files will be flattened inside the Trash directory
$ tree .trash/2023-08-14-05
.trash/2023-08-14-05
├── 1-12-data
├── 12-13-app1
├── 12-15-app2
├── 13-14-config
├── 14-17-config.json
├── 15-16-config
└── 16-18-config.json
```

To resolve such inconvenience, JuiceFS v1.1 provides the [`restore`](../reference/command_reference.mdx#restore) subcommand to quickly restore deleted files, while preserving its original directory structure. Run this procedure as follows:

```shell
# Run the restore command to reconstruct directory structure within the Trash
$ juicefs restore $META_URL 2023-08-14-05

# Preview the rebuilt directory structure, and determine the recovery scope
# You can either recover the entire directory using the below --put-back command, or just a subdir using mv
$ tree .trash/2023-08-14-05
.trash/2023-08-14-05
└── 1-12-data
    ├── app1
    │   └── config
    │       └── config.json
    └── app2
        └── config
            └── config.json

# Add --put-back to recover deleted files
juicefs restore $META_URL 2023-08-14-05 --put-back
```

## Permanently delete files {#purge}

When files in the trash directory reach their expiration time, they will be automatically cleaned up. It is important to note that the file cleaning is performed by the background job of the JuiceFS client, which is scheduled to run every hour by default. Therefore, when there are a large number of expired files, the cleaning speed of the object storage may not be as fast as expected, and it may take some time to see the change in storage capacity.

If you want to permanently delete files before their expiration time, you need to have `root` privileges and use [`juicefs rmr`](../reference/command_reference.mdx#rmr) or the system's built-in `rm` command to delete the files in the `.trash` directory, so that storage space can be immediately released.

For example, to permanently delete a directory in the trash:

```shell
juicefs rmr .trash/2022-11-30-10/
```

If you want to delete expired files more quickly, you can mount multiple mount points to exceed the deletion speed limit of a single client.

## Selectively skipping trash {#skip}

It is possible to skip the trash and permanently delete files directly. The 's' flag using `chattr` can be set on files or directories to enable this feature. When a file or directory has the 's' flag set, the file or directory will be permanently deleted when removed, bypassing the trash. New files or directories created under a directory with the 's' flag will also inherit this behavior. Existing JuiceFS files or directories moved into a directory with the 's' flag set will not inherit the flag.

You will need to enable the mount option `--enable-ioctl` to allow adjusting file attributes using `chattr`.

## Trash and slices {#gc}

Apart from user deleted files, there's another type of data which also resides in Trash, which isn't directly visible from the `.trash` directory, they are stale slices created by file edits and overwrites. Read more in [How JuiceFS stores files](../introduction/architecture.md#how-juicefs-store-files). To sum up, if applications constantly delete or overwrite files, object storage usage will exceed file system usage.

Although stale slices cannot be browsed or manipulated, you can use [`juicefs status`](../reference/command_reference.mdx#status) to observe its scale:

```shell
# The Trash Slices field displayed below is the number of stale slices
$ juicefs status META-URL --more
...
           Trash Files: 0                     0.0/s
           Trash Files: 0.0 b   (0 Bytes)     0.0 b/s
 Pending Deleted Files: 0                     0.0/s
 Pending Deleted Files: 0.0 b   (0 Bytes)     0.0 b/s
          Trash Slices: 27                    26322.2/s
          Trash Slices: 783.0 b (783 Bytes)   753.1 KiB/s
Pending Deleted Slices: 0                     0.0/s
Pending Deleted Slices: 0.0 b   (0 Bytes)     0.0 b/s
...
```

Stale slices are also kept according to the expiration settings, this adds another layer of data security: if files are erroneously edited or overwritten, original state can be recovered through metadata backups (provided that you have already set up metadata backup). If you do need to rollback this type of accident overwrites, you need to obtain a copy of the metadata backup, and then mount using this copy, so that you can visit the file system in its older state, and recover any files before they are tampered. See [Metadata Backup & Recovery](../administration/metadata_dump_load.md) for more.

Due to its invisibility, stale slices can grow to a very large size, if you do need to delete them, follow below procedure:

```shell
# Temporarily disable Trash
juicefs config META-URL --trash-days 0

# Optionally run compaction
juicefs gc --compact

# Purge leaked objects
juicefs gc --delete

# Do not forget to re-enable Trash upon completion
```

## Access privileges {#permission}

All users are allowed to browse the trash directory and see the full list of removed files. However, only root has write privilege to the `.trash` directory. Since JuiceFS keeps the original permission modes even for the trashed files, normal users can read files that they have permission to.

Several caveats on Trash privileges:

* When JuiceFS Client is started by a non-root user, add the `-o allow_root` option or trash cannot be emptied normally.
* The `.trash` directory can only be accessed from the file system root, thus not available for sub-directory mount points.
* User cannot create new files inside the trash directory, and only root are allowed to move or delete files in trash.


================================================
FILE: docs/en/tutorials/aliyun.md
================================================
---
title: Use JuiceFS on Alibaba Cloud
sidebar_position: 7
slug: /clouds/aliyun
description: Learn how to use JuiceFS on Alibaba Cloud.
---

As shown in the figure below, JuiceFS is driven by both the database and the object storage. The files stored in JuiceFS are split into fixed-size data blocks and stored in the object store according to certain rules, while the metadata corresponding to the data is stored in the database.

The metadata is stored completely independently. Retrieval and processing of files do not directly manipulate the data in the object storage. Instead, operations are performed first on the metadata in the database. Interaction with the object storage only occurs when data changes.

This design can effectively reduce the cost of the object storage in terms of the number of requests. It also allows users to significantly experience the performance improvement brought by JuiceFS.

![JuiceFS-arch-new](../images/juicefs-aliyun.png)

This document introduces how to use JuiceFS on Alibaba Cloud.

## Preparation

From the previous architecture description, you can know that JuiceFS needs to be used together with database and object storage. Here we directly use the Alibaba Cloud ECS cloud server, combined with cloud database and OSS object storage.

When you create cloud computing resources, try to choose in the same region, so that resources can access each other through intranet and avoid using public network to incur additional traffic costs.

### ECS

JuiceFS has no special requirements for server hardware. Generally speaking, entry-level cloud servers can also use JuiceFS stably. Typically, you just need to choose the one that can meet your own application requirements.

In particular, you do not need to buy a new server or reinstall the system to use JuiceFS. JuiceFS is not application invasive and does not cause any interference with your existing systems and programs. You can install and use JuiceFS on your running server.

By default, JuiceFS takes up 1 GB of hard disk space for caching, and you can adjust the size of the cache space as needed. This cache is a data buffer layer between the client and the object storage. You can get better performance by choosing a cloud drive with better performance.

In terms of operating system, JuiceFS can be installed on all operating systems provided by Alibaba Cloud ECS.

**The ECS specification used in this document are as follows:**

| **Instance specification** | ecs.t5-lc1m1.small         |
| -------------------------- | -------------------------- |
| **CPU**                    | 1 core                     |
| **MEMORY**                 | 1 GB                       |
| **Storage**                | 40 GB                      |
| **OS**                     | Ubuntu Server 20.04 64-bit |
| **Location**               | Shanghai                   |

### Cloud database

JuiceFS stores all the metadata corresponding to the data in a separate database, which currently supports Redis, MySQL, PostgreSQL, SQLite, and OceanBase.

Depending on the database type, the performance and reliability of metadata are different. For example, Redis runs entirely in memory. While it provides the ultimate performance, it is difficult to operate and maintain and has low reliability. SQLite is a single-file relational database with low performance and is not suitable for large-scale data storage. However, it is configuration-free and suitable for a small amount of data storage on a single machine. In contrast, OceanBase is a distributed relational database that delivers high performance while ensuring data consistency and high reliability (RTO < 8 seconds). It is particularly well-suited for scenarios in industries such as finance, retail, and telecommunications, where transactional consistency and distributed capabilities are critical. By integrating with JuiceFS, OceanBase enhances the efficiency, reduces the latency, and improves the stability of handling massive metadata, meeting the demanding requirements of modern distributed storage systems for underlying databases.

If you just want to evaluate the functionality of JuiceFS, you can build the database manually on ECS. If you want to use JuiceFS in a production environment, and you don't have a professional database operation and maintenance team, the cloud database service is usually a better choice.

You can also use cloud database services provided on other platforms if you wish. But in this case, you have to expose the database port to the public network, which may have some security risks.

If you must access the database through the public network, you can enhance the security of your data by strictly limiting the IP addresses that are allowed to access the database through the whitelist feature provided by the cloud database console.

On the other hand, if you cannot successfully connect to the cloud database through the public network, you can check the whitelist of the database.

|    Database     |                          Redis                          |                      MySQL/PostgreSQL                       |                            SQLite                            |                          OceanBase                          |
| :-------------: | :-----------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| **Performance** |                          High                           |                            Medium                            |                             Low                              |                          High                           |
| **Management**  |                          High                           |                            Medium                            |                             Low                              |                            Medium                            |
| **Reliability** |                           Low                           |                            Medium                            |                             Low                              |                          High                           |
|  **Scenario**   | Massive data, distributed high-frequency reads and writes | Massive data, distributed low- and medium-frequency reads and writes | Low-frequency reads and writes in single machine for small amounts of data | Distributed scenarios, strong transaction consistency, and high reliability requirements |

**This document uses [ApsaraDB for Redis](https://www.alibabacloud.com/product/apsaradb-for-redis), and the following pseudo address is compiled for demonstration purposes only:**

| Redis version              | 5.0 Community Edition                  |
|----------------------------|----------------------------------------|
| **Instance specification** | 256M Standard master-replica instances |
| **Connection address**     | `herald-sh-abc.redis.rds.aliyuncs.com` |
| **Available zone**         | Shanghai                               |

### Object Storage OSS

JuiceFS stores all data in object storage, which supports almost all object storage services. However, to get the best performance, when using Alibaba Cloud ECS, OSS object storage is usually the optimal choice. However, you must choose ECS and OSS buckets in the same region so that they can be accessed through intranet. This has low latency and does not require additional traffic costs.

You can also use object storage services provided by other cloud platforms if you wish, but this is not recommended. This is because accessing object storage from other cloud platforms through ECS needs the public network, and object storage will incur traffic costs. In addition, the access latency will be higher compared to this, which may affect the performance of JuiceFS.

Alibaba Cloud OSS has different storage levels. Since JuiceFS needs to interact with object storage frequently, it is recommended to use standard tier. You can use it with OSS resource pack to reduce the cost of using object storage.

### API access secret key

Alibaba Cloud OSS needs to be accessed through an API. You need to prepare an access key pair, including an AccessKey ID and an AccessKey secret. [Click here](https://www.alibabacloud.com/help/doc-detail/125558.htm) to see how to obtain the access key pair.

> **Security advisory**: Explicit use of the API access secret key may lead to key compromise. It is recommended to assign a [RAM role](https://www.alibabacloud.com/help/doc-detail/110376.htm) to the cloud server. Once an ECS is granted access to the OSS, the API access key is no longer required to access the OSS.

## Installation

We are currently using Ubuntu Server 20.04 64-bit, so you can download the latest version of the client by running the following command:

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

Alternatively, you can choose another version by visiting the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page.

Execute the command, and you will see the help message returned by JuiceFS. This means that the client installation was successful.

```shell
$ juicefs
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.15.2 (2021-07-07T05:51:36Z 4c16847)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

JuiceFS has good cross-platform compatibility and supports Linux, Windows, and macOS. This document focuses on installing and using JuiceFS on Linux. For installation instructions on other systems, [check this document](../getting-started/installation.md).

## Create JuiceFS storage

Once the JuiceFS client is installed, you can create the JuiceFS storage using the Redis database and OSS object storage that you prepared earlier.

Technically speaking, this step should be called "Format a volume." However, given that many users may not understand or care about the standard file system terminology, we will refer to the process simply as "Create JuiceFS storage."

The following command creates a storage named `mystor`, which is a file system, using the `format` subcommand provided by the JuiceFS client:

```shell
$ juicefs format \
    --storage oss \
    --bucket https://<your-bucket-name> \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    mystor
```

**Option description:**

- `--storage`: Specifies the type of object storage. [Click here](../reference/how_to_set_up_object_storage.md) to view the object storage services supported by JuiceFS.
- `--bucket`: Bucket domain name of the object storage. When using OSS, just fill in the bucket name. There is no need to fill in the full domain name. JuiceFS will automatically identify and fill in the complete address.
- `--access-key` and `--secret-key`: The secret key pair to access the object storage API. [Click here](https://www.alibabacloud.com/help/doc-detail/125558.htm) for instructions on obtaining these keys.

> Redis 6.0 authentication requires username and password parameters in the format of `redis://username:password@redis-server-url:6379/1`. Currently, Alibaba Cloud Redis only provides Reids 4.0 and 5.0 versions, which require only a password for authentication. When setting the Redis server address, leave the username empty, like this: `redis://:password@redis-server-url:6379/1`.

When you are using the RAM role to bind to the ECS, you can create JuiceFS storage by specifying `--storage` and `--bucket` without providing the API access key. The command can be rewritten as follows:

```shell
$ juicefs format \
    --storage oss \
    --bucket https://mytest.oss-cn-shanghai.aliyuncs.com \
    redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    mystor
```

A successful creation of the file system will yield output similar to the following:

```shell
2021/07/13 16:37:14.264445 juicefs[22290] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
2021/07/13 16:37:14.277632 juicefs[22290] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:37:14.281432 juicefs[22290] <INFO>: Ping redis: 3.609453ms
2021/07/13 16:37:14.527879 juicefs[22290] <INFO>: Data uses oss://mytest/mystor/
2021/07/13 16:37:14.593450 juicefs[22290] <INFO>: Volume is formatted as {Name:mystor UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://mytest340 AccessKey:LTAI4G4v6ioGzQXy56m3XDkG SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

## Mount JuiceFS

When the file system is created, the information related to the object storage is stored in the database. Therefore, you do not need to enter information such as the bucket domain and secret key when mounting.

Use the `mount` subcommand to mount the file system to the `/mnt/jfs` directory.

```shell
sudo juicefs mount -d redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs
```

> **Note**: When mounting the file system, only the Redis database address is required; the file system name is not necessary. The default cache path is `/var/jfsCache`. Make sure the current user has sufficient read/write permissions.

Output similar to the following means that the file system was mounted successfully:

```shell
2021/07/13 16:40:37.088847 juicefs[22307] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com/1
2021/07/13 16:40:37.101279 juicefs[22307] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:40:37.104870 juicefs[22307] <INFO>: Ping redis: 3.408807ms
2021/07/13 16:40:37.384977 juicefs[22307] <INFO>: Data use oss://mytest/mystor/
2021/07/13 16:40:37.387412 juicefs[22307] <INFO>: Disk cache (/var/jfsCache/4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b/): capacity (1024 MB), free ratio (10%), max pending pages (15)
.2021/07/13 16:40:38.410742 juicefs[22307] <INFO>: OK, mystor is ready at /mnt/jfs
```

You can use the `df` command to see how the file system is mounted:

```shell
$ df -Th
File system      type         capacity used usable used%  mount point
JuiceFS:mystor   fuse.juicefs  1.0P     64K  1.0P    1%   /mnt/jfs
```

After the file system is successfully mounted, you can store data in the `/mnt/jfs` directory as if you were using a local hard drive.

> **Multi-host sharing**: JuiceFS storage supports being mounted by multiple cloud servers at the same time. You can install the JuiceFS client on other could servers and then use the `redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs. com:6379/1` database address to mount the file system on each host.

## File system status

Use the `status` subcommand of the JuiceFS client to view basic information and connection status of a file system.

```shell
$ juicefs status redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1

2021/07/13 16:56:17.143503 juicefs[22415] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
2021/07/13 16:56:17.157972 juicefs[22415] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:56:17.161533 juicefs[22415] <INFO>: Ping redis: 3.392906ms
{
  "Setting": {
    "Name": "mystor",
    "UUID": "4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b",
    "Storage": "oss",
    "Bucket": "https://mytest",
    "AccessKey": "<your-access-key-id>",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 3,
      "Heartbeat": "2021-07-13T16:55:38+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "demo-test-sh",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 22330
    }
  ]
}
```

## Unmount JuiceFS

You can unmount the file system using the `umount` command provided by the JuiceFS client, for example:

```shell
sudo juicefs umount /mnt/jfs
```

> **Note**: Forcelly unmounting a file system in use may result in data corruption or loss. Therefore, proceed with caution.

## Auto-mount on boot

For details on auto-mounting JuiceFS at boot time, see [Mount JuiceFS at Boot Time](../administration/mount_at_boot.md).


================================================
FILE: docs/en/tutorials/aws.md
================================================
---
title: Use JuiceFS on AWS
sidebar_position: 4
slug: /clouds/aws
---

Amazon Web Services (AWS) is a leading global cloud computing platform that offers a wide range of cloud computing services. With its extensive product line, AWS provides flexible options for creating and utilizing JuiceFS file systems.

## Where can JuiceFS be used? {#where-can-juicefs-be-used}

JuiceFS has a rich set of API interfaces. For AWS, JuiceFS can typically be used in the following products:

- **Amazon EC2**: Use by mounting the JuiceFS file system
- **Amazon Elastic Kubernetes Service (EKS)**: Utilizing the JuiceFS CSI Driver
- **Amazon EMR**: Using the JuiceFS Hadoop Java SDK

## Preparation {#preparation}

A JuiceFS file system consists of two parts:

1. **Object Storage**: Used for data storage.
2. **Metadata Engine**: A database used for storing metadata.

Depending on specific requirements, you can choose to use fully managed databases and S3 object storage on AWS, or deploy them on EC2 and EKS by yourself.

:::tip
This article focuses on the method of creating a JuiceFS file system using AWS fully managed services. For self-hosted scenarios, please refer to the ["JuiceFS Supported Metadata Engines"](../reference/how_to_set_up_metadata_engine.md) and ["JuiceFS Supported Object Storage"](../reference/how_to_set_up_object_storage.md) guides, as well as the corresponding program documentation.
:::

### Object storage {#object-storage}

S3 is the object storage service provided by AWS. You can create a bucket in the corresponding region as needed, or authorize the JuiceFS client to automatically create a bucket through [IAM roles](../reference/how_to_set_up_object_storage.md#aksk).

Amazon S3 provides various [storage classes](https://docs.aws.amazon.com/AmazonS3/latest/userguide/storage-class-intro.html), for example:

- **S3 Standard**: Standard storage, suitable for general-purpose storage with frequent data access, offering real-time access with no retrieval costs.
- **S3 Standard-IA**: Infrequent Access (IA) storage, suitable for data that is accessed less frequently but needs to be stored for the long term, offering real-time access with retrieval costs.
- **S3 Glacier**: Archive storage, suitable for data that is rarely accessed and requires retrieval (thawing) before access.

You can set the storage class when creating or mounting the JuiceFS file system, please refer to [documentation](../reference/how_to_set_up_object_storage.md#storage-class) for details. It is recommended to choose the standard storage class first. Although other storage classes may have lower unit storage prices, they often come with minimum storage duration requirements and retrieval costs.

Furthermore, accessing object storage services requires authentication using Access Key (a.k.a. access key ID) and Secret Key (a.k.a. secret access key). You can refer to the document ["Managing access keys for IAM users"](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) for creating the necessary policies. When accessing S3 from an EC2 cloud server, you can also assign an [IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html) to the EC2 instance to enable the S3 API to be called without using access keys.

### Database {#database}

AWS offers various network-based fully managed databases that can be used to build the JuiceFS metadata engine, mainly including:

- **Amazon MemoryDB for Redis** (hereinafter referred to as MemoryDB): A durable Redis in-memory database service that provides extremely fast performance.
- **Amazon RDS**: Fully managed databases such as MariaDB, MySQL, PostgreSQL, and more.

:::note
Although Amazon ElastiCache for Redis (hereinafter referred to as ElastiCache) also provides services compatible with the Redis protocol, compared with MemoryDB, ElastiCache cannot provide "strong consistency guarantee", so MemoryDB is recommended.
:::

## Using JuiceFS on EC2 {#using-juicefs-on-ec2}

### Installing the JuiceFS client {#installing-the-juicefs-client}

Please refer to the [Installation](../getting-started/installation.md) documentation to install the latest JuiceFS Community Edition client based on the operating system used by your EC2 instance.

For example, if you are using a Linux system, you can use the one-liner installation script to automatically install the client:

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

### Creating a File System {#creating-a-file-system}

#### Preparing object storage {#preparing-object-storage}

You can assign an IAM role with [AmazonS3FullAccess](https://docs.aws.amazon.com/AmazonS3/latest/userguide/security-iam-awsmanpol.html#security-iam-awsmanpol-amazons3fullaccess) permission to your EC2 instance, allowing it to create and use S3 Buckets directly without using Access Key and Secret Key.

#### Preparing the database {#preparing-the-database}

Here we take MemoryDB as an example, please refer to ["Redis Best Practices"](../administration/metadata/redis_best_practices.md) and AWS documentation to create a database.

In order to allow EC2 instances to access the Redis cluster, you need to create them in the same VPC or add rules to the security group of the Redis cluster to allow access from the EC2 instance.

:::note
If you are creating a Redis 7.0 version cluster, you will need to install JuiceFS version 1.1 or above on the client side.
:::

#### Formatting file system {#formatting-file-system}

```shell
juicefs format --storage s3 \
  --bucket https://s3.ap-east-1.amazonaws.com/myjfs \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1 \
  myjfs
```

### Mounting file system {#mounting-file-system}

```shell
sudo juicefs mount -d \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1 \
  /mnt/myjfs
```

To mount and use the file system created by authorizing S3 access through an IAM role from outside of AWS, you will need to use `juicefs config` to add the Access Key and Secret Key for the file system.

```shell
juicefs config \
  --access-key=<your-access-key> \
  --secret-key=<your-secret-key> \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1
```

### Mounting at boot {#mounting-at-boot}

Please refer to the document [Mount JuiceFS at Boot](../administration/mount_at_boot.md) for details on how to automatically mount JuiceFS at boot.

## Using JuiceFS on Amazon EKS {#using-juicefs-on-amazon-eks}

Amazon EKS supports [three types of node](https://docs.aws.amazon.com/eks/latest/userguide/eks-compute.html):

- **EKS managed node groups**: Use Amazon EC2 as compute nodes
- **Self-managed nodes**: Use Amazon EC2 as compute nodes
- **Fargate**: A serverless compute engine

JuiceFS CSI Driver is not currently supported on Fargate. Please create a cluster using "EKS managed node groups" or "self-managed nodes" to use JuiceFS CSI Driver.

Amazon EKS is a standard Kubernetes cluster and can be managed using tools such as `eksctl`, `kubectl`, and `helm`. For installation and usage instructions, please refer to the [JuiceFS CSI Driver documentation](/docs/csi/introduction).

## Using JuiceFS on Amazon EMR {#using-juicefs-on-amazon-emr}

Please refer to the document ["Using JuiceFS in Hadoop Ecosystem"](../deployment/hadoop_java_sdk.md) for instructions.


================================================
FILE: docs/en/tutorials/digitalocean.md
================================================
---
title: Use JuiceFS on DigitalOcean
sidebar_position: 6
slug: /clouds/digitalocean
---

JuiceFS is designed for the cloud, using the cloud platform out-of-the-box storage and database services, and can be configured and put into use in as little as a few minutes. This article uses the DigitalOcean as an example to introduce how to quickly and easily install and use JuiceFS on the cloud computing platform.

## Preparation

JuiceFS is powered by a combination of storage and database, so the things you need to prepare should include.

### 1. Cloud Server

The cloud server on DigitalOcean is called Droplet. If you already have a Droplet, you do not need to purchase a new one separately in order to use JuiceFS. Whichever cloud server needs to use JuiceFS storage on it, install the JuiceFS client for it.

#### Hardware Specifications

JuiceFS has no special hardware requirements, and any size Droplet can be used stably.  However, it is recommended to choose a better performing SSD and reserve at least 1GB for JuiceFS to use as local cache.

#### Operating System

JuiceFS supports Linux, BSD, macOS and Windows. In this article, we will take Ubuntu Server 20.04 as an example.

### 2. Object Storage

JuiceFS uses object storage to store all your data, and using Spaces on DigitalOcean is the easiest solution. Spaces is an S3-compatible object storage service that works right out of the box. It is recommended to choose the same region as Droplet to get the best access speed and also to avoid additional traffic costs.

Of course, you can also use an object storage service from another platform or build it manually using Ceph or MinIO. In short, you are free to choose the object storage you want to use, just make sure that the JuiceFS client can access the object storage.

Here, we created a Spaces storage bucket named `juicefs` with the region `sgp1` in Singapore, and it is accessible at:

- `https://juicefs.sgp1.digitaloceanspaces.com`

In addition, you also need to create `Spaces access keys` in the API menu, which JuiceFS needs to access the Spaces API.

### 3. Database

Unlike normal file systems, JuiceFS stores all metadata corresponding to the data in a separate database, and the larger the size of the stored data, the better the performance. Currently, JuiceFS supports common databases such as Redis, TiKV, MySQL/MariaDB, PostgreSQL, SQLite, etc., while support for other databases is under continuous development. If the database you need is not supported at the moment, please submit [issue](https://github.com/juicedata/juicefs/issues) feedback.

Each database has its own advantages and disadvantages in terms of performance, size and reliability, and you should choose according to the actual needs of the scenario.

Don't worry about the choice of database, the JuiceFS client provides a metadata migration feature that allows you to easily export and migrate metadata from one database to another.

For this article, we use DigitalOcean's Redis 6 database hosting service, choose `Singapore`, and select the same VPC private network as the existing Droplet. It takes about 5 minutes to create the Redis, and we follow the setup wizard to initialize the database.

![DigitalOcean-Redis-guide](../images/digitalocean-redis-guide.png)

By default, the Redis allows all inbound connections. For security reasons, you should select the Droplet that have access to the Redis in the security setting section of the setup wizard in the `Add trusted sources`, that is, only allow the selected host to access the Redis.

In the setting of the eviction policy, it is recommended to select `noeviction`, that is, when the memory is exhausted, only errors are reported and no data is evictioned.

> **Note**: In order to ensure the safety and integrity of metadata, please do not select `allkeys-lru` and `allkey-random` for the eviction policy.

The access address of the Redis can be found in the `Connection Details` of the console. If all computing resources are in DigitalOcean, it is recommended to use the VPC private network for connection first, which can maximize security.

![DigitalOcean-Redis-url](../images/digitalocean-redis-url.png)

## Installation and Use

### 1. Install JuiceFS client

We currently using Ubuntu Server 20.04, execute the following command to install the latest version of the client.

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

Execute the command and see the command help information returned to `juicefs`, which means that the client is installed successfully.

```shell
$ juicefs

NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.16.2 (2021-08-25T04:01:15Z 29d6fee)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   stats    show runtime stats
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

In addition, you can also visit the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page to select other versions for manual installation.

### 2. Create a file system

To create a file system, use the `format` subcommand, the format is:

```shell
juicefs format [command options] META-URL NAME
```

The following command creates a file system named `mystor`:

```shell
$ juicefs format \
    --storage space \
    --bucket https://juicefs.sgp1.digitaloceanspaces.com \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 \
    mystor
```

**Parameter Description:**

- `--storage`: Specify the data storage engine, here is `space`, click here to view all [supported storage](../reference/how_to_set_up_object_storage.md).
- `--bucket`: Specify the bucket access address.
- `--access-key` and `--secret-key`: Specify the secret key for accessing the object storage API.
- The Redis managed by DigitalOcean needs to be accessed with TLS/SSL encryption, so it needs to use the `rediss://` protocol header. The `/1` added at the end of the link represents the use of Redis's No. 1 database.

If you see output similar to the following, it means that the file system is created successfully.

```shell
2021/08/23 16:36:28.450686 juicefs[2869028] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:36:28.481251 juicefs[2869028] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:36:28.481763 juicefs[2869028] <INFO>: Ping redis: 331.706µs
2021/08/23 16:36:28.482266 juicefs[2869028] <INFO>: Data uses space://juicefs/mystor/
2021/08/23 16:36:28.534677 juicefs[2869028] <INFO>: Volume is formatted as {Name:mystor UUID:6b0452fc-0502-404c-b163-c9ab577ec766 Storage:space Bucket:https://juicefs.sgp1.digitaloceanspaces.com AccessKey:7G7WQBY2QUCBQC5H2DGK SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

### 3. Mount a file system

To mount a file system, use the `mount` subcommand, and use the `-d` parameter to mount it as a daemon. The following command mounts the newly created file system to the `mnt` directory under the current directory:

```shell
sudo juicefs mount -d \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
```

The purpose of using `sudo` to perform the mount operation is to allow JuiceFS to have the authority to create a cache directory under `/var/`. Please note that when mounting the file system, you only need to specify the `database address` and the `mount point`, not the name of the file system.

If you see output similar to the following, it means that the file system is mounted successfully.

```shell
2021/08/23 16:39:14.202151 juicefs[2869081] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:39:14.234925 juicefs[2869081] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:39:14.235536 juicefs[2869081] <INFO>: Ping redis: 446.247µs
2021/08/23 16:39:14.236231 juicefs[2869081] <INFO>: Data use space://juicefs/mystor/
2021/08/23 16:39:14.236540 juicefs[2869081] <INFO>: Disk cache (/var/jfsCache/6b0452fc-0502-404c-b163-c9ab577ec766/): capacity (1024 MB), free ratio (10%), max pending pages (15)
2021/08/23 16:39:14.738416 juicefs[2869081] <INFO>: OK, mystor is ready at mnt
```

Use the `df` command to see the mounting status of the file system:

```shell
$ df -Th
File system    type             capacity used usable used%  mount point
JuiceFS:mystor fuse.juicefs       1.0P   64K  1.0P   1%     /home/herald/mnt
```

As you can see from the output information of the mount command, JuiceFS defaults to sets 1024 MB as the local cache. Setting a larger cache can make JuiceFS have better performance. You can set the cache (in MiB) through the `--cache-size` option when mounting a file system. For example, set a 20GB local cache:

```shell
sudo juicefs mount -d --cache-size 20000 \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
```

After the file system is mounted, you can store data in the `~/mnt` directory just like using a local hard disk.

### 4. File system status

Use the `status` subcommand to view the basic information and connection status of a file system. You only need to specify the database URL.

```shell
$ juicefs status rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:48:48.567046 juicefs[2869156] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:48:48.597513 juicefs[2869156] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:48:48.598193 juicefs[2869156] <INFO>: Ping redis: 491.003µs
{
  "Setting": {
    "Name": "mystor",
    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
    "Storage": "space",
    "Bucket": "https://juicefs.sgp1.digitaloceanspaces.com",
    "AccessKey": "7G7WQBY2QUCBQC5H2DGK",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 1,
      "Heartbeat": "2021-08-23T16:46:14+08:00",
      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869091
    },
    {
      "Sid": 2,
      "Heartbeat": "2021-08-23T16:47:59+08:00",
      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869146
    }
  ]
}
```

### 5. Unmount a file system

Use the `umount` subcommand to unmount a file system, for example:

```shell
sudo juicefs umount ~/mnt
```

> **Note**: Force unmount the file system in use may cause data damage or loss, please be careful to operate.

### 6. Auto-mount on boot

Please refer to ["Mount JuiceFS at Boot Time"](../administration/mount_at_boot.md) for more details.

### 7. Multi-host shared

The JuiceFS file system supports being mounted by multiple cloud servers at the same time, and there is no requirement for the geographic location of the cloud server. It can easily realize the real-time data sharing of servers between the same platform, between cross-cloud platforms, and between public and private clouds.

Not only that, the shared mount of JuiceFS can also provide strong data consistency guarantee. When multiple servers mount the same file system, the writes confirmed on the file system will be visible in real time on all hosts.

To use the shared mount, it is important to ensure that the database and object storage service that make up the file system can be accessed by each host to mount it. In the demonstration environment of this article, the Spaces object storage is open to the entire Internet, and it can be read and written through the API as long as the correct access key is used. But for the Redis database managed by DigitalOcean, you need to configure the access strategy reasonably to ensure that the hosts outside the platform have access permissions.

When you mount the same file system on multiple hosts, first create a file system on any host, then install the JuiceFS client on every hosts, and use the same database address to mount it with the `mount` command. Pay special attention to the fact that the file system only needs to be created once, and there should be no need to repeat file system creation operations on other hosts.


================================================
FILE: docs/en/tutorials/juicefs_on_colab.md
================================================
---
title: Use JuiceFS on Colab with Google Cloud SQL and GCS
sidebar_position: 5
slug: /juicefs_on_colab
description: Learn how to use JuiceFS on Google Colab with Google Cloud SQL and GCS, facilitating convenient file storage and sharing in a distributed manner.
---

[Colaboratory](https://colab.research.google.com), or "Colab" for short, is a product by Google Research. Colab enables users to write and execute arbitrary Python code through the browser. It is particularly well suited for machine learning, data analysis, and educational purposes.

Colab supports Google Drive for uploading files to or downloading files from Colab instances. However, in some cases, Google Drive might not be that convenient to use with Colab. This is where JuiceFS can a valuable tool, enabling easy file synchronization between Colab instances or between a Colab instance and a local or on-premises machine.

A demo Colab notebook using JuiceFS is available [here](https://colab.research.google.com/drive/1wA8vRwqiihXkI6ViDU8Ud868UeYtmCo5).

This document outlines the necessary steps for using JuiceFS in the Colab environment. We use Google Cloud SQL as the JuiceFS metadata engine and Google Cloud Storage (GCS) as the JuiceFS object storage.

For other types of metadata engines or object storages, see [How to Set Up a Metadata Engine](../reference/how_to_set_up_metadata_engine.md)
and [How to Set Up Object Storage](../reference/how_to_set_up_object_storage.md).

Many of the steps mentioned here will be quite similar to
the [Getting Started document](../getting-started/for_distributed.md), which you can also use for reference.

## Summary of steps

1. Format a `juicefs` file system from any machine or instance with access to Google Cloud resources.
2. Mount the `juicefs` file system in a Colab Notebook
3. Store sharing files across machines and platforms.

## Prerequisites

This demo uses Google Cloud Platform's Cloud SQL and Google Cloud Storage (GCS) to create a high-performance file storage system of JuiceFS. You need a Google Cloud Platform account to follow this demo document.

If you have another cloud vendor's resources (such as AWS RDBS and S3), you can still use this guide as a reference and with other reference documents provided by JuiceFS to achieve a similar solution.

To make JuiceFS reach the best performance, you might also want the Colab instance is in the same zone or close to the region where Cloud SQL and GCS are deployed. The tutorial works for a randomly hosted Colab instance, but you might notice slower performance due to the latency between the Colab instance and the Cloud SQL/GCS regions. To start Colab instances in a specific region, see [instructions for starting a GCE VM on Colab via GCP Marketplace](https://research.google.com/colaboratory/marketplace.html).

Before diving into the detailed steps, ensure you have the following resources ready:

* A Google Cloud Platform account ready and a *project* created. This demo uses a GCP project
named `juicefs-learning`.
* A Cloud SQL (Postgres) ready for use. This demo uses the `juicefs-learning:europe-west1:juicefs-sql-example-1` instance as the metadata service.
* A GCS bucket created as the object storage service. This demo uses `gs://juicefs-bucket-example-1` as the bucket to store file chunks.
* An IAM service account or an authorized user account that has write access to the Postgres server and GCS buckets.

## Detailed steps

### Step 1: Format and mount a JuiceFS file system folder

This step needs to be done only once, and you can choose to execute it on any machine or instance where you have good connectivity and access to your Google Cloud resources.

1. Use `gcloud auth application-default login` to prepare a local credential, or use `GOOGLE_APPLICATION_CREDENTIALS` to set up a JSON key file.

2. Use [`cloud_sql_proxy`](https://cloud.google.com/sql/docs/mysql/connect-admin-proxy) to open a port (in
this case, 5432) locally to expose your cloud Postgres service to your local machine:

    ```shell
    gcloud auth application-default login

    # Or set up the json key file via GOOGLE_APPLICATION_CREDENTIALS=/path/to/key

    cloud_sql_proxy -instances=juicefs-learning:europe-west1:juicefs-sql-example-1=tcp:0.0.0.0:5432
    ```

3. Use the following command to create a new file system named `myvolume` using the `juicefs format` command. Later, you can mount this file system on any other machines or instances where you have access to your cloud resources.

    You can download `juicefs` [here](https://github.com/juicedata/juicefs/releases).

    ```shell
    juicefs format \
        --storage gs \
        --bucket gs://juicefs-bucket-example-1 \
        "postgres://postgres:mushroom1@localhost:5432/juicefs?sslmode=disable" \
        myvolume
    ```

Note that this step is only required once on any machine you prefer to work on.

### Step 2: Mount the JuiceFS file system on Colab

Once you have completed Step 1, it means you already have a JuiceFS file system (named `myvolume` in this case) defined and ready to be used.

Now, let's open a Colab page and execute the following commands to mount our file system into a folder named `mnt`.

Firstly, download the `juicefs` binary and do the same as Step 1 to get GCP credentials and open the Cloud SQL proxy.

Note that the following commands are run in the Colab environment, so there is a `!` mark at the beginning for running shell commands.

1. Download `juicefs` to the Colab runtime instance:

    ```shell
    ! curl -sSL https://d.juicefs.com/install | sh -
    ```

2. Set up Google Cloud credentials:

    ```shell
    ! gcloud auth application-default login
    ```

3. Open `cloud_sql_proxy`:

    ```shell
    ! wget https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 -O cloud_sql_proxy
    ! chmod +x cloud_sql_proxy
    ! GOOGLE_APPLICATION_CREDENTIALS=/content/.config/application_default_credentials.json nohup ./cloud_sql_proxy -instances=juicefs-learning:europe-west1:juicefs-sql-example-1=tcp:0.0.0.0:5432 >> cloud_sql_proxy.log &
    ```

4. Mount the `myvolumn` JuiceFS file system onto the `mnt` folder:

    ```shell
    ! GOOGLE_APPLICATION_CREDENTIALS=/content/.config/application_default_credentials.json nohup juicefs mount  "postgres://postgres:mushroom1@localhost:5432/juicefs?sslmode=disable" mnt > juicefs.log &
    ```

Now you should be able to use the `mnt` folder as if it were a local file system folder to write and read folders and files in it.

### Step 3: Load data at another time or on another instance

With data stored in the JuiceFS file system in Step 2, you can repeat all the operations mentioned in Step 2 at any time on any other machines to access the previously stored data or to store more data into it.

Congratulations! Now you have learned how to use JuiceFS, specifically with Google Colab to
conveniently share and store data files in a distributed fashion.

Feel free to explore a demo Colab notebook using JuiceFS [here](https://colab.research.google.com/drive/1wA8vRwqiihXkI6ViDU8Ud868UeYtmCo5).

Happy coding :)


================================================
FILE: docs/en/tutorials/juicefs_on_k3s.md
================================================
---
title: Use JuiceFS on K3s
sidebar_position: 2
slug: /juicefs_on_k3s
---

[K3s](https://k3s.io) is a functionally optimized lightweight Kubernetes distribution that is fully compatible with Kubernetes.In other words, almost all operations performed on Kubernetes can also be executed on K3s. K3s packages the entire container orchestration system into a binary program with a size of less than 100MB, significantly reducing the environment dependencies and installation steps required to deploy Kubernetes production clusters. Compared to Kubernetes, K3s has lower performance requirements for the operating system.

In this article, we will build a K3s cluster with two nodes, install and configure [JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver) for the cluster, and lastly create an NGINX Pod for verification.

## Deploy a K3s cluster

K3s has very low **minimum requirements** for hardware:

- **Memory**: 512MB+ (recommend 1GB+)
- **CPU**: 1 core

When deploying a production cluster, it is recommended to start with a minimum hardware configuration of 4 cores and 8GB of memory per node. For more detailed information, please refer to the [Hardware Requirements](https://rancher.com/docs/k3s/latest/en/installation/installation-requirements/#hardware) documentation.

### K3s server node

The IP address of the server node is: `192.168.1.35`

You can use the official script provided by K3s to deploy the server node on a regular Linux distribution.

```shell
curl -sfL https://get.k3s.io | sh -
```

After the deployment is successful, the K3s service will automatically start, and kubectl and other tools will also be installed at the same time.

You can execute the following command to view the status of the node:

```shell
$ sudo kubectl get nodes
NAME     STATUS   ROLES                  AGE   VERSION
k3s-s1   Ready    control-plane,master   28h   v1.21.4+k3s1
```

Get the `node-token`:

```shell
sudo -u root cat /var/lib/rancher/k3s/server/node-token
```

### K3s worker node

The IP address of the worker node is: `192.168.1.36`

Execute the following command and change the value of `K3S_URL` to the IP or domain name of the server node (the default port is `6443`). Replace the value of `K3S_TOKEN` with the `node-token` obtained from the server node.

```shell
curl -sfL https://get.k3s.io | K3S_URL=http://192.168.1.35:6443 K3S_TOKEN=K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 sh -
```

After the deployment is successful, go back to the server node to check the node status:

```shell
$ sudo kubectl get nodes
NAME     STATUS   ROLES                  AGE   VERSION
k3s-s1   Ready    control-plane,master   28h   v1.21.4+k3s1
k3s-n1   Ready    <none>                 28h   v1.21.4+k3s1
```

## Install CSI Driver

It is consistent with the method of [Use JuiceFS on Kubernetes](../deployment/how_to_use_on_kubernetes.md). Therefore, you can install CSI Driver through Helm or kubectl.

Here we use kubectl as an example. Execute the following command to install the CSI Driver:

```shell
kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml
```

### Create Storage Class

Copy and modify the following code to create a configuration file, for example: `juicefs-sc.yaml`

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: juicefs-sc-secret
  namespace: kube-system
type: Opaque
stringData:
  name: "test"
  metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3"
  storage: "s3"
  bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com"
  access-key: "<your-access-key-id>"
  secret-key: "<your-access-key-secret>"
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: juicefs-sc
provisioner: csi.juicefs.com
reclaimPolicy: Retain
volumeBindingMode: Immediate
parameters:
  csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret
  csi.storage.k8s.io/node-publish-secret-namespace: kube-system
  csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret
  csi.storage.k8s.io/provisioner-secret-namespace: kube-system
```

The `stringData` part of the configuration file is used to set the information related to the JuiceFS file system. It will create the file system based on the information you specify. When you need to use the pre-created file system in the storage class, you only need to fill in the `name` and `metaurl`, and the other items can be deleted or the value can be left blank.

Execute the command to deploy the storage class:

```shell
kubectl apply -f juicefs-sc.yaml
```

View storage class status:

```shell
$ sudo kubectl get sc
NAME                   PROVISIONER             RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE
local-path (default)   rancher.io/local-path   Delete          WaitForFirstConsumer   false                  28h
juicefs-sc             csi.juicefs.com         Retain          Immediate              false                  28h
```

> **Note**: A storage class is associated with a JuiceFS file system. You can create as many storage classes as you need, but be aware of the storage class name in the configuration file as the same name can cause conflicts.

## Use JuiceFS to persist NGINX data

Next, deploy an NGINX Pod using a persistent storage declared by the JuiceFS storage class.

### Deployment

Create a configuration file, for example: `deployment.yaml`

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: web-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Pi
  storageClassName: juicefs-sc
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-run
  labels:
    app: nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: linuxserver/nginx
          ports:
            - containerPort: 80
          volumeMounts:
            - mountPath: /config
              name: web-data
      volumes:
        - name: web-data
          persistentVolumeClaim:
            claimName: web-pvc
```

Deploy it:

```
sudo kubectl apply -f deployment.yaml
```

### Service

Create a configuration file, for example: `service.yaml`

```yaml
apiVersion: v1
kind: Service
metadata:
  name: nginx-run-service
spec:
  selector:
    app: nginx
  ports:
    - name: http
      port: 80
```

Deploy it:

```shell
sudo kubectl apply -f service.yaml
```

### Ingress

K3s is pre-installed with traefik-ingress by default. Create an ingress for NGINX through the following configuration. For example: `ingress.yaml`

```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-run-ingress
  annotations:
    traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
  rules:
    - http:
        paths:
          - pathType: Prefix
            path: "/web"
            backend:
              service:
                name: nginx-run-service
                port:
                  number: 80
```

Deploy it:

```shell
sudo kubectl apply -f ingress.yaml
```

### Visit

After the deployment is completed, use the host on the same LAN to access any cluster node, and then you will see the NGINX welcome page.

![K3s-NGINX-welcome](../images/k3s-nginx-welcome.png)

Next, check whether the container has successfully mounted JuiceFS, and execute the following command to check the Pod status:

```shell
$ sudo kubectl get pods
NAME                         READY   STATUS    RESTARTS   AGE
nginx-run-7d6fb7d6df-qhr2m   1/1     Running   0          28h
nginx-run-7d6fb7d6df-5hpv7   1/1     Running   0          24h
```

Executing the following command will show the file system mount status of any Pod:

```shell
$ sudo kubectl exec nginx-run-7d6fb7d6df-qhr2m -- df -Th
Filesystem     Type          Size  Used Avail Use% Mounted on
overlay        overlay        20G  3.2G   17G  17% /
tmpfs          tmpfs          64M     0   64M   0% /dev
tmpfs          tmpfs         2.0G     0  2.0G   0% /sys/fs/cgroup
JuiceFS:jfs    fuse.juicefs  1.0P  174M  1.0P   1% /config
/dev/sda1      ext4           20G  3.2G   17G  17% /etc/hosts
shm            tmpfs          64M     0   64M   0% /dev/shm
tmpfs          tmpfs         2.0G   12K  2.0G   1% /run/secrets/kubernetes.io/serviceaccount
tmpfs          tmpfs         2.0G     0  2.0G   0% /proc/acpi
tmpfs          tmpfs         2.0G     0  2.0G   0% /proc/scsi
tmpfs          tmpfs         2.0G     0  2.0G   0% /sys/firmware
```

As you can see, the file system named `jfs` has been mounted to the `/config` directory of the container, and the used space is 174M.

This indicates that the Pods in the cluster have been successfully configured and used JuiceFS to persist data.


================================================
FILE: docs/en/tutorials/juicefs_on_kubesphere.md
================================================
---
title: Use JuiceFS on KubeSphere
sidebar_position: 3
slug: /juicefs_on_kubesphere
---

[KubeSphere](https://kubesphere.com.cn) is an application-centric multi-tenant container platform built on Kubernetes. It provides full-stack IT automated operation and maintenance capabilities and simplifies the DevOps workflow of the enterprise.

KubeSphere provides a friendly wizard-style operation interface for operation and maintenance, even users who are not experienced in Kubernetes can start management and use relatively easily. It provides a Helm-based application market that can easily install various Kubernetes applications under a graphical interface.

This article will introduce how to deploy JuiceFS CSI Driver in KubeSphere with one click to provide data persistence for various applications on the cluster.

## Prerequisites

1. **Install KubeSphere**

   There are two ways to install KubeSphere. One is installing in Linux, you can refer to the document: [All-in-One Installation of Kubernetes and KubeSphere on Linux](https://kubesphere.com.cn/en/docs/quick-start/all-in-one-on-linux) , One is installing in Kubernetes, you can refer to the document: [Minimal KubeSphere on Kubernetes](https://kubesphere.com.cn/en/docs/quick-start/minimal-kubesphere-on-k8s)

2. **Enable app store in KubeSphere**

   You can refer to the documentation for enabling the app store in KubeSphere: [KubeSphere App Store](https://kubesphere.com.cn/en/docs/pluggable-components/app-store)

## Install JuiceFS CSI Driver

If the version of KubeSphere is v3.2.0 and above, you can install CSI Driver directly in the app store, skip the "Configure Application Template/Application Repository" step, and go directly to the "Install" step; if the KubeSphere version is lower than v3.2.0, follow the steps below to configure application templates/application repository.

### Configure of Application Template/Application Repository

To install JuiceFS CSI Driver, you first need to create an application template. There are two methods.

#### Method one: Application Repository

Click in the workspace to enter the application management, select "App Repositories", click the create button to add JuiceFS CSI Repository, fill in:

- Repository name: `juicefs-csi-driver`
- Index URL: `https://juicedata.github.io/charts/`

![kubesphere_app_shop_en](../images/kubesphere_app_shop_en.png)

#### Method two: Application Template

Download the chart compressed package from the JuiceFS CSI Driver warehouse: [https://github.com/juicedata/juicefs-csi-driver/releases](https://github.com/juicedata/juicefs-csi-driver/releases).

In the "Workspace", click to enter the "App Management", select "App Templates", click "create", upload the chart compression package:

![kubesphere_app_template_en](../images/kubesphere_app_template_en.png)

### Install

Select "Project" where you want to deploy in the "Workspace" (the project in KubeSphere is the namespace in K8s), select "Apps", click the "create" button, select "From App Store", and then Select `juicefs`:

![kubesphere_shop_juicefs_en](../images/kubesphere_shop_juicefs_en.png)

If KubeSphere version is lower than v3.2.0, select button "From App Template" according to the application template configured in the previous step:

![kubesphere_install_csi_en](../images/kubesphere_install_csi_en.png)

It's the same after entering the configuration modification page, modify the following two places:

- namespace: Change to the corresponding project name
- storageClass.backend:
  The `backend` part is used to define the backend database and object storage of the file system. Refer to ["Create a File System"](../getting-started/standalone.md#juicefs-format) for related content.

You can also quickly create databases (such as Redis) and object storage (such as MinIO) by KubeSphere's app store. For example, build on the KubeSphere platform Redis: Select "Apps" in the current project, click the "create" button, select "From App Store", select "Redis", and then quickly deploy. The access URL of Redis can be the service name of the deployed application, as follows:

![kubesphere_redis_en](../images/kubesphere_redis_en.png)

Deploying MinIO on the KubeSphere platform is a similar process, but you can modify the accessKey and secretKey of MinIO before deploying MinIO, and you need to remember the configured values. As shown below:

![kubesphere_create_minio_en](../images/kubesphere_create_minio_en.png)

> Attention: If there are permissions error when deploying MinIO, you can set the `securityContext.enables` in the configuration to false.

MinIO's access URL can be the service name of the deployed application, as follows:

![kubesphere_minio_en](../images/kubesphere_minio_en.png)

After both Redis and MinIO are set up, you can fill in the `backend` value of JuiceFS CSI Driver.

1. `metaurl` is the database address of Redis just created, the access address of Redis can be the service name corresponding to the Redis application, such as `redis://redis-rzxoz6:6379/1`
2. `storage` is type of storage for the object, such as `minio`
3. `bucket` is the available bucket of MinIO just created (JuiceFS will automatically create it, no need to create it manually), the access address of MinIO can be the service name corresponding to the MinIO application, such as `http://minio-qkp9my:9000/minio/test`
4. `accessKey` and `secretKey` are the accessKey and secretKey of MinIO just created

![kubesphere_update_csi_en](../images/kubesphere_update_csi_en.png)

After the configuration is modified, click "Install".

## Usage

### Deploy application

The JuiceFS CSI Driver installed above has created a `StorageClass`, for example, the `StorageClass` created above is `juicefs-sc` , Can be used directly.

Then you need to create a PVC. In "Project", select "Storage Management", then select "Storage Volume", click the " Create" button to create a PVC, and select `juicefs-sc` for the "StorageClass", as follows:

![kubesphere_pvc_en](../images/kubesphere_pvc_en.png)

After the PVC is created, in the "Apps" of "Project", select "Workloads", click "Create" button to deploy the workload, and fill in your favorite name on the "Basic Information" page; the "Container Image" page can fill in the mirror image `centos`; Start command `sh,-c,while true; do echo $(date -u) >> /data/out.txt; sleep 5; done`; "Mount Volume" select "Existing Volume", and then select PVC created in one step, fill in the path in the container with `/data` as follows:

![kubesphere_deployment_en](../images/kubesphere_deployment_en.png)

![kubesphere_workload_en](../images/kubesphere_workload_en.png)

After the deployment completed, you can see the running pod:

![kubesphere_pod_en](../images/kubesphere_pod_en.png)

### Create StorageClass

If you did not create a `StorageClass` when installing JuiceFS CSI Driver, or you need to create a new one, you can follow the steps below:

After preparing the metadata service and object storage service, create a new `Secret`. On the "Platform Management" page, select "Configuration", select "Secret", and click the "Create" button to create a new one:

![kubesphere_secret_en](../images/kubesphere_secret_en.png)

Fill in the metadata service and object storage information in "Data Settings", as follows:

![kubesphere_update_secret_en](../images/kubesphere_update_secret_en.png)

After creating `Secret`, create `StorageClass`, select "Storage" on the "Platform Management" page, select "Storage Classes", click the "Create" button to create a new one, and select "Custom" for "Storage Class":

![kubesphere_sc_create_en](../images/kubesphere_sc_create_en.png)

The setting page information is as follows, where "Storage System" fills in `csi.juicefs.com`, and 4 more parameters are set:

- `csi.storage.k8s.io/provisioner-secret-name`: secret name
- `csi.storage.k8s.io/provisioner-secret-namespace`: project of secret
- `csi.storage.k8s.io/node-publish-secret-name`: secret name
- `csi.storage.k8s.io/node-publish-secret-namespace`: project of secret

![kubesphere_sc_update_en](../images/kubesphere_sc_update_en.png)

After clicking the "Create" button, the `StorageClass` is created.


================================================
FILE: docs/en/tutorials/juicefs_on_rancher.md
================================================
---
title: Use JuiceFS on Rancher
sidebar_position: 2
slug: /juicefs_on_rancher
---

[Rancher](https://rancher.com) is an enterprise-level Kubernetes cluster management system, which can be used to quickly complete the deployment of Kubernetes clusters on various cloud computing platforms.

Rancher provides a browser-based management interface, even users who are not experienced in Kubernetes can start to manage and use easily. It is preset with Helm-based application market by default, and various Kubernetes applications can be installed very easy under the graphical interface.

This article will introduce how to deploy Rancher on a Linux system and create a Kubernetes cluster with it, and then deploy JuiceFS CSI Driver with one click through the application market, thereby providing data persistence for various applications on the cluster.

## Install Rancher

Rancher can be installed on almost all modern Linux distributions. It can be installed directly on the operating system, or on Docker, Kubernetes, K3s or RKE. The installation is "Product-Ready" no matter which environment it is installed in.

Here we choose to install Rancher on Docker, with the following requirements:

- **Operating System**: Linux system with x86-64 architecture
- **Memory**: 4GB or more
- **Docker**: 19.03+

Run the following command to install Rancher:

```shell
sudo docker run --privileged -d --restart=unless-stopped -p 80:80 -p 443:443 rancher/rancher
```

After the container is created, Rancher's management interface can be opened by accessing the IP address of the host.

![Rancher-welcome](../images/rancher-welcome.jpeg)

## Create a Kubernetes cluster

After Rancher is installed, you can see that it has deployed a K3s cluster in the current container, and Rancher related resources are running in this internal K3s cluster, but we don't need to pay attention to this cluster now.

Next, start to create a Kubernetes cluster. In the Cluster section of the welcome page, click `Create` to create a cluster. Rancher supports the creation of Kubernetes clusters on major cloud computing platforms. Here we need to create a cluster directly on Rancher's host, so choose `Custom`. Then fill in the cluster name according to the wizard and select the Kubernetes version.

![Rancher-cluster-create](../images/rancher-cluster-create.jpg)

In the `Cluster Options` page, select the node role to be created, then copy the generated command and execute it on the target host.

![Rancher-cluster-options](../images/rancher-cluster-options.jpg)

After the cluster is created, it will be displayed in Rancher's cluster list.

![Rancher-clusters](../images/rancher-clusters.jpg)

## One-click installation of JuiceFS CSI Driver

In the cluster list, click to enter the Kubernetes cluster, click on the left navigation menu to expand `Apps & Marketplace` -> `Chart Repositories`, click the `Create` button to add JuiceFS CSI repository, fill in:

- **Name**: `juicefs`
- **Index URL**: `https://juicedata.github.io/charts/`

![Rancher-new-repo](../images/rancher-new-repo.jpg)

And then, you can see the new repository in the list.

![Rancher-repos](../images/rancher-repos.jpg)

Then click to open the `Apps & Marketplace` → `Charts` from the left menu, type `juicefs` in the search bar, and then click to open `juicefs-csi-driver`.

![Rancher-chart-search](../images/rancher-chart-search.jpg)

Click the "Install" button on the application details page, the latest version will be installed by default, or you can click to switch to the historical version to install.

![Rancher-chart-info](../images/rancher-chart-info.jpg)

The installation wizard has two steps:

### Step 1: Set up the `Namespace`

The JuiceFS CSI Driver defaults to `kube-system`, and there is no need to set this step.

### Step 2: Adjust configuration parameters

This page provides a YAML editor, you can adjust JuiceFS-related information according to your needs. Usually you only need to modify the `storageClasses` part, where the `backend` part is used to define the backend database and object storage of the file system. If you are using an existing file system, you only need to fill in the two items `metaurl` and `name`, for example:

```yaml
...
storageClasses:
  - backend:
      accessKey: ''
      bucket: ''
      metaurl: 'redis://:mypasswd@efgh123.redis.rds.aliyuncs.com/1'
      name: myjfs
      secretKey: ''
      storage: ''
    enabled: true
    name: juicefs-sc
    reclaimPolicy: Retain
...
```

> **Tip**: If you have multiple JuiceFS file systems that need to be associated with different storageClasses in the Kubernetes cluster, you can add storageClass configuration items after the `storageClasses` array. Pay attention to modify the name of the storage class to avoid conflicts.

Click "Install" and wait for the application installation to complete.

![Rancher-chart-installed](../images/rancher-chart-installed.jpg)

## Use JuiceFS to persist data

When deploying an application, specify `juicefs-sc` in the storage configuration.

![Rancher-PVC](../images/rancher-pvc.jpg)


================================================
FILE: docs/en/tutorials/juicefs_on_wsl.md
================================================
---
title: Use JuiceFS on WSL
sidebar_position: 9
---

WSL is called Windows Subsystem for Linux, which means Windows subsystem for Linux. It allows you to run most GNU/Linux native commands, tools, and programs in a Windows environment without the additional hardware overhead of using a virtual machine or dual system.

## Installing WSL

Using WSL requires Windows 10 2004 or higher or Windows 11.

To check the current system version, you can call up the Run program by pressing <kbd>Win</kbd> + <kbd>R</kbd>. Type and run `winver`.

![WSL/winver-en](../images/wsl/winver-en.png)

After confirming the Windows version, open PowerShell or Windows Command Prompt as an administrator and run the installation command.

```powershell
wsl --install
```

This command will download the latest Linux kernel, install and set WSL 2 as the default version, and install the Linux distribution (Ubuntu by default).

You can also specify the distribution to be installed directly at:

```powershell
wsl --install -d ubuntu
```

:::tip
`wsl --list --online` to view all available distributions.
:::

## Setting up Linux users and passwords

Once the WSL installation is complete, you can find the newly installed Linux distribution in the Start menu.

![WSL/startmenu-en](../images/wsl/startmenu-en.png)

By clicking on the Ubuntu subsystem shortcut, WSL will open the terminal of the Linux subsystem. The first time you run it, you will be asked to set the user and password for managing the Linux subsystem, just follow the prompts.

![WSL/init](../images/wsl/init.png)

There are several points to note about the username and password set here:

- This user is dedicated to the administration of this Linux subsystem and is not related to the users on the Windows system.
- This user will be the default user of the Linux subsystem and will be automatically logged in at boot time.
- this user will be considered as the administrator of the Linux subsystem and will be allowed to execute `sudo` commands.
- Multiple Linux subsystems are allowed to run at the same time in WSL, and each subsystem needs to have an administrative user.

## Using JuiceFS in WSL

Using JuiceFS in WSL means using JuiceFS on a Linux system, and here is an example of the Community Edition.

### Install the client

Install the JuiceFS client on the Linux subsystem by executing the following command.

   ```shell
   curl -sSL https://d.juicefs.com/install | sh -
   ```

### Creating a file system

JuiceFS is a distributed file system with data and metadata separated, usually using object storage as data storage and Redis, PostgreSQL or MySQL as metadata storage. It is assumed here that the following materials have been prepared.

#### Object Storage

View "[JuiceFS Supported Object Storage](../reference/how_to_set_up_object_storage.md)"

- **Bucket Endpoint**: `https://myjfs.oss-cn-shanghai.aliyuncs.com`
- **Access Key ID**: `ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**: `ZYXwvutsrqpoNMLkJiHgfeDCBA`

#### Database

View "[JuiceFS Supported Metadata Engines](../reference/how_to_set_up_metadata_engine.md)"

- **Database URL**: `myjfs-sh-abc.redis.rds.aliyuncs.com:6379`
- **Database Password**: `mypassword`

Write private information to environment variables:

```shell
export ACCESS_KEY=ABCDEFGHIJKLMNopqXYZ
export SECRET_KEY=ZYXwvutsrqpoNMLkJiHgfeDCBA
export REDIS_PASSWORD=mypassword
```

Create a file system named `myjfs`:

```shell
juicefs format \
    --storage oss \
    --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \
    redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    myjfs
```

### Mount and use

Write the database password to the environment variable:

```shell
export REDIS_PASSWORD=mypassword
```

:::note
Once the file system is created successfully, the corresponding key information will be written to the database and the JuiceFS client will automatically read it from the database when the file system is mounted, so there is no need to set it again.
:::

Mount the file system to `mnt` in the user's home directory:

```shell
sudo juicefs mount -d redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 $HOME/mnt
```

If you need to access the JuiceFS file system mounted on a Linux subsystem from a Windows system, find the Linux subsystem in the list on the left side of Explorer, then find and open the mount point path.

![WSL/access-jfs-from-win-en](../images/wsl/access-jfs-from-win-en.png)

For more information on the use of JuiceFS, please refer to the official documentation.

## WSL Storage Performance

WSL bridges the Windows and Linux subsystems, allowing them to access each other's files stored on each other's systems.

![WSL/Windows-to-Linux-en](../images/wsl/windows-to-linux-en.png)

Note, however, that accessing the Linux subsystem from Windows or accessing Windows from the Linux subsystem is bound to incur some performance overhead due to switching between systems. Therefore, the recommended practice is to decide where to store the files depending on the system where the program is located, and for programs in the Linux subsystem, the files it will be processing should also be stored in the Linux subsystem for better performance.

In the Linux subsystem, WSL mounts each Windows drive to `/mnt`, for example, the mount point for the C: drive in the Linux subsystem is `/mnt/c`.

![WSL/mount-point](../images/wsl/mount-point.png)

To ensure optimal performance, when using JuiceFS in WSL, both the storage and cache paths should be set in the Linux subsystem. In other words, you should avoid setting the storage or cache on a Windows partition mount point like `/mnt/c`.

Using the `bench` benchmarking tool that comes with JuiceFS, the results show that mounting a file system to Windows (e.g. `/mnt/c`) has about 30% lower performance than mounting it inside a Linux subsystem (e.g. `$HOME/mnt`).

## Known Issues

When copying files to a Linux subsystem via Windows Explorer, WSL automatically appends a file of the same name with the `Zone.Identifier` identifier to each file. This is an NTFS file system security mechanism intended to track the origin of external files, but it is a bug for WSL and has been reported to the Microsoft development team on GitHub [#7456](https://github.com/microsoft/WSL/issues/7456).

This issue also affects the same problem when saving files to a mounted JuiceFS file system in the Linux subsystem via Windows Explorer. However, reading and writing JuiceFS file systems inside the Linux subsystem is not affected by this bug.

![WSL/zone-identifier-en](../images/wsl/zone-identifier-en.png)


================================================
FILE: docs/en/tutorials/qcloud.md
================================================
---
title: Use JuiceFS on Tencent Cloud
sidebar_position: 8
slug: /clouds/qcloud
---

JuiceFS needs to be used with database and object storage together. Here we directly use Tencent Cloud's CVM cloud server, combined with cloud database and COS object storage.

## Preparation

When creating cloud computing resources, try to choose the same region, so that resources can access each other through intranet and avoid extra traffic costs by using public network.

### 1. CVM

JuiceFS has no special requirements for server hardware, and the minimum specification of CVM can use JuiceFS stably, usually you just need to choose the configuration that can meet your business.

In particular, you do not need to buy a new server or reinstall the system to use JuiceFS, JuiceFS is not business invasive and will not cause any interference with your existing systems and programs, you can install and use JuiceFS on your running server.

By default, JuiceFS takes up 1GB of hard disk space for caching, and you can adjust the size of the cache space as needed. This cache is a data buffer layer between the client and the object storage, and you can get better performance by choosing a cloud drive with better performance.

JuiceFS can be installed on all operating systems provided by Tencent Cloud CVM.

**The specifications of CVM used in this article are as follows:**

| Server Specifications |                          |
| --------------------- | ------------------------ |
| **CPU**               | 1 Core                   |
| **RAM**               | 2 GB                     |
| **Storage**           | 50 GB                    |
| **OS**                | Ubuntu Server 20.04 64-bit |
| **Location**          | Shanghai 5               |

### 2. Database

JuiceFS will store all the metadata corresponding to the data in a separate database, and the supported databases are Redis, MySQL, PostgreSQL, TiKV and SQLite.

Depending on the database type, the performance and reliability of metadata varies. For example, Redis runs entirely on memory, which provides the ultimate performance, but is difficult to operate and maintain, and has relatively low reliability. SQLite is a single-file relational database with low performance and is not suitable for large-scale data storage, but it is configuration-free and suitable for scenarios with small amounts of data storage.

If you are just evaluating the capabilities of JuiceFS, you can manually build the database for use in the CVM. When you want to use JuiceFS in a production environment, the cloud database service of Tencent Cloud is usually a better choice if you don't have a professional database operation and maintenance team.

Of course, you can also use cloud database services provided on other cloud platforms if you wish.However, in this case, you can only access the cloud database through the public network, which means that you must expose the database port to the public network, which has some security risks and requires special attention.

If you must access the database through the public network, you can enhance the security of your data by strictly limiting the IP addresses that are allowed to access the database through the whitelist feature provided by the cloud database console. On the other hand, if you cannot connect to the cloud database through the public network, then you can check the whitelist of the database.

|    Database     |                          Redis                          |                      MySQL/PostgreSQL                       |                            SQLite                            |
| :-------------: | :-----------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| **Performance** |                          High                           |                            Medium                            |                             Low                              |
| **Management**  |                          High                           |                            Medium                            |                             Low                              |
| **Reliability** |                           Low                           |                            Medium                            |                             Low                              |
|  **Scenario**   | Massive data, distributed high-frequency read and write | Massive data, distributed low and medium frequency read and write | Low frequency read and write in single machine for small amount of data |

**This article uses the TencentDB for Redis, which is accessed through a VPC private network interacting with the CVM:**

| Redis version               | 5.0 community edition                      |
| --------------------------- | ------------------------------------------ |
| **Instance Specification**  | 1GB Memory Edition (standard architecture) |
| **Connection Address**      | 192.168.5.5:6379                           |
| **Available Zone**          | Shanghai 5                                 |

Note that the database connection address depends on the VPC network settings you create, and that creating a Redis instance automatically gets the address in the network segment you define.

### 3. Object Storage COS

JuiceFS stores all data in object storage, and it supports almost all object storage services. However, for the best performance, when using Tencent Cloud CVM, pairing it with Tencent Cloud COS Object Storage is usually the optimal choice. However, please note that selecting CVM and COS Bucket in the same region so that they can be accessed through Tencent Cloud's intranet not only has low latency, but also does not require additional traffic costs.

> **Hint**: The unique access address provided by Tencent Cloud COS supports both intranet and extranet access. When accessing through the intranet, COS will automatically resolve to the intranet IP, and the traffic generated at this time is all intranet traffic, which will not incur traffic costs.

Of course, if you want, you can also use object storage services provided by other cloud platforms, but it is not recommended to do so. First of all, if you access the object storage of other cloud platforms through Tencent Cloud CVM, you have to take the public network, and the object storage will incur traffic costs, and the access latency will be higher compared to this, which may affect the performance of JuiceFS.

Tencent Cloud COS has different storage levels, and since JuiceFS needs to interact with object storage frequently, it is recommended to use standard storage. You can use it with COS resource package to reduce the cost.

### API Access Secret Key

Tencent Cloud COS needs to be accessed through API, you need to prepare the access secret key, including `Access Key ID` and `Access Key Secret`, [click here to view](https://intl.cloud.tencent.com/document/product/598/32675) to get the way.

> **Security Advisory**: Explicit use of the API access secret key may lead to key compromise and it is recommended to assign [CAM Service Role](https://intl.cloud.tencent.com/document/product/598/19420) to the cloud server. Once a CVM has been granted COS operation privileges, it can access the COS without using the API access key.

## Installation

Here we are using Ubuntu Server 20.04 64-bit system, and the latest version of the client can be installed by running the following command.

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

You can also choose another version by visiting the [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) page.

Execute the command and see the help message `juicefs` returned, which means the client installation is successful.

```shell
$ juicefs
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.15.2 (2021-07-07T05:51:36Z 4c16847)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

JuiceFS has good cross-platform compatibility and is supported on Linux, Windows and macOS. This article focuses on the installation and use of JuiceFS on Linux, if you need to know how to install it on other systems, please [check the documentation](../getting-started/installation.md).

## Creating JuiceFS

Once the JuiceFS client is installed, you can now create the JuiceFS storage using the Redis database and COS you prepared earlier.

Technically speaking, this step should be called "Format a volume". However, since many users may not understand or care about the standard file system terminology, we will simply call the process "Create JuiceFS Storage".

The following command creates a storage called `mystor`, i.e., a file system, using the `format` subcommand provided by the JuiceFS client.

```shell
$ juicefs format \
    --storage cos \
    --bucket https://<your-bucket-name> \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    redis://:<your-redis-password>@192.168.5.5:6379/1 \
    mystor
```

**Option description:**

- `--storage`: Specify the type of object storage.
- `---bucket`: Bucket access domain of the object store, which can be found in the COS management console.
- `--access-key` and `--secret-key`: the secret key pair for accessing the Object Storage API, [click here to view](https://intl.cloud.tencent.com/document/product/598/32675) to get it.

> Redis 6.0 authentication requires two parameters, username and password, and the address format is `redis://username:password@redis-server-url:6379/1`. Currently, the Redis version of Tencent Cloud Database only provides Reids 4.0 and 5.0, which only requires a password for authentication. When setting the Redis server address, you only need to leave the username empty, for example: `redis://:password@redis-server-url:6379/1`

Output like the following means the file system was created successfully.

```shell
2021/07/30 11:44:31.904157 juicefs[44060] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:44:31.907083 juicefs[44060] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:44:31.907634 juicefs[44060] <INFO>: Ping redis: 474.98µs
2021/07/30 11:44:31.907850 juicefs[44060] <INFO>: Data uses cos://juice-0000000000/mystor/
2021/07/30 11:44:32.149692 juicefs[44060] <INFO>: Volume is formatted as {Name:mystor UUID:dbf05314-57af-4a2c-8ac1-19329d73170c Storage:cos Bucket:https://juice-0000000000.cos.ap-shanghai.myqcloud.com AccessKey:AKIDGLxxxxxxxxxxxxxxxxxxZ8QRBdpkOkp SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

## Mount JuiceFS

When the file system is created, the information related to the object storage is stored in the database, so there is no need to enter information such as the bucket domain and secret key when mounting.

Use the `mount` subcommand to mount the file system to the `/mnt/jfs` directory.

```shell
sudo juicefs mount -d redis://:<your-redis-password>@192.168.5.5:6379/1 /mnt/jfs
```

> **Note**: When mounting the file system, only the Redis database address is required, not the file system name. The default cache path is `/var/jfsCache`, please make sure the current user has enough read/write permissions.

Output similar to the following means that the file system was mounted successfully.

```shell
2021/07/30 11:49:56.842211 juicefs[44175] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:49:56.845100 juicefs[44175] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:49:56.845562 juicefs[44175] <INFO>: Ping redis: 383.157µs
2021/07/30 11:49:56.846164 juicefs[44175] <INFO>: Data use cos://juice-0000000000/mystor/
2021/07/30 11:49:56.846731 juicefs[44175] <INFO>: Disk cache (/var/jfsCache/dbf05314-57af-4a2c-8ac1-19329d73170c/): capacity (1024 MB), free ratio (10%), max pending pages (15)
2021/07/30 11:49:57.354763 juicefs[44175] <INFO>: OK, mystor is ready at /mnt/jfs
```

Using the `df` command, you can see how the file system is mounted.

```shell
$ df -Th
File system      type         capacity used usable used%  mount point
JuiceFS:mystor   fuse.juicefs  1.0P     64K  1.0P    1%   /mnt/jfs
```

After the file system is successfully mounted, you can now store data in the `/mnt/jfs` directory as if you were using a local hard drive.

> **Multi-Host Sharing**: JuiceFS storage supports being mounted by multiple cloud servers at the same time. You can install the JuiceFS client on other could server and then use `redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs. com:6379/1` database address to mount the file system on each host.

## File System Status

Use the `status` subcommand of the JuiceFS client to view basic information and connection status of a file system.

```shell
$ juicefs status redis://:<your-redis-password>@192.168.5.5:6379/1

2021/07/30 11:51:17.864767 juicefs[44196] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:51:17.866619 juicefs[44196] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:51:17.867092 juicefs[44196] <INFO>: Ping redis: 379.391µs
{
  "Setting": {
    "Name": "mystor",
    "UUID": "dbf05314-57af-4a2c-8ac1-19329d73170c",
    "Storage": "cos",
    "Bucket": "https://juice-0000000000.cos.ap-shanghai.myqcloud.com",
    "AccessKey": "AKIDGLxxxxxxxxxxxxxxxxx8QRBdpkOkp",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 1,
      "Heartbeat": "2021-07-30T11:49:56+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "VM-5-6-ubuntu",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 44175
    },
    {
      "Sid": 3,
      "Heartbeat": "2021-07-30T11:50:56+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "VM-5-6-ubuntu",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 44185
    }
  ]
}
```

## Unmount JuiceFS

The file system can be unmounted using the `umount` command provided by the JuiceFS client, e.g.

```shell
sudo juicefs umount /mnt/jfs
```

> **Note**: Forced unmount of the file system in use may result in data corruption or loss, so please be sure to proceed with caution.

## Auto-mount on boot

Please refer to ["Mount JuiceFS at Boot Time"](../administration/mount_at_boot.md) for more details.


================================================
FILE: docs/en/tutorials/windows.md
================================================
---
title: Using JuiceFS on Windows
sidebar_position: 1
---

## Quick Start Video

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114499784808051&bvid=BV1jtEczZEvq&cid=29939011077&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## Install JuiceFS Client

:::tip Environment Dependency
On Windows, JuiceFS relies on WinFsp to mount the file system. You can download the latest version from the [WinFsp Repository](https://github.com/winfsp/winfsp). After installation, it is recommended to restart your computer to ensure all components are loaded properly.
:::

The [installation guide](../getting-started/installation.md#windows) introduces various ways to install JuiceFS on Windows. Here, we detail the manual installation process.

### Step 1: Download JuiceFS Client

Go to the project's [Release page](https://github.com/juicedata/juicefs/releases) and download the latest JuiceFS client, for example, `juicefs-1.3.0-windows-amd64.tar.gz`.

### Step 2: Create Program Directory

For better management, it is recommended to create a dedicated directory for the JuiceFS client. For example, create a folder named `juicefs` under `C:\`, and place the extracted `juicefs.exe` inside.

### Step 3: Configure Environment Variables

To conveniently use the `juicefs` command in the command line, add the JuiceFS client directory to your system's environment variables:

1. Right-click "This PC" or "Computer" and select "Properties";
2. Click "Advanced system settings";
3. In the "System Properties" window, click the "Environment Variables" button;
4. In the "System variables" section, find the variable named `Path`, select it and click "Edit";
5. In the edit window, click "New" and enter the JuiceFS client directory path, e.g., `C:\juicefs`;
6. Click "OK" to save changes.

![Windows Environment Variable Settings](https://static1.juicefs.com/docs/windows-path-en.png)

### Step 4: Verify Installation

After installation, verify the JuiceFS client via the command line. Open Command Prompt (CMD) or PowerShell and enter:

```bash
juicefs version
```

If installed successfully, you should see output similar to:

```
juicefs version 1.3.0+2025-07-03.30190ca1094d2
```

## Create and Mount File System

The steps to create and mount a JuiceFS file system are similar to other operating systems, but pay attention to Windows command line syntax and path formats.

### Create File System

```shell
juicefs format --storage oss `
        --bucket https://your-bucket.oss-cn-region.aliyuncs.com `
        --access-key your-access-key `
        --secret-key your-secret-key `
        redis://your-redis-host:6379/0 `
        mywinfs
```

> Unlike Linux, Windows command lines use backticks (`) for line continuation.

### Mount File System

On Windows, the mount point must be an unused drive letter (such as X, Y, Z, etc.). This differs from Linux and macOS, which mount file systems to directories.

```shell
juicefs mount -d redis://your-redis-host:6379/0 X:
```

## Environment Variable Configuration

For security, to avoid entering passwords in plain text, you can store sensitive information in environment variables. When mounting the file system or enabling S3 Gateway, the client will automatically read from these variables.

Common environment variables for JuiceFS on Windows:

| Variable Name            | Description                |
|-------------------------|----------------------------|
| `META_PASSWORD`         | Metadata engine password   |
| `MINIO_ROOT_USER`       | S3 Gateway Access Key      |
| `MINIO_ROOT_PASSWORD`   | S3 Gateway Secret Key      |

Set these variables directly in the command line:

```cmd
set META_PASSWORD=your_password
set MINIO_ROOT_USER=your_access_key
set MINIO_ROOT_PASSWORD=your_secret_key
```

Note: This method only works for the current session. Once the window is closed, the variables are lost and need to be reset.

### Persist Environment Variables

To automatically load these variables every time Windows starts, set them as system environment variables:

1. **Open System Environment Variable Settings**
     - Press `Win + S`, search for and open "Edit the system environment variables".
     - Click the "Environment Variables" button.

     ![System Environment Variable Settings](https://static1.juicefs.com/docs/win_env_01.png)

2. **Create System-Level Environment Variable**
     - In the "System variables" area, click "New".
     - **Variable name**: e.g., `META_PASSWORD`
     - **Variable value**: Enter the password or key
     - Click "OK" to save.

     ![Add Environment Variable](https://static1.juicefs.com/docs/win_env_02.png)

     ![Add Environment Variable](https://static1.juicefs.com/docs/win_env_03.png)

3. **Verify Environment Variable**

     Reopen the terminal and try mounting the file system without specifying the password. If successful, the environment variable is effective.

## Auto-Mount on Startup

There are several ways to enable auto-mount on startup in Windows. This section introduces the method using "Task Scheduler".

1. Open "Task Scheduler" and click "Create Task".

     ![Task Scheduler](https://static1.juicefs.com/docs/task_00.png)

2. In the "General" tab, set the task name (e.g., `JuiceFS_AutoMount`) and check "Run with highest privileges".

     ![General Settings](https://static1.juicefs.com/docs/task_01.png)

3. Switch to the "Triggers" tab, click "New", and select "At system startup" as the trigger.

     ![Trigger Settings](https://static1.juicefs.com/docs/task_02.png)

4. Switch to the "Actions" tab, click "New", and fill in:

     - **Program/script**: Browse to select the JuiceFS client path (e.g., `C:\juicefs\juicefs.exe`).
     - **Arguments**: Enter the mount command parameters. It is recommended to use system environment variables for the metadata engine password to avoid plain text input here.

     ![Action Settings](https://static1.juicefs.com/docs/task_03.png)

5. In the "Conditions" tab, check "Start only if the network connection is available" to ensure the mount operation runs when the network is ready.

     ![Condition Settings](https://static1.juicefs.com/docs/task_04.png)

6. Click "OK" to save the task.

**Notes:**

- Ensure the mount command parameters are correct; do not include the password in the command (it is stored in environment variables).
- To unmount the file system: right-click the mounted drive letter and select "Disconnect".


================================================
FILE: docs/zh_cn/administration/destroy.md
================================================
---
title: 销毁文件系统
sidebar_position: 8
---

JuiceFS 客户端提供了 `destroy` 命令用以彻底销毁一个文件系统，销毁操作将会产生以下结果：

- 清空此文件系统的全部元数据记录；
- 清空此文件系统的全部数据块

销毁文件系统的命令格式如下：

```shell
juicefs destroy <METADATA URL> <UUID>
```

- `<METADATA URL>`：元数据引擎的 URL 地址；
- `<UUID>`：文件系统的 UUID。

## 查找文件系统的 UUID

JuiceFS 客户端的 `status` 命令可以查看一个文件系统的详细信息，只需指定文件系统的元数据引擎 URL 即可，例如：

```shell {8}
$ juicefs status redis://127.0.0.1:6379

2022/01/26 21:41:37.577645 juicefs[31181] <INFO>: Meta address: redis://127.0.0.1:6379
2022/01/26 21:41:37.578238 juicefs[31181] <INFO>: Ping redis: 55.041µs
{
  "Setting": {
    "Name": "macjfs",
    "UUID": "eabb96d5-7228-461e-9240-fddbf2b576d8",
    "Storage": "file",
    "Bucket": "jfs/",
    "AccessKey": "",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0,
    "TrashDays": 1
  },
  ...
}
```

## 销毁文件系统

:::danger 危险操作
销毁操作将导致文件系统关联的数据库记录和对象存储中的数据全部被清空，请务必先备份重要数据后再操作！
:::

```shell {1}
$ juicefs destroy redis://127.0.0.1:6379 eabb96d5-7228-461e-9240-fddbf2b576d8

2022/01/26 21:52:17.488987 juicefs[31518] <INFO>: Meta address: redis://127.0.0.1:6379
2022/01/26 21:52:17.489668 juicefs[31518] <INFO>: Ping redis: 55.542µs
 volume name: macjfs
 volume UUID: eabb96d5-7228-461e-9240-fddbf2b576d8
data storage: file://jfs/
  used bytes: 18620416
 used inodes: 23
WARNING: The target volume will be destroyed permanently, including:
WARNING: 1. objects in the data storage
WARNING: 2. entries in the metadata engine
Proceed anyway? [y/N]: y
deleting objects: 68
The volume has been destroyed! You may need to delete cache directory manually.
```

在销毁文件系统时，客户端会发出确认提示，请务必仔细核对文件系统信息，确认无误后输入 `y` 确认。

## 常见错误

```shell
2022/01/26 21:47:30.949149 juicefs[31483] <FATAL>: 1 sessions are active, please disconnect them first
```

如果收到类似上面的错误提示，说明文件系统没有被妥善卸载，请检查并确认卸载了所有挂载点后再行操作。


================================================
FILE: docs/zh_cn/administration/fault_diagnosis_and_analysis.md
================================================
---
title: 问题排查方法
sidebar_position: 5
slug: /fault_diagnosis_and_analysis
description: 本文介绍 JuiceFS 挂载点、CSI 驱动、Hadoop Java SDK、S3 网关等客户端的问题排查方法。
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

## 客户端日志 {#client-log}

JuiceFS 客户端在运行过程中会输出日志用于故障诊断，日志等级从低到高分别是：DEBUG、INFO、WARNING、ERROR、FATAL，默认只输出 INFO 级别以上的日志。如果需要输出 DEBUG 级别的日志，需要在运行 JuiceFS 客户端时显式开启，如加上 `--debug` 选项。

不同 JuiceFS 客户端获取日志的方式不同，以下分别介绍。

### 挂载点

当挂载 JuiceFS 文件系统时加上了 [`-d` 选项](../reference/command_reference.mdx#mount)（表示后台运行），日志会同时输出到系统日志和本地日志文件，取决于挂载文件系统时的运行用户，本地日志文件的路径稍有区别。root 用户对应的日志文件路径是 `/var/log/juicefs.log`，非 root 用户的日志文件路径是 `$HOME/.juicefs/juicefs.log`，具体请参见 [`--log` 选项](../reference/command_reference.mdx#mount)。

取决于你使用的操作系统，你可以通过不同的命令获取系统日志或直接读取本地日志文件：

<Tabs>
  <TabItem value="local-log-file" label="本地日志文件">

```bash
tail -n 100 /var/log/juicefs.log
```

  </TabItem>
  <TabItem value="macos-syslog" label="macOS 系统日志">

```bash
syslog | grep 'juicefs'
```

  </TabItem>
  <TabItem value="debian-syslog" label="Debian 系统日志">

```bash
cat /var/log/syslog | grep 'juicefs'
```

  </TabItem>
  <TabItem value="centos-syslog" label="CentOS 系统日志">

```bash
cat /var/log/messages | grep 'juicefs'
```

  </TabItem>
</Tabs>

你可以使用 `grep` 命令过滤显示不同等级的日志信息，从而进行性能统计和故障追踪，例如：

```shell
cat /var/log/syslog | grep 'juicefs' | grep '<ERROR>'
```

### Kubernetes CSI 驱动

根据你使用的 JuiceFS CSI 驱动版本会有不同的获取日志的方式，具体请参考 [CSI 驱动文档](https://juicefs.com/docs/zh/csi/troubleshooting)。

### S3 网关

S3 网关仅支持在前台运行，因此客户端日志会直接输出到终端。如果你是在 Kubernetes 中部署 S3 网关，需要查看对应 pod 的日志。

### Hadoop Java SDK

使用 JuiceFS Hadoop Java SDK 的应用进程（如 Spark executor）的日志中会包含 JuiceFS 客户端日志，因为和应用自身产生的日志混杂在一起，需要通过特定关键词来过滤筛选（如 `juicefs`，注意这里忽略了大小写）。

## 文件系统访问日志 {#access-log}

每个 JuiceFS 客户端都有一个访问日志，其中详细记录了文件系统上的所有操作，如操作类型、用户 ID、用户组 ID、文件 inode 及其花费的时间。访问日志可以有多种用途，如性能分析、审计、故障诊断。

### 日志格式

访问日志的示例格式如下：

```
2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010>
```

其中每一列的含义为：

- `2021.01.15 08:26:11.003330`：当前操作的时间
- `[uid:0,gid:0,pid:4403]`：当前操作的用户 ID、用户组 ID、进程 ID
- `write`：操作类型
- `(17669,8666,4993160)`：当前操作类型的输入参数，如示例中的 `write` 操作的输入参数分别为写入文件的 inode、写入数据的大小、写入文件的偏移。不同操作类型的参数不同，具体请参考 [`vfs.go`](https://github.com/juicedata/juicefs/blob/main/pkg/vfs/vfs.go) 文件。
- `OK`：当前操作是否成功，如果不成功会输出具体的失败信息。
- `<0.000010>`：当前操作花费的时间（以秒为单位）

访问日志量很大，直接阅读难以把握系统性能情况，推荐使用 [`juicefs profile`](#profile) 直接基于日志进行性能可视化分析。

不同 JuiceFS 客户端获取访问日志的方式不同，以下分别介绍。

### 挂载点

在 JuiceFS 文件系统挂载点的根目录中有一个名为 `.accesslog` 的虚拟文件，通过 `cat` 命令可以查看其中的内容（命令不会退出），例如（假设挂载点根目录为 `/jfs`）：

```bash
cat /jfs/.accesslog
```

```output
2021.01.15 08:26:11.003330 [uid:0,gid:0,pid:4403] write (17669,8666,4993160): OK <0.000010>
2021.01.15 08:26:11.003473 [uid:0,gid:0,pid:4403] write (17675,198,997439): OK <0.000014>
2021.01.15 08:26:11.003616 [uid:0,gid:0,pid:4403] write (17666,390,951582): OK <0.000006>
```

### Kubernetes CSI 驱动

请参考 [CSI 驱动文档](https://juicefs.com/docs/zh/csi/troubleshooting)及根据你使用的 JuiceFS CSI 驱动版本来找到 mount pod 或者 CSI 驱动 pod，在 pod 内的 JuiceFS 文件系统挂载点根目录查看 `.accesslog` 文件即可。Pod 内的挂载点路径为 `/jfs/<pv_volumeHandle>`，假设 mount pod 的名称叫 `juicefs-1.2.3.4-pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373`，`<pv_volumeHandle>` 为 `pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373`，可以使用如下命令查看：

```bash
kubectl -n kube-system exec juicefs-1.2.3.4-pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373 -- cat /jfs/pvc-d4b8fb4f-2c0b-48e8-a2dc-530799435373/.accesslog
```

### S3 网关

需要在启动 S3 网关时新增 [`--access-log` 选项](../reference/command_reference.mdx#gateway)，指定访问日志输出的路径，默认 S3 网关不输出访问日志。

### Hadoop Java SDK

需要在 JuiceFS Hadoop Java SDK 的[客户端配置](../deployment/hadoop_java_sdk.md#其它配置)中新增 `juicefs.access-log` 配置项，指定访问日志输出的路径，默认不输出访问日志。

## 使用 debug 子命令收集各类信息 {#debug}

`juicefs debug` 子命令可以自动搜集指定挂载点的各类信息，方便进行故障诊断。

```shell
juicefs debug <mountpoint>
```

该命令会收集以下信息：

1. JuiceFS version
2. 操作系统版本与内核版本
3. JuiceFS .config 内部文件内容
4. JuiceFS .stat 内部文件的内容并且在 5s 后再记录一次
5. mount 命令行参数
6. Go pprof
7. JuiceFS 日志（默认最后 5000 行）

默认会在当前目录下创建 debug 目录，并将收集到的信息保存在该目录下。下面是一个示例：

```shell
$ juicefs debug /tmp/mountpoint

$ tree ./debug
./debug
├── tmp-test1-20230609104324
│   ├── config.txt
│   ├── juicefs.log
│   ├── pprof
│   │   ├── juicefs.allocs.pb.gz
│   │   ├── juicefs.block.pb.gz
│   │   ├── juicefs.cmdline.txt
│   │   ├── juicefs.goroutine.pb.gz
│   │   ├── juicefs.goroutine.stack.txt
│   │   ├── juicefs.heap.pb.gz
│   │   ├── juicefs.mutex.pb.gz
│   │   ├── juicefs.profile.30s.pb.gz
│   │   ├── juicefs.threadcreate.pb.gz
│   │   └── juicefs.trace.5s.pb.gz
│   ├── stats.5s.txt
│   ├── stats.txt
│   └── system-info.log
└── tmp-test1-20230609104324.zip
```

## 实时性能监控 {#performance-monitor}

JuiceFS 客户端提供 `profile` 和 `stats` 两个子命令来对性能数据进行可视化呈现。其中，`profile` 命令通过读取[「文件系统请求日志」](#access-log)进行汇总输出，而 `stats` 则依赖[客户端监控数据](../administration/monitoring.md)。

### `juicefs profile` {#profile}

[`juicefs profile`](../reference/command_reference.mdx#profile) 会对[「文件系统访问日志」](#access-log)进行汇总，运行 `juicefs profile MOUNTPOINT` 命令，便能看到根据最新访问日志获取的各个文件系统操作的实时统计信息：

![JuiceFS-profiling](../images/juicefs-profiling.gif)

除了对挂载点进行实时分析，该命令还提供回放模式，可以对预先收集的日志进行回放分析：

```shell
# 预先收集日志
cat /jfs/.accesslog > /tmp/juicefs.accesslog

# 性能问题复现后，重放日志，分析各调用耗时，找出性能瓶颈
juicefs profile /tmp/juicefs.accesslog
```

如果认为回放日志的速度太快，可以用 <kbd>Enter/Return</kbd> 暂停／继续回放。如果太慢，则设置 `--interval 0` 来立即回放整个日志文件并直接显示统计结果。

如果只对某个用户或进程感兴趣，可以通过指定其 ID 来过滤掉其他用户或进程。例如：

```bash
juicefs profile /tmp/juicefs.accesslog --uid 12345
```

### `juicefs stats` {#stats}

[`juicefs stats`](../reference/command_reference.mdx#stats) 命令通过读取 JuiceFS 客户端的监控数据，以类似 Linux `dstat` 工具的形式实时打印各个指标的每秒变化情况：

![juicefs_stats_watcher](../images/juicefs_stats_watcher.png)

各个板块指标介绍：

#### `usage`

- `cpu`：进程的 CPU 使用率。
- `mem`：进程的物理内存使用量。
- `buf`：进程已使用的[读写缓冲区](../guide/cache.md#buffer-size)大小，如果该数值逼近甚至超过客户端所设置的 [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options)，说明读写缓冲区空间不足，需要视情况扩大，或者降低应用读写负载。
- `cache`：内部指标，无需关注。

#### `fuse`

- `ops`/`lat`：通过 FUSE 接口处理的每秒请求数及其平均时延，单位为毫秒。
- `read`/`write`：通过 FUSE 接口处理的读写带宽。

#### `meta`

- `ops`/`lat`：每秒处理的元数据请求数和平均时延，单位为毫秒。注意部分能在缓存中直接处理的元数据请求未列入统计，以更好地体现客户端与元数据引擎交互的耗时。
- `txn`/`lat`：元数据引擎每秒处理的写事务个数及其平均时延，单位为毫秒。只读请求如 `getattr` 只会计入 `ops` 而不会计入 `txn`。
- `retry`：元数据引擎每秒重试写事务的次数。

#### `blockcache`

`blockcache` 代表本地数据缓存，如果读请求已经被内核缓存，那么流量将不会体现在 `blockcache` 相关指标下。因此如果反复读取相同文件，却发现持续产生 `blockcache` 流量，说明文件始终未能被内核页缓存收录，考虑往该方向排查（比如内存吃紧，不足以缓存更多文件）。

- `read`/`write`：客户端本地数据缓存的每秒读写流量。

#### `object`

`object` 代表与对象存储相关指标，在缓存场景下，读请求穿透到对象存储，将会明显降低读性能，可以用该指标来断定数据是否完整缓存。另一方面，通过对比 GET 请求流量和 FUSE 读流量的关系，也能初步判断[读放大](./troubleshooting.md#read-amplification)的情况。

- `get`/`get_c`/`lat`：对象存储每秒处理读请求的带宽值，请求个数及其平均时延（单位为毫秒）。
- `put`/`put_c`/`lat`：对象存储每秒处理写请求的带宽值，请求个数及其平均时延（单位为毫秒）。
- `del_c`/`lat`：对象存储每秒处理删除请求的个数和平均时延（单位为毫秒）。

## 用 pprof 获取运行时信息 {#runtime-information}

JuiceFS 客户端默认会通过 [pprof](https://pkg.go.dev/net/http/pprof) 在本地监听一个 TCP 端口用以获取运行时信息，如 Goroutine 堆栈信息、CPU 性能统计、内存分配统计。你可以通过挂载点下的 `.config` 文件查看当前 JuiceFS 客户端监听的具体端口号：

```shell
# 假设挂载点是 /jfs
$ cat /jfs/.config | grep 'DebugAgent'
  "DebugAgent": "127.0.0.1:6064",
```

默认 pprof 监听的端口号范围是从 6060 开始至 6099 结束，从上面的示例中可以看到实际的端口号是 6064。在获取到监听端口号以后就可以通过 `http://localhost:<port>/debug/pprof` 地址查看所有可供查询的运行时信息，一些重要的运行时信息如下：

- Goroutine 堆栈信息：`http://localhost:<port>/debug/pprof/goroutine?debug=1`
- CPU 性能统计：`http://localhost:<port>/debug/pprof/profile?seconds=30`
- 内存分配统计：`http://localhost:<port>/debug/pprof/heap`

为了便于分析这些运行时信息，可以将它们保存到本地，例如：

```bash
curl 'http://localhost:<port>/debug/pprof/goroutine?debug=1' > juicefs.goroutine.txt
```

```bash
curl 'http://localhost:<port>/debug/pprof/profile?seconds=30' > juicefs.cpu.pb.gz
```

```bash
curl 'http://localhost:<port>/debug/pprof/heap' > juicefs.heap.pb.gz
```

:::tip 建议
你也可以使用 `juicefs debug` 命令自动收集这些运行时信息并保存到本地，默认保存到当前目录下的 `debug` 目录中，例如：

```bash
juicefs debug /mnt/jfs
```

关于 `juicefs debug` 命令的更多信息，请查看[命令参考](../reference/command_reference.mdx#debug)。
:::

如果你安装了 `go` 命令，那么可以通过 `go tool pprof` 命令直接分析，例如分析 CPU 性能统计：

```bash
$ go tool pprof 'http://localhost:<port>/debug/pprof/profile?seconds=30'
Fetching profile over HTTP from http://localhost:<port>/debug/pprof/profile?seconds=30
Saved profile in /Users/xxx/pprof/pprof.samples.cpu.001.pb.gz
Type: cpu
Time: Dec 17, 2021 at 1:41pm (CST)
Duration: 30.12s, Total samples = 32.06s (106.42%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 30.57s, 95.35% of 32.06s total
Dropped 285 nodes (cum <= 0.16s)
Showing top 10 nodes out of 192
      flat  flat%   sum%        cum   cum%
    14.73s 45.95% 45.95%     14.74s 45.98%  runtime.cgocall
     7.39s 23.05% 69.00%      7.41s 23.11%  syscall.syscall
     2.92s  9.11% 78.10%      2.92s  9.11%  runtime.pthread_cond_wait
     2.35s  7.33% 85.43%      2.35s  7.33%  runtime.pthread_cond_signal
     1.13s  3.52% 88.96%      1.14s  3.56%  runtime.nanotime1
     0.77s  2.40% 91.36%      0.77s  2.40%  syscall.Syscall
     0.49s  1.53% 92.89%      0.49s  1.53%  runtime.memmove
     0.31s  0.97% 93.86%      0.31s  0.97%  runtime.kevent
     0.27s  0.84% 94.70%      0.27s  0.84%  runtime.usleep
     0.21s  0.66% 95.35%      0.21s  0.66%  runtime.madvise
```

也可以将运行时信息导出为可视化图表，以更加直观的方式进行分析。可视化图表支持导出为多种格式，如 HTML、PDF、SVG、PNG 等。例如导出内存分配统计信息为 PDF 文件的命令如下：

:::note 注意
导出为可视化图表功能依赖 [Graphviz](https://graphviz.org)，请先将它安装好。
:::

```bash
go tool pprof -pdf 'http://localhost:<port>/debug/pprof/heap' > juicefs.heap.pdf
```

关于 pprof 的更多信息，请查看[官方文档](https://github.com/google/pprof/blob/main/doc/README.md)。

### 使用 Pyroscope 进行性能剖析 {#use-pyroscope}

![Pyroscope](../images/pyroscope.png)

[Pyroscope](https://github.com/pyroscope-io/pyroscope) 是一个开源的持续性能剖析平台。它能够帮你：

+ 找出源代码中的性能问题和瓶颈
+ 解决 CPU 利用率高的问题
+ 理解应用程序的调用树（call tree）
+ 追踪随一段时间内变化的情况

JuiceFS 支持使用 `--pyroscope` 选项传入 Pyroscope 服务端地址，指标以每隔 10 秒的频率推送到服务端。如果服务端开启了权限校验，校验信息 API Key 可以通过环境变量 `PYROSCOPE_AUTH_TOKEN` 传入：

```bash
export PYROSCOPE_AUTH_TOKEN=xxxxxxxxxxxxxxxx
juicefs mount --pyroscope http://localhost:4040 redis://localhost /mnt/jfs
juicefs dump --pyroscope http://localhost:4040 redis://localhost dump.json
```


================================================
FILE: docs/zh_cn/administration/metadata/_category_.yml
================================================
label: "Metadata Engine Best Practices"
position: 1

================================================
FILE: docs/zh_cn/administration/metadata/etcd_best_practices.md
================================================
---
sidebar_label: etcd
sidebar_position: 4
slug: /etcd_best_practices
---

# etcd 最佳实践

## 数据规模

etcd 默认设置了 2GB 的[存储配额](https://etcd.io/docs/latest/op-guide/maintenance/#space-quota)，大概能够支撑存储两百万文件的元数据，可以通过 `--quota-backend-bytes` 选项进行调整，[官方建议](https://etcd.io/docs/latest/dev-guide/limit)不要超过 8GB。

默认情况下，etcd 会保留所有数据的修改历史，直到数据量超过存储配额导致无法提供服务，建议加上如下选项启用[自动数据合并](https://etcd.io/docs/latest/op-guide/maintenance/#auto-compaction)：

```
--auto-compaction-mode revision --auto-compaction-retention 1000000
```

当数据量达到配额导致无法写入时，可以通过手动压缩（`etcdctl compact`）和整理碎片（`etcdctl defrag`）的方式来减少容量。**强烈建议对 etcd 集群的节点逐个进行这些操作，否则可能会导致整个 etcd 集群不可用。**

## 性能

etcd 提供强一致的读写访问，并且所有操作都会涉及到多机事务以及磁盘的数据持久化。**建议使用高性能的 SSD 来部署**，否则会影响到文件系统的性能。更多硬件配置建议请参考[官方文档](https://etcd.io/docs/latest/op-guide/hardware)。

如果 etcd 集群都有掉电保护，或者其它能够保证不会导致所有节点同时宕机的措施，也可以通过 `--unsafe-no-fsync` 选项关闭数据同步落盘，以降低访问时延提高文件系统的性能。**此时如果有两个节点同时宕机，会有数据丢失风险。**

## Kubernetes

建议在 Kubernetes 环境中搭建独立的 etcd 服务供 JuiceFS 使用，而不是使用集群中默认的 etcd 服务，避免当文件系统访问压力高时影响 Kubernetes 集群的稳定性。


================================================
FILE: docs/zh_cn/administration/metadata/fdb_best_practices.md
================================================
---
sidebar_label: FoundationDB
sidebar_position: 6
slug: /fdb_best_practices
---

# FoundationDB 最佳实践

fdb 支持横向扩容，一旦数据存储达到集群的最高负载，只需要在集群中添加新的机器即可。配置集群的详细教程可见官网 [https://apple.github.io/foundationdb/configuration.html](https://apple.github.io/foundationdb/configuration.html) ，对于不同场景不同机器数量的性能测试可见 [https://apple.github.io/foundationdb/benchmarking.html](https://apple.github.io/foundationdb/benchmarking.html)。

## 系统要求

- 以下 64 位操作系统之一
  - 受支持的 Linux 发行版
    - RHEL/CentOS 6.x and 7.x
    - Ubuntu 12.04 或更高版本
  - 未受支持的 Linux 发行版
    - 内核版本介于 2.6.33 和 3.0.x（含）或 3.7 或更高版本之间
    - 最好是.deb 或者.rpm
  - macOS 10.7 或更高版本
- 每个 fdbserver 需要至少 4GB 内存
- 存储
  - 存储数据小于内存时使用内存存储引擎
  - 存储数据大于内存时使用 SSD 存储引擎

## 如何配置 FoundationDB

### 在单机上配置 FoundationDB

**[Ubuntu](https://apple.github.io/foundationdb/getting-started-linux.html)**

```
//下载server和client deb包
wget https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-clients_6.3.23-1_amd64.deb
wget https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-server_6.3.23-1_amd64.deb
//安装
sudo dpkg -i foundationdb-clients_6.3.23-1_amd64.deb \
foundationdb-server_6.3.23-1_amd64.deb
```

**[RHEL/CentOS6/CentOS7](https://apple.github.io/foundationdb/getting-started-linux.html)**

```
//下载server和client rpm包
wget https://github.com/apple/foundationdb/releases/download/6.3.12/foundationdb-clients-6.3.23-1.el7.x86_64.rpm
wget https://github.com/apple/foundationdb/releases/download/6.3.23/foundationdb-server-6.3.23-1.el7.x86_64.rpm
//安装
sudo rpm -Uvh foundationdb-clients-6.3.23-1.el7.x86_64.rpm \
foundationdb-server-6.3.23-1.el7.x86_64.rpm
```

**[macOS](https://apple.github.io/foundationdb/getting-started-linux.html)**

详情请移步 FoundationDB 官网

### [在多台机器上配置 FoundationDB 集群](https://apple.github.io/foundationdb/administration.html#adding-machines-to-a-cluster)

> 部署单台机器的步骤与上述一致。

- 首先在每台机器上部署好单个 FoundationDB
- 选择一个节点将其 fdb.cluster 文件修改（路径默认`/etc/foundationdb/fdb.cluster`），此文件由一行字符串组成，格式为 description:ID@IP:PORT,IP:PORT,...，仅添加其他机器的 IP:PORT 即可。
- 将此修改完的 fdb.cluster 拷贝到其他节点
- 将机器重启（`sudo service foundationdb restart`）

## 冗余模式

FoundationDB 支持多种冗余模式。这些模式定义了存储要求、所需的集群大小和故障恢复能力，用户可根据不同的机器配置选择相对应的冗余模式。要更改冗余模式，请使用 的 configure 命令 fdbcli。示例如下：

```
user@host$ fdbcli
Using cluster file `/etc/foundationdb/fdb.cluster'.

The database is available.

Welcome to the fdbcli. For help, type `help'.
fdb> configure double
Configuration changed.
```

### `single` mode（1-2 台机器）

FoundationDB 不复制数据，只需要一台物理机器就可以进行处理。由于数据没有被复制，数据库没有容错能力。

建议在单个开发机器上进行测试时使用此模式。(单模式将用于由两台或两台以上计算机组成的集群，并将数据进行分区以提高性能，但集群不会容忍任何机器的丢失)

### `double` mode（3-4 台机器）

FoundationDB 将数据复制到两台机器上，因此需要两台或两台以上的机器进行处理。一台机器的丢失可以在不丢失数据的情况下存活，但如果最初只有两台机器，则数据库将不可用，直到恢复第二台机器、添加另一台机器或更改复制模式。

### `triple` mode（5+ 台机器）

FoundationDB 将数据复制到三台机器上，并且至少需要三台可用的机器才能进行处理。对于一个数据中心中有五台或更多机器的集群，推荐使用这种模式。

## 存储引擎

fdb 提供`ssd`和`memory`两种存储引擎，根据数据量大小来选择不同的存储引擎。我们在实际测试中发现两种存储引擎的性能相差不大，而`ssd`存储引擎支持较大的数据量，故推荐使用`ssd`存储引擎。

```
user@host$ fdbcli
Using cluster file `/etc/foundationdb/fdb.cluster'.

The database is available.

Welcome to the fdbcli. For help, type `help'.
fdb> configure ssd
Configuration changed.
```

### `ssd` 存储引擎（推荐）

数据以 B 树的格式存储在磁盘中，一般使用固态硬盘而非机械硬盘。当有合适的磁盘硬件时，这个引擎更加健壮，因为它可以存储大量数据。

关于性能，固态硬盘提供了很不错的随机读写性能，再加上热点数据的缓存，基本上于`memory`存储引擎相差无几，对于`JUICEFS`的元数据存储也是极力推荐使用`ssd`存储引擎。

需要注意的是，固态硬盘在损坏之后数据有可能不可恢复，所以需要注意硬盘的磨损程度以更换新的硬盘。

由于该存储引擎是针对于 SSD（固态硬盘），因此如果使用的机械硬盘，性能会受到很大影响。

### `memory` 存储引擎

数据存储在内存中，其通过顺序写日志的方式对数据进行持久化，数据库重启时通过回放日志的方式来进行数据恢复，此过程一般需要一些时间（几秒钟到几分钟）。

默认情况下，每个使用内存存储引擎的进程只能存储 1GB 的数据 (包括开销)。这个限制可以通过在`foundationdb.conf`中记录的`storage_memory`参数来更改。


================================================
FILE: docs/zh_cn/administration/metadata/mysql_best_practices.md
================================================
---
sidebar_label: MySQL
sidebar_position: 2
slug: /mysql_best_practices
---
# MySQL 最佳实践

对于数据与元数据分离存储的分布式文件系统，元数据的读写性能直接影响整个系统的工作效率，元数据的安全也直接关系着整个系统的数据安全。

在生产环境中，建议您优先选择云计算平台提供的托管型云数据库，并搭配恰当的高可用性架构。

不论自行搭建，还是采用云数据库，使用 JuiceFS 应该始终关注元数据的完整和安全。

## 通过环境变量传递数据库信息

虽然直接在元数据 URL 中设置数据库密码简单方便，但日志或程序输出中可能会泄漏密码，为了保证数据安全，应该始终通过环境变量传递数据库密码。

环境变量名称可以自由定义，例如：

```shell
export $MYSQL_PASSWORD=mypassword
```

在元数据 URL 中通过环境变量传递数据库密码：

```shell
juicefs mount -d "mysql://user:$MYSQL_PASSWORD@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

或者使用指定的环境变量 (META_PASSWORD) ，例如：

```shell
export $META_PASSWORD=mypassword
```

在元数据 URL 中通过直接省略密码：

```shell
juicefs mount -d "mysql://user:@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

## 连接数控制

MySQL 后端采用多线程模式，每一个连接对应后端一个线程，控制数据库的连接总数和减少数据库连接的动态创建都是非常必要的。JuiceFS 提供 4 个数据库连接相关的控制选项：

- max_open_conns：控制当前挂载点到数据库的最大连接数，默认值为 0，表示没有限制。如果设置了一个固定值，并且所有连接都被使用了，新的请求就需要等待其他请求释放数据库连接，过小的值可能会影响性能，请根据实际业务压力情况动态调整。
- max_idle_conns：控制当前挂载点到数据库的最大空闲连接数，默认值为 CPU 的逻辑核心数的两倍。如果设置的值过大，这些连接一直空闲着，可能会消耗或浪费后端的资源，引起后端连接数过高，导致其他挂载点需要新建连接时无法连接成功。
- max_idle_time：一个连接的最长空闲时间，默认值为 300 秒。如果一个连接一直未被使用，和后端数据库无任何交互，超过指定时间后，会自动断开连接，以节约后端资源。设置过小的值可能会引起频繁地创建数据据连接，影响性能。
- max_life_time：一个连接的最大生命周期，默认为 0，表示无限制。一个数据库连接会被各种请求循环复用，在服务请求的过程中会申请一些临时资源，比如内存等，可能存在清理不干净或资源碎片的情况，可以考虑设置一个合理的生命周期，达到周期并且服务完当前请求后会自动断开来优化资源使用。

可在元数据 URL 中直接传递上述控制选项：

```shell
juicefs mount -d "mysql://user:@(192.168.1.6:3306)/juicefs?max_open_conns=30&max_life_time=3600" /mnt/jfs
```

请参考 Go 模块文档 [Database/SQL](https://pkg.go.dev/database/sql#SetConnMaxIdleTime) 了解更多信息。

## 定期备份

请参考官方手册 [Chapter 9. Backup and Recovery](https://dev.mysql.com/doc/refman/8.0/en/backup-and-recovery.html) 了解如何备份和恢复数据库。

建议制定数据库备份计划，并遵照计划定期备份 MySQL 数据库，与此同时，还应该在实验环境中尝试恢复数据，确认备份是有效的。

## 高可用

MySQL 官方文档 [Chapter 19. Replication](https://dev.mysql.com/doc/refman/8.0/en/replication.html) 和 [Chapter 20. Group Replication](https://dev.mysql.com/doc/refman/8.0/en/group-replication.html) 是常用的数据库高可用方案，请根据实际业务需要选择恰当的高可用方案。

:::note 注意
JuiceFS 需要使用[事务功能]来保证元数据操作的原子性，因此需要使用支持事务的存储引擎，例如 [InnoDB](https://dev.mysql.com/doc/refman/8.0/en/innodb-storage-engine.html) 。一些基于 MySQL 的 Shared Nothing 分布式架构可能会存在事务的兼容性问题，目前未对分布式架构做 JuiceFS 元数据做兼容性研发和测试。
:::


================================================
FILE: docs/zh_cn/administration/metadata/postgresql_best_practices.md
================================================
---
sidebar_label: PostgreSQL
sidebar_position: 3
slug: /postgresql_best_practices
---
# PostgreSQL 最佳实践

对于数据与元数据分离存储的分布式文件系统，元数据的读写性能直接影响整个系统的工作效率，元数据的安全也直接关系着整个系统的数据安全。

在生产环境中，建议您优先选择云计算平台提供的托管型云数据库，并搭配恰当的高可用性架构。

不论自行搭建，还是采用云数据库，使用 JuiceFS 应该始终关注元数据的完整和安全。

## 通信安全

默认情况下，JuiceFS 客户端会采用 SSL 加密协议连接 PostgreSQL，如果数据库未启用 SSL 加密，则需要在元数据 URL 中需要附加 `sslmode=disable` 参数。

建议配置并始终开启数据库服务端 SSL 加密。

## 通过环境变量传递数据库信息

虽然直接在元数据 URL 中设置数据库密码简单方便，但日志或程序输出中可能会泄漏密码，为了保证数据安全，应该始终通过环境变量传递数据库密码。

环境变量名称可以自由定义，例如：

```shell
export $PG_PASSWD=mypassword
```

在元数据 URL 中通过环境变量传递数据库密码：

```shell
juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs" /mnt/jfs
```

## 连接数控制

PostgreSQL 后端采用多进程模式，每一个连接对应后端一个进程，控制数据库的连接总数和减少数据库连接的动态创建都是非常必要的。JuiceFS 提供 4 个数据库连接相关的控制选项：

- max_open_conns：控制当前挂载点到数据库的最大连接数，默认值为 0，表示没有限制。如果设置了一个固定值，并且所有连接都被使用了，新的请求就需要等待其他请求释放数据库连接，过小的值可能会影响性能，请根据实际业务压力情况动态调整。
- max_idle_conns：控制当前挂载点到数据库的最大空闲连接数，默认值为 CPU 的逻辑核心数的两倍。如果设置的值过大，这些连接一直空闲着，可能会消耗或浪费后端的资源，引起后端连接数过高，导致其他挂载点需要新建连接时无法连接成功。
- max_idle_time：一个连接的最长空闲时间，默认值为 300 秒。如果一个连接一直未被使用，和后端数据库无任何交互，超过指定时间后，会自动断开连接，以节约后端资源。设置过小的值可能会引起频繁地创建数据据连接，影响性能。
- max_life_time：一个连接的最大生命周期，默认为 0，表示无限制。一个数据库连接会被各种请求循环复用，在服务请求的过程中会申请一些临时资源，比如内存等，可能存在清理不干净或资源碎片的情况，可以考虑设置一个合理的生命周期，达到周期并且服务完当前请求后会自动断开来优化资源使用。

可在元数据 URL 中直接传递上述控制选项：

```shell
juicefs mount -d "postgres://user:$PG_PASSWD@192.168.1.6:5432/juicefs?max_open_conns=30&max_life_time=3600" /mnt/jfs
```

请参考 Go 模块文档 [Database/SQL](https://pkg.go.dev/database/sql#SetConnMaxIdleTime) 了解更多信息。

## 定期备份

请参考官方手册 [Chapter 26. Backup and Restore](https://www.postgresql.org/docs/current/backup.html) 了解如何备份和恢复数据库。

建议制定数据库备份计划，并遵照计划定期备份 PostgreSQL 数据库，与此同时，还应该在实验环境中尝试恢复数据，确认备份是有效的。

## 使用连接池

连接池是客户端与数据库之间的中间层，由它作为中介提升连接效率，降低短连接的损耗。常用的连接池有 [PgBouncer](https://www.pgbouncer.org) 和 [Pgpool-II](https://www.pgpool.net) 。

## 高可用

PostgreSQL 官方文档 [High Availability, Load Balancing, and Replication](https://www.postgresql.org/docs/current/different-replication-solutions.html) 对比了几种常用的数据库高可用方案，请根据实际业务需要选择恰当的高可用方案。

:::note 注意
JuiceFS 使用[事务](https://www.postgresql.org/docs/current/tutorial-transactions.html)保证元数据操作的原子性。由于 PostgreSQL 尚不支持 Multi-Shard (Distributed) 分布式事务，因此请勿将多服务器分布式架构用于 JuiceFS 元数据存储。
:::


================================================
FILE: docs/zh_cn/administration/metadata/redis_best_practices.md
================================================
---
sidebar_label: Redis
sidebar_position: 1
slug: /redis_best_practices
---

# Redis 最佳实践

为保证元数据服务稳定，我们建议使用云平台提供的 Redis 托管服务，详情查看[「推荐的 Redis 托管服务」](#推荐的-redis-托管服务)。

## 内存使用量

JuiceFS 元数据引擎的使用空间主要与文件系统中的文件数量有关，根据我们的经验，每一个文件的元数据会大约占用 300 字节内存。因此，如果要存储 1 亿个文件，大约需要 30GiB 内存。

你可以通过 Redis 的 [`INFO memory`](https://redis.io/commands/info) 命令查看具体的内存使用量，例如：

```
> INFO memory
used_memory: 19167628056
used_memory_human: 17.85G
used_memory_rss: 20684886016
used_memory_rss_human: 19.26G
...
used_memory_overhead: 5727954464
...
used_memory_dataset: 13439673592
used_memory_dataset_perc: 70.12%
```

其中 `used_memory_rss` 是 Redis 实际使用的总内存大小，这里既包含了存储在 Redis 中的数据大小（也就是上面的 `used_memory_dataset`），也包含了一些 Redis 的[系统开销](https://redis.io/commands/memory-stats)（也就是上面的 `used_memory_overhead`）。前面提到每个文件的元数据大约占用 300 字节是通过 `used_memory_dataset` 来计算的，如果你发现你的 JuiceFS 文件系统中单个文件元数据占用空间远大于 300 字节，可以尝试运行 [`juicefs gc`](../../reference/command_reference.mdx#gc) 命令来清理可能存在的冗余数据。

## 数据可用性

### 哨兵模式 {#sentinel-mode}

[Redis 哨兵](https://redis.io/docs/manual/sentinel) 是 Redis 官方的高可用解决方案，它提供以下功能：

- **监控**，哨兵会不断检查您的 master 实例和 replica 实例是否按预期工作。
- **通知**，当受监控的 Redis 实例出现问题时，哨兵可以通过 API 通知系统管理员或其他计算机程序。
- **自动故障转移**，如果 master 没有按预期工作，哨兵可以启动一个故障转移过程，其中一个 replica 被提升为 master，其他的副本被重新配置为使用新的 master，应用程序在连接 Redis 服务器时会被告知新的地址。
- **配置提供程序**，哨兵会充当客户端服务发现的权威来源：客户端连接到哨兵以获取当前 Redis 主节点的地址。如果发生故障转移，哨兵会报告新地址。

**Redis 2.8 开始提供稳定版本的 Redis 哨兵**。Redis 2.6 提供的第一版 Redis 哨兵已被弃用，不建议使用。

在使用 Redis 哨兵之前，先了解一些[基础知识](https://redis.io/docs/manual/sentinel#fundamental-things-to-know-about-sentinel-before-deploying)：

1. 您至少需要三个哨兵实例才能进行稳健的部署。
2. 这三个哨兵实例应放置在彼此独立的计算机或虚拟机中。例如，分别位于不同的可用区域上的不同物理服务器或虚拟机上。
3. **由于 Redis 使用异步复制，无法保证在发生故障时能够保留已确认的写入。** 然而，有一些部署 哨兵的方法，可以使丢失写入的窗口限于某些时刻，当然还有其他不太安全的部署方法。
4. 如果您不在开发环境中经常进行测试，就无法确保 HA 的设置是安全的。在条件允许的情况，如果能够在生产环境中进行验证则更好。错误的配置往往都是在你难以预期和响应的时间出现（比如，凌晨 3 点你的 master 节点悄然罢工）。
5. **哨兵、Docker 或其他形式的网络地址转换或端口映射应谨慎混用**：Docker 执行端口重映射，会破坏其他哨兵进程的哨兵自动发现以及 master 的 replicas 列表。

更多信息请阅读[官方文档](https://redis.io/docs/manual/sentinel)。

部署了 Redis 服务器和哨兵以后，`META-URL` 可以指定为 `redis[s]://[[USER]:PASSWORD@]MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`，例如：

```shell
juicefs mount redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2 ~/jfs
```

:::tip 提示
对于 JuiceFS v0.16 及以上版本，URL 中提供的密码会用于连接 Redis 服务器，哨兵的密码需要用环境变量 `SENTINEL_PASSWORD` 指定。对于更早的版本，URL 中的密码会同时用于连接 Redis 服务器和哨兵，也可以通过环境变量 `SENTINEL_PASSWORD` 和 `REDIS_PASSWORD` 来覆盖。
:::

自 JuiceFS v1.0.0 版本开始，支持在挂载文件系统时仅连接 Redis 的副本节点，以降低 Redis 主节点的负载。为了开启这个特性，必须以只读模式挂载 JuiceFS 文件系统（即设置 `--read-only` 挂载选项），并通过 Redis 哨兵连接元数据引擎，最后需要在元数据 URL 末尾加上 `?route-read=replica`，例如：`redis://:password@masterName,1.2.3.4,1.2.5.6:26379/2?route-read=replica`。

需要注意由于 Redis 主节点的数据是异步复制到副本节点，因此有可能读到的元数据不是最新的。

### 集群模式 {#cluster-mode}

:::note 注意
此特性需要使用 1.0.0 及以上版本的 JuiceFS
:::

JuiceFS 同样支持集群模式的 Redis 作为元数据引擎，Redis 集群模式的 `META-URL` 为 `redis[s]://[[USER]:PASSWORD@]ADDR:PORT,[ADDR:PORT],[ADDR:PORT][/DB]`，例如：

```shell
juicefs format redis://127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002/1 myjfs
```

:::tip 提示
Redis 集群不再支持多数据库，而是将所有 keys 分散到 16384 个 hash slots 中，再将这些 hash slots 打散到多个 Redis master 节点来存储。JuiceFS 利用了 Redis 集群的 [Hash Tag](https://redis.io/docs/reference/cluster-spec/#hash-tags) 特性，通过将 `{DB}` 作为 key 的前缀来将一个文件系统中的所有 keys 都存放在同一个 hash slot，以保证集群模式下操作的事务性。另外，通过设置不同的 `DB` 可以让一个 Redis 集群同时作为多个 JuiceFS 的元数据库。
:::

## 数据持久性

Redis 提供了不同范围的[持久性](https://redis.io/docs/manual/persistence)选项：

- **RDB**：以指定的时间间隔生成当前数据集的快照。
- **AOF**：记录服务器收到的每一个写操作，在服务器启动时重建原始数据集。命令使用与 Redis 协议本身相同的格式以追加写（append-only）的方式记录。当日志变得太大时，Redis 能够在后台重写日志。
- **RDB+AOF** <Badge type="success">建议</Badge>：组合使用 AOF 和 RDB。在这种情况下，当 Redis 重新启动时，AOF 文件将用于重建原始数据集，因为它保证是最完整的。

当使用 AOF 时，您可以有不同的 fsync 策略：

1. 没有 fsync；
2. 每秒 fsync <Badge type="primary">默认</Badge>；
3. 每次查询 fsync。

默认策略「每秒 fsync」是不错的选择（fsync 是使用后台线程执行的，当没有 fsync 正在进行时，主线程会努力执行写入），**但你可能丢失最近一秒钟的写入**。

Redis 对数据备份非常友好，因为您可以在数据库运行时复制 RDB 文件，RDB 一旦生成就永远不会被修改，当它被生成时，它使用一个临时名称，并且只有在新快照完成时才使用 `rename` 原子地重命名到其最终目的地。您还可以复制 AOF 文件以创建备份。

更多信息请阅读[官方文档](https://redis.io/docs/manual/persistence)。

## 备份 Redis 数据

磁盘可能会损坏，虚拟机可能出意外，即使采用 RBD+AOF 模式，**依然需要定期备份 Redis 数据**。

默认情况下，Redis 将数据集的快照保存在磁盘上，名为 `dump.rdb` 的二进制文件中。你可以根据需要，将 Redis 配置为当数据集至少发生 M 次变化时，每 N 秒保存一次，也可以手动调用 [`SAVE`](https://redis.io/commands/save) 或 [`BGSAVE`](https://redis.io/commands/bgsave) 命令。

Redis 对数据备份非常友好，因为您可以在数据库运行时复制 RDB 文件：RDB 一旦生成就永远不会被修改，当它被生成时，它使用一个临时名称，并且只有在新快照完成时才使用 `rename(2)` 原子地重命名到其最终目的地。

这意味着在服务器运行时复制 RDB 文件是完全安全的。以下是我们的建议：

- 在您的服务器中创建一个 cron 任务，在一个目录中创建 RDB 文件的每小时快照，并在另一个目录中创建每日快照。
- 每次 cron 脚本运行时，请务必调用 `find` 命令以确保删除太旧的快照：例如，您可以保留最近 48 小时的每小时快照，以及一至两个月的每日快照。要确保使用数据和时间信息来命名快照。
- 确保每天至少一次将 RDB 快照从运行 Redis 的实例传输至 _数据中心以外_ 或至少传输至 _物理机以外_。

更多信息请阅读[官方文档](https://redis.io/docs/manual/persistence)。

## 恢复 Redis 数据

当生成 AOF 或者 RDB 备份文件以后，可以将备份文件拷贝到新 Redis 实例的 `dir` 配置对应的路径中来恢复数据，你可以通过 [`CONFIG GET dir`](https://redis.io/commands/config-get) 命令获取当前 Redis 实例的配置信息。

如果 AOF 和 RDB 同时开启，Redis 启动时会优先使用 AOF 文件来恢复数据，因为 AOF 保证是最完整的数据。

在恢复完 Redis 数据以后，可以继续通过新的 Redis 地址使用 JuiceFS 文件系统。建议运行 [`juicefs fsck`](../../reference/command_reference.mdx#fsck) 命令检查文件系统数据的完整性。

## 推荐的 Redis 托管服务

### Amazon MemoryDB for Redis

[Amazon MemoryDB for Redis](https://aws.amazon.com/memorydb) 是一种持久的内存数据库服务，可提供超快的性能。MemoryDB 与 Redis 兼容，使用 MemoryDB，你的所有数据都存储在内存中，这使你能够实现微秒级读取和数毫秒的写入延迟和高吞吐。MemoryDB 还使用多可用区事务日志跨多个可用区持久存储数据，以实现快速故障切换、数据库恢复和节点重启。

### Google Cloud Memorystore for Redis

[Google Cloud Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis) 是针对 Google Cloud 的完全托管的 Redis 服务。通过利用高度可扩展、可用且安全的 Redis 服务，在 Google Cloud 上运行的应用程序可以实现卓越的性能，而无需管理复杂的 Redis 部署。

### Azure Cache for Redis

[Azure Cache for Redis](https://azure.microsoft.com/en-us/services/cache) 是一个完全托管的内存缓存，支持高性能和可扩展的架构。使用它来创建云或混合部署，以亚毫秒级延迟处理每秒数百万个请求——所有这些都具有托管服务的配置、安全性和可用性优势。

### 阿里云云数据库 Redis 版

[阿里云云数据库 Redis 版](https://www.aliyun.com/product/kvstore)是一种兼容原生 Redis 协议的数据库服务。它支持混合内存和硬盘以实现数据持久性。云数据库 Redis 版提供高可用的热备架构，可扩展以满足高性能、低延迟的读写操作需求。

### 腾讯云云数据库 Redis

[腾讯云云数据库 Redis](https://cloud.tencent.com/product/crs) 是一种兼容 Redis 协议的缓存和存储服务。丰富多样的数据结构选项，帮助您开发不同类型的业务场景，提供主从热备份、容灾自动切换、数据备份、故障转移、实例监控、在线等一整套数据库服务缩放和数据回滚。

## 使用 Redis 兼容的产品

如果想要使用 Redis 兼容产品作为元数据引擎，需要确认是否完整支持 JuiceFS 所需的以下 Redis 数据类型和命令。

### JuiceFS 使用到的 Redis 数据类型

+ [String](https://redis.io/docs/data-types/strings)
+ [Set](https://redis.io/docs/data-types/sets)
+ [Sorted Set](https://redis.io/docs/data-types/sorted-sets)
+ [Hash](https://redis.io/docs/data-types/hashes)
+ [List](https://redis.io/docs/data-types/lists)

### JuiceFS 使用到的 Redis 特性

+ [管道](https://redis.io/docs/manual/pipelining)

### JuiceFS 使用到的 Redis 命令

#### String

+ [DECRBY](https://redis.io/commands/decrby)
+ [DEL](https://redis.io/commands/del)
+ [GET](https://redis.io/commands/get)
+ [INCR](https://redis.io/commands/incr)
+ [INCRBY](https://redis.io/commands/incrby)
+ [DECR](https://redis.io/commands/decr)
+ [MGET](https://redis.io/commands/mget)
+ [MSET](https://redis.io/commands/mset)
+ [SETNX](https://redis.io/commands/setnx)
+ [SET](https://redis.io/commands/set)

#### Set

+ [SADD](https://redis.io/commands/sadd)
+ [SMEMBERS](https://redis.io/commands/smembers)
+ [SREM](https://redis.io/commands/srem)

#### Sorted Set

+ [ZADD](https://redis.io/commands/zadd)
+ [ZRANGEBYSCORE](https://redis.io/commands/zrangebyscore)
+ [ZRANGE](https://redis.io/commands/zrange)
+ [ZREM](https://redis.io/commands/zrem)
+ [ZSCORE](https://redis.io/commands/zscore)

#### Hash

+ [HDEL](https://redis.io/commands/hdel)
+ [HEXISTS](https://redis.io/commands/hexists)
+ [HGETALL](https://redis.io/commands/hgetall)
+ [HGET](https://redis.io/commands/hget)
+ [HINCRBY](https://redis.io/commands/hincrby)
+ [HKEYS](https://redis.io/commands/hkeys)
+ [HSCAN](https://redis.io/commands/hscan)
+ [HSETNX](https://redis.io/commands/hsetnx)
+ [HSET](https://redis.io/commands/hset)（需要支持设置多个 field 和 value）

#### List

+ [LLEN](https://redis.io/commands/llen)
+ [LPUSH](https://redis.io/commands/lpush)
+ [LRANGE](https://redis.io/commands/lrange)
+ [LTRIM](https://redis.io/commands/ltrim)
+ [RPUSHX](https://redis.io/commands/rpushx)
+ [RPUSH](https://redis.io/commands/rpush)
+ [SCAN](https://redis.io/commands/scan)

#### 事务

+ [EXEC](https://redis.io/commands/exec)
+ [MULTI](https://redis.io/commands/multi)
+ [WATCH](https://redis.io/commands/watch)
+ [UNWATCH](https://redis.io/commands/unwatch)

#### 连接管理

+ [PING](https://redis.io/commands/ping)

#### 服务管理

+ [CONFIG GET](https://redis.io/commands/config-get)
+ [CONFIG SET](https://redis.io/commands/config-set)
+ [DBSIZE](https://redis.io/commands/dbsize)
+ [FLUSHDB](https://redis.io/commands/flushdb)（可选）
+ [INFO](https://redis.io/commands/info)

#### 集群管理

+ [CLUSTER INFO](https://redis.io/commands/cluster-info)

#### 脚本（可选）

+ [EVALSHA](https://redis.io/commands/evalsha)
+ [SCRIPT LOAD](https://redis.io/commands/script-load)


================================================
FILE: docs/zh_cn/administration/metadata/tikv_best_practices.md
================================================
---
sidebar_label: TiKV
sidebar_position: 5
slug: /tikv_best_practices
---
# TiKV 最佳实践

TiKV 通过 Raft 协议保证多副本数据一致性以及高可用，所以建议生产环境中至少部署三个以上副本以保证数据安全和服务稳定。
TiKV 有很好的横向扩容能力，适用于大规模且对性能有一定要求的文件系统场景。

## 垃圾回收

TiKV 原生支持了 MVCC（多版本并发控制）机制，当新写入的数据覆盖旧的数据时，旧的数据不会被替换掉，而是与新写入的数据同时保留，并以时间戳来区分版本。垃圾回收 (GC) 的任务便是清理不再需要的旧数据。

### JuiceFS 的配置

TiKV 根据一个集群变量 `safe-point`（时间戳）来决定是否要清理某个时间之前的旧版本数据。JuiceFS 在 v1.0.4 之前不会设置`safe-point`，TiKV 元数据引擎需要依赖 TiDB 才能正常进行垃圾回收。而在 v1.0.4 之后，JuiceFS 客户端会周期性地设置 `safe-point`，默认会清除三小时之前的旧版本数据，这个时间可在挂载时通过 meta url 的 `gc-interval` 设置。

- 默认 `gc-interval` 的挂载 log

```bash
> sudo ./juicefs mount tikv://localhost:2379 ~/mnt/jfs
2023/04/06 20:23:34.741432 juicefs[17286] <INFO>: Meta address: tikv://localhost:2379 [interface.go:491]
2023/04/06 20:23:34.741561 juicefs[17286] <INFO>: TiKV gc interval is set to 3h0m0s [tkv_tikv.go:84]
...
```

- 设置 `gc-interval` 后的挂载 log

```bash
> sudo ./juicefs mount tikv://localhost:2379\?gc-interval=1h ~/mnt/jfs
2023/04/06 20:25:58.134999 juicefs[17395] <INFO>: Meta address: tikv://localhost:2379?gc-interval=1h [interface.go:491]
2023/04/06 20:25:58.135113 juicefs[17395] <INFO>: TiKV gc interval is set to 1h0m0s [tkv_tikv.go:84]
...
```

#### 主动设置 `safe-point`

JuiceFS 客户端会周期性设置 `safe-point`，除此之外我们也可以通过 gc 子命令来主动设置。

```bash
> ./juicefs gc -v tikv://localhost:2379\?gc-interval=1h --delete
...
2023/04/06 20:41:57.145692 juicefs[18531] <DEBUG>: TiKV GC returns new safe point: 440606737600086016 (2023-04-06 19:41:57.139 +0800 CST) [tkv_tikv.go:248]
...
```

:::tip 提示
此命令同时会清理 JuiceFS 产生的「泄漏对象」和「待清理对象」，请参考[状态检查 & 维护](../status_check_and_maintenance.md#gc)以确认您是否应该使用。
:::

### TiKV 的垃圾回收模式

- gc-worker

可以在通过 TiKV 配置来启用 gc-worker。gc-worker 模式下垃圾会被及时回收，但大量额外的磁盘读写可能会影响元数据引擎性能。

```toml
[gc]
enable-compaction-filter = false
```

- compaction-filter

TiKV 默认通过 [compaction-filter](https://docs.pingcap.com/zh/tidb/dev/garbage-collection-configuration#gc-in-compaction-filter-%E6%9C%BA%E5%88%B6) 进行垃圾回收，由 RocksDB 的 Compaction 过程来进行 GC，而不再使用一个单独的 GC worker 线程。这样做的好处是避免了 GC 引起的额外磁盘读取，以及避免清理掉的旧版本残留大量删除标记影响顺序扫描性能。

由于此回收模式依赖 RocksDB compaction，所以设置`safe-point`之后垃圾并不会被及时回收，需要后续持续写入触发 compaction 才能进行 GC。如果您需要主动触发 GC，可以通过 [`tikv-ctl`](https://docs.pingcap.com/zh/tidb/dev/tikv-control) 工具主动进行集群 compaction，从而触发全局 GC。

```bash
> tikv-ctl --pd 127.0.0.1:2379 compact-cluster -b -c default,lock,write
```

## 元数据备份

对于大规模文件系统，需要调高 [tikv_gc_life_time](https://docs.pingcap.com/zh/tidb/stable/dev-guide-timeouts-in-tidb#gc-%E8%B6%85%E6%97%B6) 参数，否则可能会因为 `GC life time is shorter than transaction duration` 导致备份失败。

## 运行环境与调优

### 硬件选型

根据[TiDB 软件和硬件环境建议配置](https://docs.pingcap.com/zh/tidb/stable/hardware-and-software-requirements)，TiKV 支持部署和运行在 Intel x86-64 架构的 64 位通用硬件服务器平台或者 ARM 架构的硬件服务器平台。对于开发、测试及生产环境的服务器硬件配置（不包含操作系统 OS 本身的占用）有以下要求和建议：

+ **开发与测试环境**

| 组件 |CPU| 内存 | 本地存储 | 网络 | 实例数量 (最低要求)|
|-|-|-|-|-|-|
|PD|4 核 +|8 GB+|SAS, 200 GB+| 千兆网卡 |1|
|TiKV|8 核 +|32 GB+|SSD, 200 GB+| 千兆网卡 |3|

:::note 说明

+ 如进行性能相关的测试，避免采用低性能存储和网络硬件配置，防止对测试结果的正确性产生干扰。
+ TiKV 的 SSD 盘推荐使用 NVME 接口以保证读写更快。

:::

+ **生产环境**

| 组件 |CPU| 内存 | 本地存储 | 网络 | 实例数量 (最低要求)|
|-|-|-|-|-|-|
|PD|8 核 +|16 GB+|SSD| 万兆网卡（2 块最佳）|3|
|TiKV|16 核 +|64 GB+|SSD| 万兆网卡（2 块最佳）|3|

:::note 说明
TiKV 硬盘大小配置建议 PCI-E SSD 不超过 2 TB，普通 SSD 不超过 1.5 TB。
:::

### 网络要求

TiKV 正常运行需要网络环境提供如下的网络端口配置要求，管理员可根据实际环境中组件部署的方案，在网络侧和主机侧开放相关端口：

| 组件 | 默认端口 | 说明 |
|-|-|-|
|TiKV|20160|TiKV 通信端口 |
|TiKV|20180|TiKV 状态信息上报通信端口 |
|PD|2379| 提供 TiDB 和 PD 通信端口 |
|PD|2380|PD 集群节点间通信端口 |

### 磁盘空间要求

| 组件 | 磁盘空间要求 | 健康水位使用率 |
|-|-|-|
|PD| 数据盘和日志盘建议最少各预留 20 GB| 低于 90%|
|TiKV| 数据盘和日志盘建议最少各预留 100 GB| 低于 80%|

## 硬件调优

各种数据库官方都有硬件有一定要求，TiKV 等组件都有最低的 CPU、内存、硬盘、网卡要求。本章节在满足这些需求的基础上，探讨下硬件参数优化，主要参考[数据库硬件调优](https://tidb.net/book/tidb-monthly/2022/2022-03/usercase/tuning-hardware)。

### CPU

+ **CPU 选型**

可以分为计算型和存储型。计算型往往需要更多的 CPU 核心和更高的主频。存储型的 CPU 可能就配置稍微低些。对于计算型和存储型 CPU 选择，拿 JuiceFS 的使用场景来说，PD 和 TiKV 以存储型为主，没有太高的计算负载，可以提前规划使得硬件采购更加合理，节省成本。

+ **CPU 架构：X86/ARM**

X86 架构出现在 intel/AMD 的 CPU 架构中，采用复杂指令集，也是目前最主流服务器的 CPU 架构。ARM 架构 CPU 在手机，mac 笔记本，以及华为等国产服务器厂商中出现。目前各大公司主要采购的是 X86-64 架构的 CPU，也对 ARM 服务器进行了 web 和数据库应用的验证。TiKV 对两种架构均有支持，可根据实际部署情况进行选择。

+ **Numa 绑核**

多核心 CPU 的各核心会被分配到不同的 NUMA node，每个 NUMA node 都有自己专属/本地的主存，访问本地的主存比其跨 NUMA node 访问内存更快，开启 NUMA 会优先就近使用内存。在单机多节点部署时推荐此配置。

+ **CPU-动态节能技术**

cpufreq 是一个动态调整 CPU 频率的模块，可支持五种模式。为保证服务性能应选用 performance 模式，将 CPU 频率固定工作在其支持的最高运行频率上，从而获取最佳的性能，一般都是默认 powersave，可以通过 cpupower frequency-set 修改。

### Memory

+ **关闭 Swap**

swap 用硬盘来承接到达一定阀值的内存访问，由 `vm.swappiness` 参数控制，默认 60，也就是系统内存使用到 40% 时开始使用，TiKV 运行需要有足够的内存。如果内存不足，不建议使用 swap 作为内存不足的缓冲，因为这会降低性能。建议关闭系统 swap。

+ **设置`min_free_kbytes`**

`min_free_kbytes` 内核参数控制了多少内存应该保持空闲而不被文件系统缓存占用。通常情况下，内核会用文件系统缓存占据几乎所有的空闲内存，并根据需要释放内存供进程分配。由于数据库会共享内存中执行大量的分配，默认的内核值可能会导致意外的 OOM（Out-of-Memory kill），在总内存大于 40G 的情况下，建议将该参数配置为至少 1GB，但是不建议超过总内存的 5%，这可以确保 Linux 始终保持足够的内存可用。

+ **关闭透明大页（Transparent Huge Pages，THP）**

数据库的内存访问模式往往是稀疏的而非连续的。当高阶内存碎片化比较严重时，分配 THP 页面会出现较高的延迟，若开启针对 THP 的直接内存规整功能，也会出现系统 CPU 使用率激增的现象，因此建议关闭 THP。

+ **调整虚拟内存 `dirty_ratio`/`dirty_background_ratio` 参数**

`dirty_ratio` 是绝对的脏页百分比值限限制。当脏的 page cache 总量达到系统内存总量的这一百分比后，系统将开始使用 pdflush 操作将脏的 page cache 写入磁盘。默认值为 20％，也就是说如果到达该值时可能会导致应用进程的 IO 等待，通常不需调整。

`dirty_background_ratio` 百分比值。当脏的 page cache 总量达到系统内存总量的这一百分比后，系统开始在后台将脏的 page cache 写入磁盘。默认值为 10％，如果后台刷脏页的慢，而数据写的快就容易触发 dirty_ratio 的限制。通常不需调整。对于高性能 SSD，比如 NVMe 设备来说，设置较低的值有利于提高内存回收时的效率。

### 数据存储

#### 硬盘选型

1. SAS 一般跟 RAID 卡搭配，实现 raid 0/1/10/5 等阵列扩展。
2. SATA 支持热插拔，接口最高 6G/s。
3. PCIE 传输速率更高 8G/s，但是支持多通道，可以线性扩展速率。之前网卡/显卡都在用。上面 3 个接口协议不同，AHCI 转为 SAS 和 SATA 设计，NVMe 协议为 PCIE SSD 设计性能更优。一般核心的 + 高 I/O 的数据库都采用该类型 SSD。
4. 持久内存：傲腾，它提供丰富的底层接口，成本很高，对于需要极致写入性能的，可以考虑。

#### I/O 调度算法

##### noop(no operation)

noop 调度算法是内核中最简单的 IO 调度算法。noop 调度算法将 IO 请求放入到一个 FIFO 队列中，然后逐个执行这些 IO 请求，当然对于一些在磁盘上连续的 IO 请求，noop 调度会适当做一些合并。这个调度算法特别适合那些不希望调度器重新组织 IO 请求顺序的应用，因为内核的 I/O 调度操作会导致性能损失。NVMe SSD 这种高速 I/O 设备可以直接将请求下发给硬件，从而获取更好的性能。

##### CFQ(Completely Fair Queuing)

CFQ 尝试提供由发起 I/O 进程决定的公平的 I/O 调度，该算法为每一个进程分配一个时间窗口，在该时间窗口内，允许进程发出 IO 请求。通过时间窗口在不同进程间的移动，保证了对于所有进程而言都有公平的发出 IO 请求的机会，假如少数进程存在大量密集的 I/O 请求的情况，会出现明显的 I/O 性能下降。

##### deadline

deadline 调度算法主要针对 I/O 请求的延时，每个 I/O 请求都被附加一个最后执行期限。读请求和写请求被分成了两个队列，默认优先处理读 IO，除非写快到 deadline 时才调度。当系统中存在的 I/O 请求进程数量比较少时，与 CFQ 算法相比，deadline 算法可以提供较高的 I/O 吞吐率。

## 常见问题

### 多机并发读写同一个目录，如何避免持续的事务重启现象？

当多客户端在同一个目录下频繁创建/删除子目录时，可能会出现持续的事务重启现象。JuiceFS v1.1 版本开始提供 `--skip-dir-nlink value` 挂载选项，用以指定跳过目录的 nlink 检查之前的重试次数，默认为 20 次。可以适当调小该值，或者设置为 0 禁止重试，从而避免持续的事务重启现象，详情参考[元数据相关的挂载选项](https://juicefs.com/docs/zh/community/command_reference#mount-metadata-options)。


================================================
FILE: docs/zh_cn/administration/metadata_dump_load.md
================================================
---
title: 元数据备份和恢复
sidebar_position: 2
slug: /metadata_dump_load
---

:::tip 提示

- JuiceFS v1.0.0 开始支持元数据自动备份
- JuiceFS v1.0.4 开始支持通过 `load` 命令恢复加密的元数据备份
- JuiceFS v1.3.0 开始支持二进制格式的元数据备份和恢复

:::

JuiceFS 支持[多种元数据引擎](../reference/how_to_set_up_metadata_engine.md)，且各引擎内部的数据管理格式各有不同。为了便于管理，JuiceFS 提供了 [`dump`](../reference/command_reference.mdx#dump) 命令允许将所有元数据以统一格式写入到 JSON 或二进制文件进行备份。同时，JuiceFS 也提供了 [`load`](../reference/command_reference.mdx#load) 命令，允许将备份恢复或迁移到任意元数据存储引擎。这个导出导入流程也可以用来将 JuiceFS 社区版文件系统迁移到企业版（参考[企业版文档](https://juicefs.com/docs/zh/cloud/administration/metadata_dump_load)），反之亦然。

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114267739133259&bvid=BV1eAfTYcE6h&cid=29198519414&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 元数据备份 {#backup}

:::note 注意

* `juicefs dump` 不提供全局时间点快照的功能，如果在导出过程中业务仍在写入，最终结果会包含不同时间点的信息，对于特定应用（比如数据库），这可能意味着导出文件不可用。如果对一致性有更高要求，可能需要在导出前确保应用停写。
* 对大规模文件系统，如果直接在线上环境进行导出，可能影响业务稳定性。

:::

## 文件格式

JuiceFS 支持两种格式的元数据备份：JSON 格式和二进制格式。二进制格式在 v1.3.0 版本中引入，主要用于大规模文件系统的导入导出和迁移。二进制格式的备份体积更小，内存占用更低，并且支持并发导入导出。

| 格式类型     | 结构特点         | 适用场景           | 体积大小         | 内存占用         | 版本要求   |
|------------|------------------|--------------------|------------------|------------------|------------|
| **JSON 格式**   | 完整目录树结构，易读 | 中小规模文件系统；问题定位 | 较大             | 较高             | 所有版本   |
| **二进制格式**  | 扁平化结构，高效紧凑 | 大规模导入导出和迁移     | 约为 JSON 的 1/3 | < 1GiB（1 亿文件） | v1.3.0+    |

### 手动备份 {#backup-manually}

使用 JuiceFS 客户端提供的 `dump` 命令可以将元数据导出到文件，例如：

```shell
# 导出为 JSON 格式
juicefs dump redis://192.168.1.6:6379/1 meta-dump

# 导出为二进制格式
juicefs dump redis://192.168.1.6:6379/1 meta-dump --binary
```

上例中 `meta-dump` 是导出的备份文件，你可以随意调整它的文件名和扩展名。特别地，如果文件的扩展名为 `.gz`（如 `meta-dump.gz`），将会使用 Gzip 算法对导出的数据进行压缩。将会使用 Gzip 算法对导出的数据进行压缩。v1.3 版本之后也支持 Zstandard 压缩算法，使用 `.zstd` 作为扩展名。

`dump` 命令默认从根目录 `/` 开始，深度遍历目录树下所有文件，将每个文件的元数据信息按 JSON 格式进行输出。出于数据安全的考虑，对象存储的认证信息不会被导出，但可以通过 `--keep-secret-key` 选项保留。

`juicefs dump` 的价值在于它能将完整的元数据信息以统一的 JSON 格式导出，便于管理和保存，而且不同的元数据存储引擎都可以识别并导入。

在实际应用中，`dump` 命令与数据库自带的备份工具应该共同使用，相辅相成。比如，Redis 有 [Redis RDB](https://redis.io/topics/persistence#backing-up-redis-data)，MySQL 有 [`mysqldump`](https://dev.mysql.com/doc/mysql-backup-excerpt/5.7/en/mysqldump-sql-format.html) 等。

### 自动备份 {#backup-automatically}

从 JuiceFS v1.0.0 开始，不论文件系统通过 `mount` 命令挂载，还是通过 JuiceFS S3 网关及 Hadoop Java SDK 访问，客户端每小时都会自动备份元数据并拷贝到对象存储。

备份的文件存储在对象存储的 `meta` 目录中，它是一个独立于数据存储的目录，在挂载点中不可见，也不会与数据存储之间产生影响，用对象存储的文件浏览器即可查看和管理。

![meta-auto-backup-list](../images/meta-auto-backup-list.png)

默认情况下，JuiceFS 客户端每小时备份一次元数据，自动备份的频率可以在挂载文件系统时通过 `--backup-meta` 选项进行调整，例如，要设置为每 8 个小时执行一次自动备份：

```shell
juicefs mount -d --backup-meta 8h redis://127.0.0.1:6379/1 /mnt
```

备份频率可以精确到秒，支持的单位如下：

- `h`：精确到小时，如 `1h`；
- `m`：精确到分钟，如 `30m`、`1h30m`；
- `s`：精确到秒，如 `50s`、`30m50s`、`1h30m50s`;

值得一提的是，备份操作耗时会随着文件系统内文件数的增多而增加，因此当文件数较多（默认为达到一百万）且自动备份频率为默认值 1 小时的情况下 JuiceFS 会自动跳过元数据备份，并打印相应的告警日志。此时可以选择挂载一个新客户端并设置较大的 `--backup-meta` 参数来重新启用自动备份。

作为参考，当使用 Redis 作为元数据引擎时，备份一百万文件的元数据大约需要 1 分钟，消耗约 1GB 内存。

:::caution 注意
使用 `--read-only` 只读挂载时，元数据不会自动备份。
:::

#### 自动备份策略

虽然自动备份元数据成为了客户端的默认动作，但在多主机共享挂载同一个文件系统时并不会发生备份冲突。

JuiceFS 维护了一个全局的时间戳，确保同一时刻只有一个客户端执行备份操作。当客户端之间设置了不同的备份周期，那么就会以周期最短的设置为准进行备份。

#### 备份清理策略

JuiceFS 会按照以下规则定期清理备份：

- 保留 2 天以内全部的备份；
- 超过 2 天不足 2 周的，保留每天中的 1 个备份；
- 超过 2 周不足 2 月的，保留每周中的 1 个备份；
- 超过 2 个月的，保留每个月中的 1 个备份。

## 元数据恢复与迁移 {#recovery-and-migration}

使用 [`load`](../reference/command_reference.mdx#load) 命令可以将 `dump` 命令导出的元数据恢复到一个空数据库中，比如：

```shell
# 从 JSON 文件导入
juicefs load redis://192.168.1.6:6379/1 meta-dump

# 从二进制备份导入
juicefs load redis://192.168.1.6:6379/1 meta-dump --binary
```

导入元数据时，JuiceFS 会重新计算文件系统的统计信息，包括空间使用量、inode 计数器等，最后在数据库中生成一份全局一致的元数据。如果你对 JuiceFS 的元数据设计有深入理解，还可以在恢复前对元数据备份文件进行修改，以此来进行调试。

`dump` 命令导出的 JSON 格式数据是统一且通用的，所有元数据引擎都能识别和导入。因此，你不但可以把备份恢复到原有类型的数据库中，还可以恢复到其它数据库，从而实现元数据引擎的迁移。

例如将元数据从 Redis 迁移到 MySQL：

1. 从 Redis 导出元数据备份：

   ```shell
   juicefs dump redis://192.168.1.6:6379/1 meta-dump.json
   ```

1. 将元数据恢复到一个全新的 MySQL 数据库：

   ```shell
   juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs meta-dump.json
   ```

另外，也可以通过系统的管道直接迁移：

```shell
juicefs dump redis://192.168.1.6:6379/1 | juicefs load mysql://user:password@(192.168.1.6:3306)/juicefs
```

需要注意的是，由于 `dump` 导出的备份中默认排除了对象存储的 API 访问密钥，不论恢复还是迁移元数据，完成操作后都需要使用 [`juicefs config`](../reference/command_reference.mdx#config) 命令把文件系统关联的对象存储的认证信息再添加回去，例如：

```shell
juicefs config --secret-key xxxxx mysql://user:password@(192.168.1.6:3306)/juicefs
```

### 加密文件系统 {#encrypted-file-system}

对于[加密的文件系统](../security/encryption.md)，所有文件都会在本地加密后才上传到后端对象存储，包括元数据自动备份文件，也会加密后才上传至对象存储。这与 `dump` 命令不同，`dump` 导出的元数据永远是明文的。

对于加密文件系统，在恢复自动备份的元数据时需要额外设置 `JFS_RSA_PASSPHRASE` 环境变量，以及指定 RSA 私钥和加密算法：

```shell
export JFS_RSA_PASSPHRASE=xxxxxx
juicefs load \
  --encrypt-rsa-key my-private.pem \
  --encrypt-algo aes256gcm-rsa \
  redis://192.168.1.6:6379/1 \
  dump-2023-03-16-090750.json.gz
```

## 元数据检视 {#inspection}

除了可以导出完整的元数据信息，`dump` 命令还支持导出特定子目录中的元数据。可以直观地查看到指定目录树下所有文件的内部信息，因此常被用来辅助排查问题。

```shell
juicefs dump redis://192.168.1.6:6379/1 meta-dump.json --subdir /path/in/juicefs
```

另外，也可以使用 `jq` 等工具对导出文件进行分析。

### 二进制备份内容分析与排查

二进制备份还支持直接查看类型统计、分段（Segment）信息等：

```shell
# 查看备份元数据类型统计信息
juicefs load meta-dump --binary --stat

# 查看备份元数据 Segments 信息（获取 offset）
juicefs load meta-dump --binary --stat --offset=-1

# 查看备份元数据指定 Segment（指定 offset）信息
juicefs load meta-dump --binary --stat --offset=123416309
```

示例输出：

```
Backup Version: 1
-----------------------
Name      | Num
-----------------------
acl           | 0
chunk      | 1111179
counter    | 6
delFile     | 0
edge        | 1112124
format      | 1
…
Segment: format
Value: {
"Name": "test2",
"UUID": "15b92123-1395-40e4-a5aa-edb38918985a",
"Storage": "file",
"Bucket": "/home/hjf/.juicefs/local/",
"BlockSize": 4096,
"Compression": "none",
"EncryptAlgo": "aes256gcm-rsa",
"TrashDays": 1,
"MetaVersion": 1,
"MinClientVersion": "1.1.0-A",
"DirStats": true,
"EnableACL": false
}
```

> 二进制备份为 PB 格式，也可自定义工具对备份进行校验和查看。


================================================
FILE: docs/zh_cn/administration/monitoring.md
================================================
---
title: 监控与数据可视化
sidebar_position: 3
description: 了解 JuiceFS 的监控指标，以及如何通过 Prometheus 和 Grafana 实现数据可视化。
---

JuiceFS 提供了丰富的监控指标，本文介绍如何收集这些指标，并通过 Prometheus 和 Grafana 实现类似下图的可视化监控系统。

![grafana_dashboard](../images/grafana_dashboard.png)

搭建流程大致如下：

1. 配置 Prometheus 抓取 JuiceFS 监控指标
2. 让 Grafana 读取 Prometheus 中的监控数据
3. 用 JuiceFS 官方的 Grafana 仪表盘模板展现监控指标

:::tip 提示
本文使用开源版的 Grafana 和 Prometheus 作为例子，如果你想使用 Grafana Cloud 来构建可视化监控系统，可以参考这篇文章 [「如何使用 Grafana 监控文件系统状态」](https://juicefs.com/zh-cn/blog/usage-tips/use-grafana-monitor-file-system-status)。
:::

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114640176616226&bvid=BV1oJTVzaEJT&cid=30364404417&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 1. 配置 Prometheus 抓取 JuiceFS 监控指标 {#add-scrape-config}

JuiceFS 挂载后，默认会通过 `http://localhost:9567/metrics` 地址实时输出 Prometheus 格式的指标数据。为了查看各项指标在一个时间范围内的状态变化，需要搭建 Prometheus 并配置定时抓取和保存这些指标数据。

![Prometheus-client-data](../images/prometheus-client-data.jpg)

不同挂载或访问方式（如 FUSE 挂载、CSI 驱动、S3 网关、Hadoop SDK 等）收集指标数据的方式略有区别，详见[「收集监控指标」](#collect-metrics)。

这里以最常见的 FUSE 挂载方式为例介绍，如果还没安装 Prometheus，可以参考[官方文档](https://prometheus.io/docs/prometheus/latest/installation)。

编辑 [`prometheus.yml`](https://prometheus.io/docs/prometheus/latest/configuration/configuration) 配置文件，在抓取配置部分（`scrape_configs`）添加新的任务，定义 JuiceFS 客户端输出监控指标的地址：

```yaml {20-22}
global:
  scrape_interval: 15s
  evaluation_interval: 15s

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "juicefs"
    static_configs:
      - targets: ["localhost:9567"]
```

启动 Prometheus 服务：

```shell
./prometheus --config.file=prometheus.yml
```

访问 `http://localhost:9090` 即可看到 Prometheus 的界面。

## 2. 让 Grafana 读取 Prometheus 中的监控数据 {#grafana}

Prometheus 开始抓取 JuiceFS 的监控指标后，接下来要配置 Grafana 读取 Prometheus 中的数据。

如果还没安装 Grafana，可以参考[官方文档](https://grafana.com/docs/grafana/latest/installation)。

在 Grafana 中新建 Prometheus 类型的数据源：

- **Name**：为了便于识别，可以填写文件系统的名称。
- **URL**：Prometheus 的数据接口，默认为 `http://localhost:9090`。

![Grafana-data-source](../images/grafana-data-source.jpg)

## 3. 用 JuiceFS 官方的 Grafana 仪表盘模板展现监控指标 {#grafana-dashboard}

在 Grafana Dashboard 仓库中可以找到 JuiceFS 官方维护的仪表盘模板，可以直接在 Grafana 中通过 `https://grafana.com/grafana/dashboards/20794/` 链接导入，也可以通过 ID `20794` 导入。

Grafana 仪表盘如下图：

![grafana_dashboard](../images/grafana_dashboard.png)

## 收集监控指标 {#collect-metrics}

根据部署 JuiceFS 方式的不同可以有不同的收集监控指标的方法，下面分别介绍。

### FUSE 挂载 {#mount-point}

当通过 [`juicefs mount`](../reference/command_reference.mdx#mount) 命令挂载 JuiceFS 文件系统后，可以通过 `http://localhost:9567/metrics` 这个地址收集监控指标，你也可以通过 `--metrics` 选项自定义。如：

```shell
juicefs mount --metrics localhost:9567 ...
```

你可以使用命令行工具查看这些监控指标：

```shell
curl http://localhost:9567/metrics
```

除此之外，每个 JuiceFS 文件系统的根目录还有一个叫做 `.stats` 的隐藏文件，通过这个文件也可以查看监控指标。例如（这里假设挂载点的路径是 `/jfs`）：

```shell
cat /jfs/.stats
```

:::tip 提示
如果想要实时查看监控指标，可以使用 [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) 命令。
:::

### Kubernetes {#kubernetes}

参考 [CSI 驱动文档](https://juicefs.com/docs/zh/csi/administration/going-production#monitoring)。

### S3 网关 {#s3-gateway}

:::note 注意
该特性需要运行 0.17.1 及以上版本 JuiceFS 客户端
:::

[JuiceFS S3 网关](../guide/gateway.md)默认会在 `http://localhost:9567/metrics` 这个地址提供监控指标，你也可以通过 `--metrics` 选项自定义。如：

```shell
juicefs gateway --metrics localhost:9567 ...
```

如果你是[在 Kubernetes 中部署](../guide/gateway.md#deploy-in-kubernetes) JuiceFS S3 网关，可以参考 [Kubernetes](#kubernetes) 小节的 Prometheus 配置来收集监控指标（区别主要在于 `__meta_kubernetes_pod_label_app_kubernetes_io_name` 这个标签的正则表达式），例如：

```yaml {6-8}
scrape_configs:
  - job_name: 'juicefs-s3-gateway'
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
        action: keep
        regex: juicefs-s3-gateway
      - source_labels: [__address__]
        action: replace
        regex: ([^:]+)(:\d+)?
        replacement: $1:9567
        target_label: __address__
      - source_labels: [__meta_kubernetes_pod_node_name]
        target_label: node
        action: replace
```

#### 通过 Prometheus Operator 收集 {#prometheus-operator}

[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) 让用户在 Kubernetes 环境中能够快速部署和管理 Prometheus，借助 Prometheus Operator 提供的 `ServiceMonitor` CRD 可以自动生成抓取配置。例如（假设 JuiceFS S3 网关的 `Service` 部署在 `kube-system` 名字空间）：

```yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: juicefs-s3-gateway
spec:
  namespaceSelector:
    matchNames:
      - kube-system
  selector:
    matchLabels:
      app.kubernetes.io/name: juicefs-s3-gateway
  endpoints:
    - port: metrics
```

有关 Prometheus Operator 的更多信息，请查看[官方文档](https://prometheus-operator.dev/docs/user-guides/getting-started)。

### Hadoop Java SDK {#hadoop}

[JuiceFS Hadoop Java SDK](../deployment/hadoop_java_sdk.md) 支持把监控指标上报到 [Pushgateway](https://github.com/prometheus/pushgateway) 或者 [Graphite](https://graphiteapp.org)。

#### Pushgateway

启用指标上报到 Pushgateway：

```xml
<property>
  <name>juicefs.push-gateway</name>
  <value>host:port</value>
</property>
```

同时可以通过 `juicefs.push-interval` 配置修改上报指标的频率，默认为 10 秒上报一次。

:::info 说明
根据 [Pushgateway 官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md#configure-the-pushgateway-as-a-target-to-scrape)的建议，Prometheus 的[抓取配置](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config)中需要设置 `honor_labels: true`。

需要特别注意，Prometheus 从 Pushgateway 抓取的指标的时间戳不是 JuiceFS Hadoop Java SDK 上报时的时间，而是抓取时的时间，具体请参考 [Pushgateway 官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md#about-timestamps)。

默认情况下 Pushgateway 只会在内存中保存指标，如果需要持久化到磁盘上，可以通过 `--persistence.file` 选项指定保存的文件路径以及 `--persistence.interval` 选项指定保存到文件的频率（默认 5 分钟保存一次）。
:::

:::note 注意
每一个使用 JuiceFS Hadoop Java SDK 的进程会有唯一的指标，而 Pushgateway 会一直记住所有收集到的指标，导致指标数持续积累占用过多内存，也会使得 Prometheus 抓取指标时变慢，建议定期清理 Pushgateway 上的指标。

定期使用下面的命令清理 Pushgateway 的指标数据，清空指标不影响运行中的 JuiceFS Hadoop Java SDK 持续上报数据。**注意 Pushgateway 启动时必须指定 `--web.enable-admin-api` 选项，同时以下命令会清空 Pushgateway 中的所有监控指标。**

```bash
curl -X PUT http://host:9091/api/v1/admin/wipe
```

:::

有关 Pushgateway 的更多信息，请查看[官方文档](https://github.com/prometheus/pushgateway/blob/master/README.md)。

#### Graphite

启用指标上报到 Graphite：

```xml
<property>
  <name>juicefs.push-graphite</name>
  <value>host:port</value>
</property>
```

同时可以通过 `juicefs.push-interval` 配置修改上报指标的频率，默认为 10 秒上报一次。

JuiceFS Hadoop Java SDK 支持的所有配置参数请参考[文档](../deployment/hadoop_java_sdk.md#客户端配置参数)。

### 使用 Consul 作为注册中心 {#use-consul}

:::note 注意
该特性需要运行 1.0.0 及以上版本 JuiceFS 客户端
:::

JuiceFS 支持使用 Consul 作为监控指标 API 的注册中心，默认的 Consul 地址是 `127.0.0.1:8500`，你也可以通过 `--consul` 选项自定义。如：

```shell
juicefs mount --consul 1.2.3.4:8500 ...
```

当配置了 Consul 地址以后，`--metrics` 选项不再需要配置，JuiceFS 将会根据自身网络与端口情况自动配置监控指标 URL。如果同时设置了 `--metrics`，则会优先尝试监听配置的 URL。

注册到 Consul 上的每个服务，其[服务名](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#name)都为 `juicefs`，[服务 ID](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#id) 的格式为 `<IP>:<mount-point>`，例如：`127.0.0.1:/tmp/jfs`。

每个服务的 [`meta`](https://developer.hashicorp.com/consul/docs/services/configuration/services-configuration-reference#meta) 都包含 `hostname` 与 `mountpoint` 两个 key，对应的值分别表示挂载点所在的主机名和挂载点路径。特别地，S3 网关的 `mountpoint` 值总是为 `s3gateway`。

成功注册到 Consul 上以后，需要在 `prometheus.yml` 中新增 [`consul_sd_config`](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) 配置，在 `services` 中填写 `juicefs`。

## 监控指标索引 {#metrics-reference}

参考[「JuiceFS 监控指标」](../reference/p8s_metrics.md)。


================================================
FILE: docs/zh_cn/administration/mount_at_boot.md
================================================
---
title: 启动时自动挂载 JuiceFS
sidebar_position: 3
slug: /mount_juicefs_at_boot_time
---

在确认挂载成功，可以正常使用以后，可以参考本节内容设置开机自动挂载。

## Linux

从 JuiceFS v1.1.0 开始，挂载命令的 `--update-fstab` 选项能自动帮你设置好开机自动挂载：

```bash
$ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 <META-URL> <MOUNTPOINT>
$ grep <MOUNTPOINT> /etc/fstab
<META-URL> <MOUNTPOINT> juicefs _netdev,max-uploads=50,writeback,cache-size=204800 0 0
$ ls -l /sbin/mount.juicefs
lrwxrwxrwx 1 root root 29 Aug 11 16:43 /sbin/mount.juicefs -> /usr/local/bin/juicefs
```

如果你有意自行控制，请注意：

* 需要创建一个从 `/sbin/mount.juicefs` 到 JuiceFS 可执行文件的软链接，比如 `ln -s /usr/local/bin/juicefs /sbin/mount.juicefs`。
* 挂载命令所包含的各种选项，也需要在 fstab options 列加以声明，注意去掉 `-` 前缀，并将选项取值以 `=` 连接，举例说明：

  ```bash
  $ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 -o max_read=99 <META-URL> /jfs
  # -o 是 FUSE options，在 fstab 中需特殊对待
  $ grep jfs /etc/fstab
  redis://localhost:6379/1  /jfs juicefs _netdev,max-uploads=50,max_read=99,writeback,cache-size=204800 0 0
  ```

:::tip 提示
默认情况下，CentOS 6 在启动后不会自动挂载网络文件系统，你可以使用下面的命令开启它：

```bash
sudo chkconfig --add netfs
```

:::

### 使用 systemd.mount 实现自动挂载

基于安全考虑，JuiceFS 将命令行中的一些选项隐藏在环境变量中，所以像数据库访问密码、S3 访问密钥和密钥等设置不能直接应用于 `/etc/fstab` 文件。在这种情况下，你可以使用 systemd 来挂载 JuiceFS 实例。

以下是如何设置 systemd 配置文件的步骤：

1. 创建文件 `/etc/systemd/system/juicefs.mount`，并添加以下内容：

    ```conf
    [Unit]
    Description=Juicefs
    Before=docker.service

    [Mount]
    Environment="ALICLOUD_ACCESS_KEY_ID=mykey" "ALICLOUD_ACCESS_KEY_SECRET=mysecret" "META_PASSWORD=mypassword"
    What=mysql://juicefs@(mysql.host:3306)/juicefs
    Where=/juicefs
    Type=juicefs
    Options=_netdev,allow_other,writeback_cache

    [Install]
    WantedBy=remote-fs.target
    WantedBy=multi-user.target
    ```

    你可以根据需要更改环境变量、挂载选项等。

2. 使用以下命令启用和启动 JuiceFS 挂载：

    ```sh
    ln -s /usr/local/bin/juicefs /sbin/mount.juicefs
    systemctl enable juicefs.mount
    systemctl start juicefs.mount
    ```

完成这些步骤后，就可以访问 `/juicefs` 目录来存取文件了。

## macOS

在 `~/Library/LaunchAgents` 下创建名为 `io.juicefs.<NAME>.plist` 的文件。替换 `<NAME>` 为 JuiceFS 文件系统的名字。添加如下内容到文件中（再次替换 `NAME`、`PATH-TO-JUICEFS`、`META-URL`、`MOUNTPOINT` 和 `MOUNT-OPTIONS` 为适当的值）：

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
        <key>Label</key>
        <string>io.juicefs.NAME</string>
        <key>ProgramArguments</key>
        <array>
                <string>PATH-TO-JUICEFS</string>
                <string>mount</string>
                <string>META-URL</string>
                <string>MOUNTPOINT</string>
                <string>MOUNT-OPTIONS</string>
        </array>
        <key>RunAtLoad</key>
        <true/>
</dict>
</plist>
```

:::tip 提示
如果有多个挂载选项可以分为多行依次设置，例如：

```xml
                <string>--max-uploads</string>
                <string>50</string>
                <string>--cache-size</string>
                <string>204800</string>
```

:::

使用以下命令加载上一步创建的文件，并测试加载是否成功。**请确保元数据引擎已正常运行。**

```bash
launchctl load ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
launchctl start ~/Library/LaunchAgents/io.juicefs.<NAME>
ls <MOUNTPOINT>
```

如果挂载失败，可以将以下配置添加到 `io.juicefs.<NAME>.plist` 文件来调试：

```xml
        <key>StandardOutPath</key>
        <string>/tmp/juicefs.out</string>
        <key>StandardErrorPath</key>
        <string>/tmp/juicefs.err</string>
```

使用以下命令重新加载最新的配置并检查输出：

```bash
launchctl unload ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
launchctl load ~/Library/LaunchAgents/io.juicefs.<NAME>.plist
cat /tmp/juicefs.out
cat /tmp/juicefs.err
```

如果你是使用 Homebrew 安装的 Redis 服务，你可以使用以下命令让其在机器启动时启动它：

```bash
brew services start redis
```

然后添加以下配置到 `io.juicefs.<NAME>.plist` 文件确保 Redis 服务已经启动：

```xml
        <key>KeepAlive</key>
        <dict>
                <key>OtherJobEnabled</key>
                <string>homebrew.mxcl.redis</string>
        </dict>
```


================================================
FILE: docs/zh_cn/administration/status_check_and_maintenance.md
================================================
---
title: 状态检查 & 维护
sidebar_position: 4
---

任何一种存储系统在投入使用之后都需要定期进行检查和维护，尽早发现并修复潜在的问题，从而保证文件系统可靠运行、存储的数据完整一致。

JuiceFS 提供了一系列检查和维护文件系统的工具，不但可以帮助我们了解文件系统的基本信息、运行状态，还能够帮助我们更容易地发现和修复潜在的问题。

## status

`juicefs status` 命令用来查看一个 JuiceFS 文件系统的基本信息，所有活跃的会话状态（包括挂载、SDK 访问、S3 网关、WebDAV 连接）以及统计信息。

文件系统的基本信息中包括名称、UUID、存储类型、对象存储 Bucket、回收站状态等；统计信息默认有文件系统的配额与用量。

```shell
juicefs status redis://xxx.cache.amazonaws.com:6379/1
```

```json
{
  "Setting": {
    "Name": "myjfs",
    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
    "Storage": "s3",
    "Bucket": "https://xxx.s3.amazonaws.com",
    "AccessKey": "xxx",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "TrashDays": 1,
    "MetaVersion": 1
  },
  "Sessions": [
    {
      "Sid": 2,
      "Heartbeat": "2021-08-23T16:47:59+08:00",
      "Version": "1.0.0+2022-08-08.cf0c269",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869146
    }
  ],
  "Statistic": {
    "UsedSpace": 4886528,
    "AvailableSpace": 1125899901956096,
    "UsedInodes": 643,
    "AvailableInodes": 10485760,
  }
}
```

通过 `--session, -s` 选项指定会话的 `Sid` 可以进一步显示该会话的更多信息：

```shell
juicefs status --session 2 redis://xxx.cache.amazonaws.com:6379/1
```

```json
{
  "Sid": 2,
  "Heartbeat": "2021-08-23T16:47:59+08:00",
  "Version": "1.0.0+2022-08-08.cf0c269",
  "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
  "MountPoint": "/home/herald/mnt",
  "ProcessID": 2869146
}
```

根据会话的状态，信息中还可能包括：

- Sustained inodes：这些是已经被删掉的文件，但是因为在这个会话中已经被打开，因此会被暂时保留直至文件关闭。
- Flocks：被这个会话加锁的文件的 BSD 锁信息
- Plocks：被这个会话加锁的文件的 POSIX 锁信息

通过 `--more, -m` 选项扫描 trash 中的文件和 slice，以及延迟删除的文件和 slice：

```shell
juicefs status -m redis://xxx.cache.amazonaws.com:6379/1
```

```json
{
  "Setting": {
    "Name": "myjfs",
    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
    "Storage": "s3",
    "Bucket": "https://xxx.s3.amazonaws.com",
    "AccessKey": "xxx",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "TrashDays": 1,
    "MetaVersion": 1
  },
  "Sessions": [
    {
      "Sid": 2,
      "Heartbeat": "2021-08-23T16:47:59+08:00",
      "Version": "1.0.0+2022-08-08.cf0c269",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869146
    }
  ],
  "Statistic": {
    "UsedSpace": 4886528,
    "AvailableSpace": 1125899901956096,
    "UsedInodes": 643,
    "AvailableInodes": 10485760,
    "TrashFileCount": 277,
    "TrashFileSize": 1152597,
    "PendingDeletedFileCount": 156,
    "PendingDeletedFileSize": 1313577,
    "TrashSliceCount": 581,
    "TrashSliceSize": 1845292,
    "PendingDeletedSliceCount": 1378,
    "PendingDeletedSliceSize": 26245344,
  }
```

## info

`juicefs info` 用于检查指定文件或目录的元数据信息，其中包括该文件对应的每个 block 在对象存储上的对象路径以及作用于该文件的 flock 与 plock。

### 检查一个文件的元数据

```shell
$ juicefs info mnt/luggage-6255515.jpg

mnt/luggage-6255515.jpg :
  inode: 36
  files: 1
   dirs: 0
 length: 789.02 KiB (807955 Bytes)
   size: 792.00 KiB (811008 Bytes)
   path: /luggage-6255515.jpg
objects:
+------------+------------------------------+--------+--------+--------+
| chunkIndex |          objectName          |  size  | offset | length |
+------------+------------------------------+--------+--------+--------+
|          0 | myjfs/chunks/0/0/80_0_807955 | 807955 |      0 | 807955 |
+------------+------------------------------+--------+--------+--------+
flocks:
+-----+----------------------+------+
| Sid |         Owner        | Type |
+-----+----------------------+------+
| 4   | 14034871352581537016 |    W |
+-----+----------------------+------+
```

### 检查一个目录的元数据

该命令默认只检查一层目录：

```shell
$ juicefs info ./mnt

mnt :
  inode: 1
  files: 9
   dirs: 4
 length: 2.41 MiB (2532102 Bytes)
   size: 2.44 MiB (2555904 Bytes)
   path: /
```

如果希望递归检查所有子目录，需要指定 `--recursive, -r` 选项：

```shell
$ juicefs info -r ./mnt

./mnt :
  inode: 1
  files: 33
   dirs: 4
 length: 80.29 MiB (84191037 Bytes)
   size: 80.34 MiB (84242432 Bytes)
   path: /
```

默认情况下 `juicefs info -r` 在 `fast` 模式下运行，它结果中的目录用量不一定精准。如果你怀疑其准确性，可以使用 `--strict` 选项查看精准用量：

```shell
$ juicefs info -r ./mnt --strict

./mnt :
  inode: 1
  files: 33
   dirs: 4
 length: 80.29 MiB (84191037 Bytes)
   size: 80.34 MiB (84242432 Bytes)
   path: /
```

### 使用 inode 检查元数据

还可以通过 inode 来反向查找文件路径及数据块的信息，但需要先进入挂载点：

```shell
~     $ cd mnt
~/mnt $ juicefs info -i 36

36 :
  inode: 36
  files: 1
   dirs: 0
 length: 789.02 KiB (807955 Bytes)
   size: 792.00 KiB (811008 Bytes)
   path: /luggage-6255515.jpg
objects:
+------------+------------------------------+--------+--------+--------+
| chunkIndex |          objectName          |  size  | offset | length |
+------------+------------------------------+--------+--------+--------+
|          0 | myjfs/chunks/0/0/80_0_807955 | 807955 |      0 | 807955 |
+------------+------------------------------+--------+--------+--------+
```

## summary

JuiceFS 1.1.0 之后支持 `summary` 子命令，可以递归列出目录树和各层的使用量：

```bash
$ juicefs summary /mnt/jfs/
+---------------------------+---------+------+-------+
|            PATH           |   SIZE  | DIRS | FILES |
+---------------------------+---------+------+-------+
| /                         | 1.0 GiB |  100 |   445 |
| d/                        | 1.0 GiB |    1 |     1 |
| d/test1                   | 1.0 GiB |    0 |     1 |
| pjdfstest/                | 2.8 MiB |   39 |   304 |
| pjdfstest/tests/          | 1.1 MiB |   18 |   240 |
| pjdfstest/autom4te.cache/ | 692 KiB |    1 |     7 |
| pjdfstest/.git/           | 432 KiB |   17 |    26 |
| pjdfstest/configure       | 176 KiB |    0 |     1 |
| pjdfstest/config.log      |  84 KiB |    0 |     1 |
| pjdfstest/pjdfstest.o     |  80 KiB |    0 |     1 |
| pjdfstest/pjdfstest       |  68 KiB |    0 |     1 |
| pjdfstest/aclocal.m4      |  44 KiB |    0 |     1 |
| pjdfstest/pjdfstest.c     |  40 KiB |    0 |     1 |
| pjdfstest/config.status   |  36 KiB |    0 |     1 |
| pjdfstest/...             | 164 KiB |    2 |    24 |
| roa/                      | 2.3 MiB |   59 |   140 |
| roa/.git/                 | 1.4 MiB |   17 |    26 |
| roa/roa/                  | 252 KiB |    9 |    30 |
| roa/integration/          | 148 KiB |   13 |    22 |
| roa/roa-core/             | 124 KiB |    4 |    17 |
| roa/Cargo.lock            |  84 KiB |    0 |     1 |
| roa/roa-async-std/        |  36 KiB |    2 |     6 |
| roa/.github/              |  32 KiB |    2 |     6 |
| roa/examples/             |  32 KiB |    1 |     7 |
| roa/roa-diesel/           |  32 KiB |    2 |     5 |
| roa/assets/               |  28 KiB |    2 |     5 |
| roa/...                   | 108 KiB |    6 |    15 |
+---------------------------+---------+------+-------+
```

可以使用 `--depth value, -d value` 和 `--entries value, -e value` 选项控制目录层级和每层显示的最大数量：

```bash
$ juicefs summary /mnt/jfs/ -d 3 -e 3
+------------------------------------+---------+------+-------+
|                PATH                |   SIZE  | DIRS | FILES |
+------------------------------------+---------+------+-------+
| /                                  | 1.0 GiB |  100 |   445 |
| d/                                 | 1.0 GiB |    1 |     1 |
| d/test1                            | 1.0 GiB |    0 |     1 |
| pjdfstest/                         | 2.8 MiB |   39 |   304 |
| pjdfstest/tests/                   | 1.1 MiB |   18 |   240 |
| pjdfstest/tests/open/              | 112 KiB |    1 |    26 |
| pjdfstest/tests/rename/            | 112 KiB |    1 |    25 |
| pjdfstest/tests/link/              |  76 KiB |    1 |    18 |
| pjdfstest/tests/...                | 776 KiB |   14 |   171 |
| pjdfstest/autom4te.cache/          | 692 KiB |    1 |     7 |
| pjdfstest/autom4te.cache/output.0  | 180 KiB |    0 |     1 |
| pjdfstest/autom4te.cache/output.1  | 180 KiB |    0 |     1 |
| pjdfstest/autom4te.cache/output.2  | 180 KiB |    0 |     1 |
| pjdfstest/autom4te.cache/...       | 148 KiB |    0 |     4 |
| pjdfstest/.git/                    | 432 KiB |   17 |    26 |
| pjdfstest/.git/objects/            | 252 KiB |    3 |     2 |
| pjdfstest/.git/hooks/              |  64 KiB |    1 |    13 |
| pjdfstest/.git/logs/               |  32 KiB |    5 |     3 |
| pjdfstest/.git/...                 |  80 KiB |    7 |     8 |
| pjdfstest/...                      | 692 KiB |    2 |    31 |
| roa/                               | 2.3 MiB |   59 |   140 |
| roa/.git/                          | 1.4 MiB |   17 |    26 |
| roa/.git/objects/                  | 1.3 MiB |    3 |     2 |
| roa/.git/hooks/                    |  64 KiB |    1 |    13 |
| roa/.git/logs/                     |  32 KiB |    5 |     3 |
| roa/.git/...                       |  72 KiB |    7 |     8 |
| roa/roa/                           | 252 KiB |    9 |    30 |
| roa/roa/src/                       | 228 KiB |    7 |    27 |
| roa/roa/README.md                  | 8.0 KiB |    0 |     1 |
| roa/roa/templates/                 | 8.0 KiB |    1 |     1 |
| roa/roa/...                        | 4.0 KiB |    0 |     1 |
| roa/integration/                   | 148 KiB |   13 |    22 |
| roa/integration/diesel-example/    |  52 KiB |    4 |     9 |
| roa/integration/multipart-example/ |  36 KiB |    4 |     5 |
| roa/integration/juniper-example/   |  32 KiB |    2 |     5 |
| roa/integration/...                |  24 KiB |    2 |     3 |
| roa/...                            | 476 KiB |   19 |    62 |
+------------------------------------+---------+------+-------+
```

此命令也支持标准 csv 输出，用于其它软件解析：

```bash
$ juicefs summary /mnt/jfs/ --csv
PATH,SIZE,DIRS,FILES
/,1079132160,100,445
d/,1073745920,1,1
d/test1,1073741824,0,1
pjdfstest/,2969600,39,304
pjdfstest/tests/,1105920,18,240
pjdfstest/autom4te.cache/,708608,1,7
pjdfstest/.git/,442368,17,26
pjdfstest/configure,180224,0,1
pjdfstest/config.log,86016,0,1
pjdfstest/pjdfstest.o,81920,0,1
pjdfstest/pjdfstest,69632,0,1
pjdfstest/aclocal.m4,45056,0,1
pjdfstest/pjdfstest.c,40960,0,1
pjdfstest/config.status,36864,0,1
pjdfstest/...,167936,2,24
roa/,2412544,59,140
roa/.git/,1511424,17,26
roa/roa/,258048,9,30
roa/integration/,151552,13,22
roa/roa-core/,126976,4,17
roa/Cargo.lock,86016,0,1
roa/roa-async-std/,36864,2,6
roa/.github/,32768,2,6
roa/examples/,32768,1,7
roa/roa-diesel/,32768,2,5
roa/assets/,28672,2,5
roa/...,110592,6,15
```

默认情况下 `juicefs summary` 在 `fast` 模式下运行，它结果中的目录用量不一定精准。如果你怀疑其准确性，可以使用 `--strict` 选项查看精准用量。

## gc {#gc}

`juicefs gc` 是一个用来处理「对象泄漏」与「待清理对象」，以及因为覆盖写而产生的碎片数据的工具。它以元数据信息为基准与对象存储中的数据进行逐一扫描比对，从而找出或清理对象存储上需要处理的数据块。

:::info 说明
**对象泄漏**是指数据块在对象存储，但元数据引擎中没有对应的记录的情况。对象泄漏极少出现，成因可能是程序 bug、元数据引擎或对象存储的未预期问题、断电、断网等等。
**待清理对象**是指被原数据引擎标记为删除但还未清理的对象。待删除对象很常见，比如到期的 trash 文件与 slice 和延迟删除的文件与 slice。
:::

:::tip 提示
虽然几乎不会出现对象泄漏的情况，但你仍然可以根据需要进行相应例行检查。文件在上传到对象存储时可能产生临时的中间文件，它们会在写入完成后被清理。为了避免中间文件被误判为泄漏的对象，`juicefs gc` 默认会跳过最近 1 个小时上传的文件。可以通过 `JFS_GC_SKIPPEDTIME` 环境变量调整跳过的时间范围（单位为秒）。例如设置跳过最近 2 个小时的文件：`export JFS_GC_SKIPPEDTIME=7200`。
:::

:::tip 提示
因为 `juicefs gc` 命令会扫描对象存储中的所有对象，所以对于数据量较大的文件系统执行这个命令会有一定开销。另外使用此命令之前请确保您不需要回滚到旧版本元数据，并且建议您备份对象存储数据。
:::

### 扫描

默认情况下 `juicefs gc` 仅执行扫描：

```shell
$ juicefs gc sqlite3://myjfs.db
Pending deleted files: 0                            0.0/s         
 Pending deleted data: 0.0 b   (0 Bytes)            0.0 b/s       
Cleaned pending files: 0                            0.0/s         
 Cleaned pending data: 0.0 b   (0 Bytes)            0.0 b/s       
        Listed slices: 4437                         82800.0/s     
         Trash slices: 0                            0.0/s         
           Trash data: 0.0 b   (0 Bytes)            0.0 b/s       
 Cleaned trash slices: 0                            0.0/s         
   Cleaned trash data: 0.0 b   (0 Bytes)            0.0 b/s       
      Scanned objects: 4741/4741 [==============================================================]  387369.2/s used: 12.247821ms
        Valid objects: 4741                         395521.0/s    
           Valid data: 1.7 GiB (1846388716 Bytes)   143.6 GiB/s   
    Compacted objects: 0                            0.0/s         
       Compacted data: 0.0 b   (0 Bytes)            0.0 b/s       
       Leaked objects: 0                            0.0/s         
          Leaked data: 0.0 b   (0 Bytes)            0.0 b/s       
      Skipped objects: 0                            0.0/s         
         Skipped data: 0.0 b   (0 Bytes)            0.0 b/s       
2023/06/09 10:14:33.683384 juicefs[280403] <INFO>: scanned 4741 objects, 4741 valid, 0 compacted (0 bytes), 0 leaked (0 bytes), 0 delslices (0 bytes), 0 delfiles (0 bytes), 0 skipped (0 bytes) [gc.go:379]
```

### 清理

当 `juicefs gc` 命令扫描到了「泄漏的对象」或「待清理对象」，可以通过 `--delete` 选项对它们进行清理。客户端默认启动 10 个线程执行清理操作，可以使用 `--threads, -p` 选项来调整线程数量。

```shell
$ juicefs gc sqlite3://myjfs.db --delete
Cleaned pending slices: 0                            0.0/s         
 Pending deleted files: 0                            0.0/s         
  Pending deleted data: 0.0 b   (0 Bytes)            0.0 b/s       
 Cleaned pending files: 0                            0.0/s         
  Cleaned pending data: 0.0 b   (0 Bytes)            0.0 b/s       
         Cleaned trash: 0                            0.0/s         
Cleaned detached nodes: 0                            0.0/s         
         Listed slices: 4437                         75803.6/s     
          Trash slices: 0                            0.0/s         
            Trash data: 0.0 b   (0 Bytes)            0.0 b/s       
  Cleaned trash slices: 0                            0.0/s         
    Cleaned trash data: 0.0 b   (0 Bytes)            0.0 b/s       
       Scanned objects: 4741/4741 [==============================================================]  337630.2/s used: 14.056704ms
         Valid objects: 4741                         345974.4/s    
            Valid data: 1.7 GiB (1846388716 Bytes)   125.6 GiB/s   
     Compacted objects: 0                            0.0/s         
        Compacted data: 0.0 b   (0 Bytes)            0.0 b/s       
        Leaked objects: 0                            0.0/s         
           Leaked data: 0.0 b   (0 Bytes)            0.0 b/s       
       Skipped objects: 0                            0.0/s         
          Skipped data: 0.0 b   (0 Bytes)            0.0 b/s       
2023/06/09 10:15:49.819995 juicefs[280474] <INFO>: scanned 4741 objects, 4741 valid, 0 compacted (0 bytes), 0 leaked (0 bytes), 0 delslices (0 bytes), 0 delfiles (0 bytes), 0 skipped (0 bytes) [gc.go:379]
```

随后可以再执行一次 `juicefs gc` 检查是否清理成功。

## fsck

`juicefs fsck` 是一个以数据块为基准与元数据进行逐一扫描比对的工具，主要用来修复文件系统内可能发生而且可以修复的各种问题。它可以帮你找到元数据引擎中存在记录，但对象存储中没有对应数据块的情况，还可以检查文件的属性信息是否存在。

```shell {5}
$ juicefs fsck sqlite3://myjfs2.db

2022/11/10 17:31:19.062348 juicefs[26158] <INFO>: Meta address: sqlite3://myjfs2.db [interface.go:402]
2022/11/10 17:31:19.063132 juicefs[26158] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/ [fsck.go:73]
2022/11/10 17:31:19.065857 juicefs[26158] <ERROR>: can't find block 0/1/1063_0_2693747 for file /david-bruno-silva-Z19vToWBDIc-unsplash.jpg: stat /Users/herald/.juicefs/local/myjfs/chunks/0/1/1063_0_2693747: no such file or directory [fsck.go:146]
  Found blocks count: 68
  Found blocks bytes: 34.24 MiB (35904042 Bytes)
 Listed slices count: 65
Scanned slices count: 65 / 65 [=======================================]  done
Scanned slices bytes: 36.81 MiB (38597789 Bytes)
   Lost blocks count: 1
   Lost blocks bytes: 2.57 MiB  (2693747 Bytes)
2022/11/10 17:31:19.066243 juicefs[26158] <FATAL>: 1 objects are lost (2693747 bytes), 1 broken files:
        INODE: PATH
           57: /david-bruno-silva-Z19vToWBDIc-unsplash.jpg [fsck.go:168]
```

从结果可以看到，`juicefs fsck` 扫描发现文件系统中因为丢失了数据块致使一个文件损坏。

虽然结果表明后端存储中的文件已经损坏，但还是有必要去挂载点查验一下文件是否可以访问，因为 JuiceFS 会在本地缓存最近访问过的文件数据，文件损坏之前的版本如果已经缓存在本地，则可以将缓存的文件数据块重新上传以避免丢失数据。你可以在缓存目录（即 `--cache-dir` 选项对应的路径）中根据 `juicefs fsck` 命令输出的数据块路径查找是否存在缓存数据，例如上面例子中丢失的数据块路径为 `0/1/1063_0_2693747`。

### 强制同步目录用量

在[目录用量统计](../guide/dir-stats.md)中我们介绍了这个新功能。虽然 fsck 默认会发现以及修复明显损坏的目录用量，但目录用量仍有可能不精准。我们可以使用 `--sync-dir-stat` 选项来强制检查或修复目录用量：

```bash
$ juicefs fsck redis://localhost --path /d --sync-dir-stat
2023/06/07 15:59:14.080820 juicefs[228395] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/06/07 15:59:14.082555 juicefs[228395] <INFO>: Ping redis latency: 49.904µs [redis.go:3569]
2023/06/07 15:59:14.083412 juicefs[228395] <WARNING>: usage stat of /d should be &{1073741824 1073741824 1}, but got &{0 0 0} [base.go:2026]
2023/06/07 15:59:14.083443 juicefs[228395] <WARNING>: Stat of path /d (inode 10701) should be synced, please re-run with '--path /d --repair --sync-dir-stat' to fix it [base.go:2041]
2023/06/07 15:59:14.083473 juicefs[228395] <FATAL>: some errors occurred, please check the log of fsck [main.go:31]

$ juicefs fsck redis://localhost --path /d --repair --sync-dir-stat
2023/06/07 16:00:43.043851 juicefs[228487] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/06/07 16:00:43.051556 juicefs[228487] <INFO>: Ping redis latency: 577.29µs [redis.go:3569]

# 成功修复
$ juicefs fsck redis://localhost --path /d --sync-dir-stat
2023/06/07 16:01:08.401972 juicefs[228547] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/06/07 16:01:08.404041 juicefs[228547] <INFO>: Ping redis latency: 85.566µs [redis.go:3569]
```

## compact {#compact}

`juicefs compact` 是 v1.2 版本中新增的功能，它是一个用来处理因为覆盖写而产生的碎片数据的工具。它将随机写产生的大量不连续的 slice 进行合并或清理，从而提升文件系统的读性能。

相比于 `juicefs gc` 对整个文件系统进行垃圾回收和碎片整理，`juicefs compact` 可指定目录处理因为覆盖写而产生的碎片数据。

```shell
juicefs compact /mnt/jfs/foo
```

另外，可以使用 `-p, --threads` 选项指定并发线程数，以加快处理速度。默认值为 10，可以根据实际情况调整。

```shell
juicefs compact /mnt/jfs/foo -p 20
```


================================================
FILE: docs/zh_cn/administration/sync_accounts_between_multiple_hosts.md
================================================
---
title: 多主机间同步账户
sidebar_position: 7
slug: /sync_accounts_between_multiple_hosts
---

JuiceFS 支持 Unix 文件权限，以目录或文件的粒度管理权限。该行为与本地文件系统相同。

为了让用户获得直观一致的权限管理体验（例如，用户 A 在主机 X 中访问的文件，在主机 Y 中也应该可以用相同的用户身份访问），想要访问 JuiceFS 存储的同一个用户，应该在所有主机上具有相同的 UID 和 GID。

在这里，我们提供了一个简单的 [Ansible](https://www.ansible.com/community) playbook 来演示如何确保一个帐户在多个主机上具有相同的 UID 和 GID。

:::note 注意
如果你是在 Hadoop 环境使用 JuiceFS，除了在多主机间同步账户以外，也可以指定一个全局的用户列表和所属用户组文件，具体请参见[这里](../deployment/hadoop_java_sdk.md#其它配置)。
:::

## 安装 Ansible

选择一个主机作为 [控制节点](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#managed-node-requirements)，它可以使用 `ssh` 以 `root` 或其他在 sudo 用户组的身份，访问所有。在此主机上安装 Ansible。阅读 [安装 Ansible](https://docs.ansible.com/ansible/latest/installation_guide/intro_installation.html#installing-ansible) 了解更多安装细节。

## 确保所有主机上的帐户相同

创建一个空目录 `account-sync` ，将下面的内容保存在该目录下的 `play.yaml` 中。

```yaml
---
- hosts: all
  tasks:
    - name: "Ensure group {{ group }} with gid {{ gid }} exists"
      group:
        name: "{{ group }}"
        gid: "{{ gid }}"
        state: present

    - name: "Ensure user {{ user }} with uid {{ uid }} exists"
      user:
        name: "{{ user }}"
        uid: "{{ uid }}"
        group: "{{ gid }}"
        state: present
```

在该目录下创建一个名为 `hosts` 的文件，将所有需要创建账号的主机的 IP 地址放置在该文件中，每行一个 IP。

在这里，我们确保在 2 台主机上使用 UID 1200 的帐户 `alice` 和 GID 500 的 `staff` 组：

```shell
~/account-sync$ cat hosts
172.16.255.163
172.16.255.180
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=staff gid=500 user=alice uid=1200" play.yaml

PLAY [all] ************************************************************************************************

TASK [Gathering Facts] ************************************************************************************
ok: [172.16.255.180]
ok: [172.16.255.163]

TASK [Ensure group staff with gid 500 exists] *************************************************************
ok: [172.16.255.163]
ok: [172.16.255.180]

TASK [Ensure user alice with uid 1200 exists] *************************************************************
changed: [172.16.255.180]
changed: [172.16.255.163]

PLAY RECAP ************************************************************************************************
172.16.255.163             : ok=3    changed=1    unreachable=0    failed=0
172.16.255.180             : ok=3    changed=1    unreachable=0    failed=0
```

现在已经在这 2 台主机上创建了新帐户 `alice:staff`。

如果指定的 UID 或 GID 已分配给某些主机上的另一个用户或组，则创建将失败。

```shell
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=ubuntu gid=1000 user=ubuntu uid=1000" play.yaml

PLAY [all] ************************************************************************************************

TASK [Gathering Facts] ************************************************************************************
ok: [172.16.255.180]
ok: [172.16.255.163]

TASK [Ensure group ubuntu with gid 1000 exists] ***********************************************************
ok: [172.16.255.163]
fatal: [172.16.255.180]: FAILED! => {"changed": false, "msg": "groupmod: GID '1000' already exists\n", "name": "ubuntu"}

TASK [Ensure user ubuntu with uid 1000 exists] ************************************************************
ok: [172.16.255.163]
    to retry, use: --limit @/home/ubuntu/account-sync/play.retry

PLAY RECAP ************************************************************************************************
172.16.255.163             : ok=3    changed=0    unreachable=0    failed=0
172.16.255.180             : ok=1    changed=0    unreachable=0    failed=1
```

在上面的示例中，组 ID 1000 已分配给主机 `172.16.255.180` 上的另一个组，我们应该 **更改 GID** 或 **删除主机 `172.16.255.180` 上 GID 为 1000** 的组，然后再次运行 playbook。

:::caution 注意
如果用户帐户已经存在于主机上，并且我们将其更改为另一个 UID 或 GID 值，则用户可能会失去对他们以前拥有的文件和目录的权限。例如：

```shell
$ ls -l /tmp/hello.txt
-rw-r--r-- 1 alice staff 6 Apr 26 21:43 /tmp/hello.txt
$ id alice
uid=1200(alice) gid=500(staff) groups=500(staff)
```

我们将 alice 的 UID 从 1200 改为 1201

```shell
~/account-sync$ ansible-playbook -i hosts -u root --ssh-extra-args "-o StrictHostKeyChecking=no" \
--extra-vars "group=staff gid=500 user=alice uid=1201" play.yaml
```

现在我们没有权限删除这个文件，因为它的所有者不是 alice：

```shell
$ ls -l /tmp/hello.txt
-rw-r--r-- 1 1200 staff 6 Apr 26 21:43 /tmp/hello.txt
$ rm /tmp/hello.txt
rm: remove write-protected regular file '/tmp/hello.txt'? y
rm: cannot remove '/tmp/hello.txt': Operation not permitted
```

:::


================================================
FILE: docs/zh_cn/administration/troubleshooting.md
================================================
---
title: 问题排查案例
sidebar_position: 6
---

这里收录常见问题的具体排查步骤。

## 创建文件系统（format）错误 {#format-error}

### 无法重复创建文件系统 {#create-file-system-repeatedly}

元数据引擎已经执行了 `juicefs format`，再次执行时可能无法更新之前的某些配置，将会报错：

```
cannot update volume XXX from XXX to XXX
```

这种情况需要清理元数据引擎中对应的数据，再重试。

### Redis URL 格式错误 {#invalid-redis-url}

使用的 Redis 版本小于 6.0.0 时，执行 `juicefs format` 命令如果指定了 `username` 参数，将会报错：

```
format: ERR wrong number of arguments for 'auth' command
```

只有 Redis 6.0.0 版本以后才支持指定 `username`，因此你需要省略 URL 中的 `username` 参数，例如 `redis://:password@host:6379/1`。

### Redis 哨兵（Sentinel）模式 NOAUTH 错误 {#redis-sentinel-noauth-error}

如果使用 [Redis 哨兵模式](../administration/metadata/redis_best_practices.md#sentinel-mode)时遇到以下错误：

```
sentinel: GetMasterAddrByName master="xxx" failed: NOAUTH Authentication required.
```

请确认是否为 Redis 哨兵实例[设置了密码](https://redis.io/docs/management/sentinel/#configuring-sentinel-instances-with-authentication)，如果设置了，那么需要通过 `SENTINEL_PASSWORD` 环境变量单独配置连接哨兵实例的密码，元数据引擎 URL 里的密码只会用于连接 Redis 服务器。

## 权限问题导致挂载错误 {#mount-permission-error}

使用 [Docker bind mounts](https://docs.docker.com/storage/bind-mounts) 把宿主机上的一个目录挂载到容器中时，可能遇到下方错误：

```
docker: Error response from daemon: error while creating mount source path 'XXX': mkdir XXX: file exists.
```

这往往是因为使用了非 root 用户执行 `juicefs mount` 命令，进而导致 Docker 没有权限访问这个目录。这个问题有两种解决方法：

* 用 root 用户执行 `juicefs mount` 命令
* 在 FUSE 的配置文件，以及挂载命令中增加 [`allow_other`](../reference/fuse_mount_options.md#allow_other) 挂载选项。

使用普通用户执行 `juicefs mount` 命令时，可能遇到下方错误：

```
fuse: fuse: exec: "/bin/fusermount": stat /bin/fusermount: no such file or directory
```

这个错误仅在普通用户执行挂载时出现，意味着找不到 `fusermount` 这个命令。此问题有两种解决方法：

* 用 root 用户执行 `juicefs mount` 命令
* 安装 `fuse` 包（例如 `apt-get install fuse`、`yum install fuse`）

而如果当前用户不具备 `fusermount` 命令的执行权限，则还会遇到以下错误：

```
fuse: fuse: fork/exec /usr/bin/fusermount: permission denied
```

此时可以通过下面的命令检查 `fusermount` 命令的权限：

```shell
# 只有 root 用户和 fuse 用户组的用户有权限执行
$ ls -l /usr/bin/fusermount
-rwsr-x---. 1 root fuse 27968 Dec  7  2011 /usr/bin/fusermount

# 所有用户都有权限执行
$ ls -l /usr/bin/fusermount
-rwsr-xr-x 1 root root 32096 Oct 30  2018 /usr/bin/fusermount
```

## 读写慢与读写失败 {#read-write-error}

### 与对象存储通信不畅（网速慢） {#io-error-object-storage}

如果无法访问对象存储，或者仅仅是网速太慢，JuiceFS 客户端也会发生读写错误。你也可以在日志中找到相应的报错。

```text
# 上传块的速度不符合预期
<INFO>: slow request: PUT chunks/0/0/1_0_4194304 (%!s(<nil>), 20.512s)

# flush 超时通常意味着对象存储上传失败
<ERROR>: flush 9902558 timeout after waited 8m0s
<ERROR>: pending slice 9902558-80: ...
```

如果是网络异常导致无法访问，或者对象存储本身出现服务异常，问题排查相对简单。但在如果是在低带宽场景下希望优化 JuiceFS 的使用体验，需要留意的事情就稍微多一些。

首先，在网速慢的时候，JuiceFS 客户端上传／下载文件容易超时（类似上方的错误日志），这种情况下可以考虑：

* 降低上传并发度，比如 [`--max-uploads=1`](../reference/command_reference.mdx#mount-data-storage-options)，避免上传超时。
* 降低读写缓冲区大小，比如 [`--buffer-size=64`](../reference/command_reference.mdx#mount-data-cache-options) 或者更小。当带宽充裕时，增大读写缓冲区能提升并发性能。但在低带宽场景下使用过大的读写缓冲区，`flush` 的上传时间会很长，因此容易超时。
* 默认 GET／PUT 请求超时时间为 60 秒，因此增大 `--get-timeout` 以及 `--put-timeout`，可以改善读写超时的情况。

此外，低带宽环境下需要慎用[「客户端写缓存」](../guide/cache.md#client-write-cache)特性。先简单介绍一下 JuiceFS 的后台任务设计：每个 JuiceFS 客户端默认都启用后台任务，后台任务中会执行碎片合并（compaction）、异步删除等工作，而如果节点网络状况太差，则会降低系统整体性能。更糟的是如果该节点还启用了客户端写缓存，则容易出现碎片合并后上传缓慢，导致其他节点无法读取该文件的危险情况：

```text
# 由于 writeback，碎片合并后的结果迟迟上传不成功，导致其他节点读取文件报错
<ERROR>: read file 14029704: input/output error
<INFO>: slow operation: read (14029704,131072,0): input/output error (0) <74.147891>
<WARNING>: fail to read sliceId 1771585458 (off:4194304, size:4194304, clen: 37746372): get chunks/0/0/1_0_4194304: oss: service returned error: StatusCode=404, ErrorCode=NoSuchKey, ErrorMessage="The specified key does not exist.", RequestId=62E8FB058C0B5C3134CB80B6
```

为了避免此类问题，我们推荐在低带宽节点上禁用后台任务，也就是为挂载命令添加 [`--no-bgjob`](../reference/command_reference.mdx#mount-metadata-options) 参数。

### 警告日志：找不到对象存储块 {#warning-log-block-not-found-in-object-storage}

规模化使用 JuiceFS 时，往往会在客户端日志中看到类似以下警告：

```
<WARNING>: fail to read sliceId 1771585458 (off:4194304, size:4194304, clen: 37746372): get chunks/0/0/1_0_4194304: oss: service returned error: StatusCode=404, ErrorCode=NoSuchKey, ErrorMessage="The specified key does not exist.", RequestId=62E8FB058C0B5C3134CB80B6
```

出现这一类警告时，如果并未伴随着访问异常（比如日志中出现 `input/output error`），其实不必特意关注，客户端会自行重试，往往不影响文件访问。

这行警告日志的含义是：访问 slice 出错了，因为对应的某个对象存储块不存在，对象存储返回了 `NoSuchKey` 错误。出现此类异常的可能原因有下：

* JuiceFS 客户端会异步运行碎片合并（Compaction），碎片合并完成后，文件与对象存储数据块（Block）的关系随之改变，但此时可能其他客户端正在读取该文件，因此随即报错。
* 某些客户端开启了[「写缓存」](../guide/cache.md#client-write-cache)，文件已经写入，提交到了元数据服务，但对应的对象存储 Block 却并未上传完成（比如[网速慢](#io-error-object-storage)），导致其他客户端在读取该文件时，对象存储返回数据不存在。

再次强调，如果并未出现应用端访问异常，则可安全忽略此类警告。

## 读放大 {#read-amplification}

在 JuiceFS 中，一个典型的读放大现象是：对象存储的下行流量，远大于实际读文件的速度。比方说 JuiceFS 客户端的读吞吐为 200MiB/s，但是在 S3 观察到了 2GiB/s 的下行流量。

JuiceFS 中内置了[预读](../guide/cache.md#client-read-cache)（prefetch）机制：随机读 block 的某一段，会触发整个 block 下载，这个默认开启的读优化策略，在某些场景下会带来读放大。了解这个设计以后，我们就可以开始排查了。

结合先前问题排查方法一章中介绍的[访问日志](./fault_diagnosis_and_analysis.md#access-log)知识，我们可以采集一些访问日志来分析程序的读模式，然后针对性地调整配置。下面是一个实际生产环境案例的排查过程：

```shell
# 收集一段时间的访问日志，比如 30 秒：
cat /jfs/.accesslog | grep -v "^#$" >> access.log

# 用 wc、grep 等工具简单统计发现，访问日志中大多都是 read 请求：
wc -l access.log
grep "read (" access.log | wc -l

# 选取一个文件，通过 inode 追踪其访问模式，read 的输入参数里，第一个就是 inode：
grep "read (148153116," access.log
```

采集到该文件的访问日志如下：

```
2022.09.22 08:55:21.013121 [uid:0,gid:0,pid:0] read (148153116,131072,28668010496): OK (131072) <1.309992>
2022.09.22 08:55:21.577944 [uid:0,gid:0,pid:0] read (148153116,131072,14342746112): OK (131072) <1.385073>
2022.09.22 08:55:22.098133 [uid:0,gid:0,pid:0] read (148153116,131072,35781816320): OK (131072) <1.301371>
2022.09.22 08:55:22.883285 [uid:0,gid:0,pid:0] read (148153116,131072,3570397184): OK (131072) <1.305064>
2022.09.22 08:55:23.362654 [uid:0,gid:0,pid:0] read (148153116,131072,100420673536): OK (131072) <1.264290>
2022.09.22 08:55:24.068733 [uid:0,gid:0,pid:0] read (148153116,131072,48602152960): OK (131072) <1.185206>
2022.09.22 08:55:25.351035 [uid:0,gid:0,pid:0] read (148153116,131072,60529270784): OK (131072) <1.282066>
2022.09.22 08:55:26.631518 [uid:0,gid:0,pid:0] read (148153116,131072,4255297536): OK (131072) <1.280236>
2022.09.22 08:55:27.724882 [uid:0,gid:0,pid:0] read (148153116,131072,715698176): OK (131072) <1.093108>
2022.09.22 08:55:31.049944 [uid:0,gid:0,pid:0] read (148153116,131072,8233349120): OK (131072) <1.020763>
2022.09.22 08:55:32.055613 [uid:0,gid:0,pid:0] read (148153116,131072,119523176448): OK (131072) <1.005430>
2022.09.22 08:55:32.056935 [uid:0,gid:0,pid:0] read (148153116,131072,44287774720): OK (131072) <0.001099>
2022.09.22 08:55:33.045164 [uid:0,gid:0,pid:0] read (148153116,131072,1323794432): OK (131072) <0.988074>
2022.09.22 08:55:36.502687 [uid:0,gid:0,pid:0] read (148153116,131072,47760637952): OK (131072) <1.184290>
2022.09.22 08:55:38.525879 [uid:0,gid:0,pid:0] read (148153116,131072,53434183680): OK (131072) <0.096732>
```

对着日志观察下来，发现读文件的行为大体上是「频繁随机小读」。我们尤其注意到 offset（也就是 `read` 的第三个参数）跳跃巨大，说明相邻的读操作之间跨度很大，难以利用到预读提前下载下来的数据（默认的块大小是 4MiB，换算为 4194304 字节的 offset）。也正因此，我们建议将 `--prefetch` 调整为 0（让预读并发度为 0，也就是禁用该行为），并重新挂载。这样一来，在该场景下的读放大问题得到很好的改善。

## 内存占用过高 {#memory-optimization}

如果 JuiceFS 客户端内存占用过高，考虑按照以下方向进行排查调优，但也请注意，内存优化势必不是免费的，每一项设置调整都将带来相应的开销，请在调整前做好充分的测试与验证。

* 读写缓冲区（也就是 `--buffer-size`）的大小，直接与 JuiceFS 客户端内存占用相关，因此可以通过降低读写缓冲区大小来减少内存占用，但请注意降低以后可能同时也会对读写性能造成影响。更多详见[「读写缓冲区」](../guide/cache.md#buffer-size)。
* JuiceFS 挂载客户端是一个 Go 程序，因此也可以通过降低 `GOGC`（默认 100）来令 Go 在运行时执行更为激进的垃圾回收（将带来更多 CPU 消耗，甚至直接影响性能）。详见[「Go Runtime」](https://pkg.go.dev/runtime#hdr-Environment_Variables)。
* 如果你使用自建的 Ceph RADOS 作为 JuiceFS 的数据存储，可以考虑将 glibc 替换为 [TCMalloc](https://google.github.io/tcmalloc)，后者有着更高效的内存管理实现，能在该场景下有效降低堆外内存占用。

## 卸载错误 {#unmount-error}

卸载 JuiceFS 文件系统时，如果某个文件或者目录正在被使用，那么卸载将会报错（下方假设挂载点为 `/jfs`）：

```shell
# Linux
umount: /jfs: target is busy.
        (In some cases useful info about processes that use
         the device is found by lsof(8) or fuser(1))

# macOS
Resource busy -- try 'diskutil unmount'
```

这种情况下可以：

* 用类似 `lsof /jfs` 的命令，找出该文件系统下正在使用的文件，然后按需处置对应的进程（比如强制退出），然后再次尝试卸载。
* 用 `echo 1 > /sys/fs/fuse/connections/[device-number]/abort` 强制关闭 FUSE 连接，然后再次尝试卸载。其中 `[device-number]` 也许需要你用 `lsof /jfs` 手动确认，不过本机只有一个 FUSE 挂载点的话，那么 `/sys/fs/fuse/connections` 下也只会包含一个目录，不必特意确认。
* 如果并不关心已经打开的文件，只想要尽快卸载，也可以运行 `juicefs umount --force` 来强制卸载，不过注意，强制卸载在 Linux、macOS 上的行为并不一致：
  * 对 Linux 而言，`juicefs umount --force` 意味着 `umount --lazy`，文件系统会被卸载，但已打开的文件不会关闭，而是等进程退出后再退出 FUSE 客户端。
  * 对 macOS 而言，`juicefs umount --force` 意味着 `umount -f`，文件系统会被强制卸载，已打开的文件会强制关闭。

## 系统自动 mount 不生效 {#netmount}

管理员一般通过 `--update-fstab` 更新 `/etc/fstab` 以确保系统在重启后自动 mount JuiceFS 文件系统，但某些最小化的 Linux 发行版本如 Alpine, 可能在其基础镜像中缺少 netmount 或类似功能的包。这个包对于网络文件系统是必要的。如果缺少 netmount 包，系统在重启后无法在 `/etc/fstab` 中自动挂载 JuiceFS 文件系统。为了解决这个问题，需安装 netmount 包并启动相关服务。
以 Alpine 为例：

```bash
# use --update-fstab to add juicefs mount to /etc/fstab

# install and enable netmount service
apk add openrc

rc-update add netmount boot
# * service netmount added to runlevel boot

 rc-service netmount start
# / # rc-service netmount start
# * Mounting network filesystems ...

```

## 开发相关问题 {#development-related-issues}

编译 JuiceFS 需要 GCC 5.4 及以上版本，版本过低可能导致类似下方报错：

```
/go/pkg/tool/linux_amd64/link: running gcc failed: exit status 1
/go/pkg/tool/linux_amd64/compile: signal: killed
```

如果编译环境与运行环境的 glibc 版本不同，会发生如下报错：

```
$ juicefs
juicefs: /lib/aarch64-linux-gnu/libc.so.6: version 'GLIBC_2.28' not found (required by juicefs)
```

这需要你在运行环境重新编译 JuiceFS 客户端，大部分 Linux 发行版都预置了 glibc，你可以用 `ldd --version` 确认其版本。


================================================
FILE: docs/zh_cn/administration/upgrade.md
================================================
---
sidebar_position: 9
---

# 客户端升级

不同 JuiceFS 客户端的升级方式不同，以下分别介绍。

## 挂载点

### 普通升级

JuiceFS 客户端只有一个二进制程序，升级新版只需用新版程序替换旧版程序即可。

- **使用预编译客户端**：可以参照[「安装」](../getting-started/installation.md#install-the-pre-compiled-client)文档中相应系统的安装方法，下载最新的客户端，覆盖旧版客户端即可。
- **手动编译客户端**：可以拉取最新的源代码重新编译，覆盖旧版客户端即可，具体请参考[「安装」](../getting-started/installation.md#manually-compiling)文档。

:::caution 注意
对于已经使用旧版 JuiceFS 客户端挂载好的文件系统，需要先[卸载文件系统](../getting-started/for_distributed.md#7-卸载文件系统)，然后用新版 JuiceFS 客户端重新挂载。

卸载文件系统时需确保没有任何应用正在访问，否则将会卸载失败。不可强行卸载文件系统，有可能造成应用无法继续正常访问。
:::

### 平滑升级

JuiceFS 在 v1.2 版本中开始支持平滑升级功能，即在相同的挂载点再次挂载 JuiceFS 即可实现业务无感的客户端平滑升级。另外该功能还可以用来动态的调整挂载参数。

下面举例说明两个常用的场景

1. 客户端升级
   比如当前存在 `juicefs mount` 进程 `juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d`，现希望在不卸载挂载点的情况下部署新的 JuiceFS 客户端，可以执行以下步骤：

   ```shell
    # 1. 备份当前二进制
   cp juicefs juicefs.bak
   
   # 2. 下载新的二进制覆盖当前 juicefs 二进制
   
   # 3. 再次执行 juicefs mount 命令完成平滑升级
   juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d
    ```

2. 动态调整挂载参数

  比如当前存在 `juicefs mount` 进程 `juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs -d`，现希望在不卸载挂载点的情况下将日志级别调整为 debug，可以执行以下命令：

```shell
# 调整日志级别
juicefs mount redis://127.0.0.1:6379/0 /mnt/jfs --debug -d
```

一些注意事项：

1. 平滑升级要求新旧进程的 JuiceFS 客户端版本都至少为 v1.2 版本。

2. 新的挂载参数中的 FUSE 参数应该与旧的挂载参数保持一致，否则平滑升级会在当前挂载点上继续覆盖挂载。

3. `enable-xattr` 开启时，平滑升级会在当前挂载点上继续覆盖挂载。

## Kubernetes CSI 驱动

请参考[官方文档](https://juicefs.com/docs/zh/csi/upgrade-csi-driver)了解如何升级 JuiceFS CSI 驱动。

## S3 网关

与[挂载点](#挂载点)一样，升级 S3 网关也是使用新版程序替换旧版程序即可。

如果是[通过 Kubernetes 部署](../guide/gateway.md#deploy-in-kubernetes)，则需要根据具体部署的方式来升级，以下详细介绍。

### 通过 kubectl 升级

下载并修改 S3 网关[部署 YAML](https://github.com/juicedata/juicefs/blob/main/deploy/juicefs-s3-gateway.yaml) 中的 `juicedata/juicefs-csi-driver` 镜像标签为想要升级的版本（关于所有版本的详细说明请参考[这里](https://github.com/juicedata/juicefs-csi-driver/releases)），然后运行以下命令：

```shell
kubectl apply -f ./juicefs-s3-gateway.yaml
```

### 通过 Helm 升级

请依次运行以下命令以升级 S3 网关：

```shell
helm repo update
helm upgrade juicefs-s3-gateway juicefs-s3-gateway/juicefs-s3-gateway -n kube-system -f ./values.yaml
```

## Hadoop Java SDK

请参考[「安装与编译客户端」](../deployment/hadoop_java_sdk.md#安装与编译客户端)文档了解如何安装新版本的 Hadoop Java SDK，然后根据[「部署客户端」](../deployment/hadoop_java_sdk.md#部署客户端)的步骤重新部署新版本客户端即可完成升级。

:::note 注意
某些组件必须重启以后才能使用新版本的 Hadoop Java SDK，具体请参考[「重启服务」](../deployment/hadoop_java_sdk.md#重启服务)文档。
:::


================================================
FILE: docs/zh_cn/benchmark/benchmark.md
================================================
---
title: 常规测试
sidebar_position: 1
slug: .
description: 本文介绍使用 FIO、mdtest 以及 JuiceFS 自带的 bench 命令对文件系统进行性能测试。
---

本章介绍的测试中使用 [Redis](https://redis.io) 作为元数据存储引擎。在该测试条件下，JuiceFS 拥有十倍于 Amazon [EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 的性能表现。

### 基础测试

JuiceFS 提供了 `bench`  子命令来运行一些基本的基准测试，用以评估 JuiceFS 在当前环境的运行情况：

![JuiceFS Bench](../images/juicefs-bench.png)

### 吞吐量

使用 [fio](https://github.com/axboe/fio) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行连续读写测试，结果如下：

[![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)](../images/sequential-read-write-benchmark.svg)

结果表明，JuiceFS 可以提供比另外两个工具大 10 倍的吞吐量，[了解更多](fio.md)。

### 元数据 IOPS

使用 [mdtest](https://github.com/hpc/ior) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行简易的 mdtest  基准测试，结果如下：

[![Metadata Benchmark](../images/metadata-benchmark.svg)](../images/metadata-benchmark.svg)

结果表明，JuiceFS 可以提供比另外两个工具更高的元数据 IOPS，[了解更多](mdtest.md)。

### 分析测试结果

如遇性能问题，阅读[「实时性能监控」](../administration/fault_diagnosis_and_analysis.md#performance-monitor)了解如何排查。


================================================
FILE: docs/zh_cn/benchmark/fio.md
================================================
---
title: fio 基准测试
sidebar_position: 7
slug: /fio
---

:::tip 提示
JuiceFS v1.0+ 默认启用了回收站，基准测试会在文件系统中创建和删除临时文件，这些文件最终会被转存到回收站 `.trash` 占用存储空间，为了避免这种情况，可以在基准测试之前关闭回收站 `juicefs config META-URL --trash-days 0`，详情参考[回收站](../security/trash.md)。
:::

## 测试方法

使用 [fio](https://github.com/axboe/fio) 在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行顺序读、顺序写基准测试。

## 测试工具

以下测试使用的工具为 fio 3.1。

顺序读测试 (任务数：1):

```
fio --name=sequential-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G
fio --name=sequential-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G
fio --name=sequential-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G
```

顺序写测试 (任务数：1):

```
fio --name=sequential-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1
fio --name=sequential-write --directory=/efs --rw=write  --refill_buffers --bs=4M --size=4G --end_fsync=1
fio --name=sequential-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --end_fsync=1
```

顺序读测试 (任务数：16):

```
fio --name=big-file-multi-read --directory=/s3fs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
fio --name=big-file-multi-read --directory=/efs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
fio --name=big-file-multi-read --directory=/jfs --rw=read --refill_buffers --bs=4M --size=4G --numjobs=16
```

顺序写测试 (任务数：16):

```
fio --name=big-file-multi-write --directory=/s3fs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
fio --name=big-file-multi-write --directory=/efs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
fio --name=big-file-multi-write --directory=/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=16 --end_fsync=1
```

## 测试环境

以下测试结果均使用 fio 在亚马逊云 c5d.18xlarge EC2  (72 CPU, 144G RAM) 实例得出，操作系统采用 Ubuntu 18.04 LTS (Kernel 5.4.0) ，JuiceFS 使用同主机的本地 Redis (version 4.0.9) 实例存储元数据。

JuiceFS 挂载命令：

```
./juicefs format --storage=s3 --bucket=https://<BUCKET>.s3.<REGION>.amazonaws.com localhost benchmark
./juicefs mount --max-uploads=150 --io-retries=20 localhost /jfs
```

EFS 挂载命令 (与配置说明中一致):

```
mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, <EFS-ID>.efs.<REGION>.amazonaws.com:/ /efs
```

S3FS (version 1.82) 挂载命令：

```
s3fs <BUCKET>:/s3fs /s3fs -o host=https://s3.<REGION>.amazonaws.com,endpoint=<REGION>,passwd_file=${HOME}/.passwd-s3fs
```

## 测试结果

![Sequential Read Write Benchmark](../images/sequential-read-write-benchmark.svg)


================================================
FILE: docs/zh_cn/benchmark/mdtest.md
================================================
---
title: mdtest 基准测试
sidebar_position: 8
slug: /mdtest
---

:::tip 提示
JuiceFS v1.0+ 默认启用了回收站，基准测试会在文件系统中创建和删除临时文件，这些文件最终会被转存到回收站 `.trash` 占用存储空间，为了避免这种情况，可以在基准测试之前关闭回收站 `juicefs config META-URL --trash-days 0`，详情参考[回收站](../security/trash.md)。
:::

## 测试方法

使用 [mdtest](https://github.com/hpc/ior)，分别在 JuiceFS、[EFS](https://aws.amazon.com/efs) 和 [S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 上执行元数据性能测试。

## 测试工具

以下测试使用 mdtest 3.4。
调整 mdtest 的参数以确保命令可以在 5 分钟内完成。

```
./mdtest -d /s3fs/mdtest -b 6 -I 8 -z 2
./mdtest -d /efs/mdtest -b 6 -I 8 -z 4
./mdtest -d /jfs/mdtest -b 6 -I 8 -z 4
```

## 测试环境

在下面的测试结果中，所有 mdtest 均在亚马逊云 c5.large EC2 实例（2 CPU，4G RAM），Ubuntu 18.04 LTS（Kernel 5.4.0）系统上进行，JuiceFS 使用的 Redis（4.0.9 版本）实例运行在相同区域的 c5.large EC2 实例上。

JuiceFS 挂载命令：

```
./juicefs format --storage=s3 --bucket=https://<BUCKET>.s3.<REGION>.amazonaws.com localhost benchmark
nohup ./juicefs mount localhost /jfs &
```

EFS 挂载命令 (与配置说明保持一致)：

```
mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport, <EFS-ID>.efs.<REGION>.amazonaws.com:/ /efs
```

S3FS (version 1.82) 挂载命令：

```
s3fs <BUCKET>:/s3fs /s3fs -o host=https://s3.<REGION>.amazonaws.com,endpoint=<REGION>,passwd_file=${HOME}/.passwd-s3fs
```

## 测试结果

![Metadata Benchmark](../images/metadata-benchmark.svg)

### S3FS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/s3fs/mdtest' '-b' '6' '-I' '8' '-z' '2'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /s3fs/mdtest
FS                  : 256.0 TiB   Used FS: 0.0%   Inodes: 0.0 Mi   Used Inodes: -nan%
Nodemap: 1
1 tasks, 344 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :          5.977          5.977          5.977          0.000
   Directory stat            :        435.898        435.898        435.898          0.000
   Directory removal         :          8.969          8.969          8.969          0.000
   File creation             :          5.696          5.696          5.696          0.000
   File stat                 :         68.692         68.692         68.692          0.000
   File read                 :         33.931         33.931         33.931          0.000
   File removal              :         23.658         23.658         23.658          0.000
   Tree creation             :          5.951          5.951          5.951          0.000
   Tree removal              :          9.889          9.889          9.889          0.000
```

### EFS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/efs/mdtest' '-b' '6' '-I' '8' '-z' '4'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /efs/mdtest
FS                  : 8388608.0 TiB   Used FS: 0.0%   Inodes: 0.0 Mi   Used Inodes: -nan%
Nodemap: 1
1 tasks, 12440 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :        192.301        192.301        192.301          0.000
   Directory stat            :       1311.166       1311.166       1311.166          0.000
   Directory removal         :        213.132        213.132        213.132          0.000
   File creation             :        179.293        179.293        179.293          0.000
   File stat                 :        915.230        915.230        915.230          0.000
   File read                 :        371.012        371.012        371.012          0.000
   File removal              :        217.498        217.498        217.498          0.000
   Tree creation             :        187.906        187.906        187.906          0.000
   Tree removal              :        218.357        218.357        218.357          0.000
```

### JuiceFS

```
mdtest-3.4.0+dev was launched with 1 total task(s) on 1 node(s)
Command line used: ./mdtest '-d' '/jfs/mdtest' '-b' '6' '-I' '8' '-z' '4'
WARNING: Read bytes is 0, thus, a read test will actually just open/close.
Path                : /jfs/mdtest
FS                  : 1024.0 TiB   Used FS: 0.0%   Inodes: 10.0 Mi   Used Inodes: 0.0%
Nodemap: 1
1 tasks, 12440 files/directories

SUMMARY rate: (of 1 iterations)
   Operation                      Max            Min           Mean        Std Dev
   ---------                      ---            ---           ----        -------
   Directory creation        :       1416.582       1416.582       1416.582          0.000
   Directory stat            :       3810.083       3810.083       3810.083          0.000
   Directory removal         :       1115.108       1115.108       1115.108          0.000
   File creation             :       1410.288       1410.288       1410.288          0.000
   File stat                 :       5023.227       5023.227       5023.227          0.000
   File read                 :       3487.947       3487.947       3487.947          0.000
   File removal              :       1163.371       1163.371       1163.371          0.000
   Tree creation             :       1503.004       1503.004       1503.004          0.000
   Tree removal              :       1119.806       1119.806       1119.806          0.000
```


================================================
FILE: docs/zh_cn/benchmark/metadata_engines_benchmark.md
================================================
---
title: 元数据引擎性能测试
sidebar_position: 6
slug: /metadata_engines_benchmark
description: 本文采用亚马逊云的真实环境，介绍如何对 JuiceFS 的各种元数据引擎性能进行测试和评估。
---

首先展示结论：

- 对于纯元数据操作，MySQL 耗时约为 Redis 的 2～4 倍；TiKV 性能与 MySQL 接近，大部分场景下略优于 MySQL；etcd 的耗时约为 TiKV 的 1.5 倍
- 对于小 IO（～100 KiB）压力，使用 MySQL 引擎的操作总耗时大约是使用 Redis 引擎总耗时的 1～3 倍；TiKV 和 etcd 的耗时与 MySQL 接近
- 对于大 IO（～4 MiB）压力，使用不同元数据引擎的总耗时未见明显差异（此时对象存储成为瓶颈）

:::note 注意

1. Redis 可以通过将 `appendfsync` 配置项由 `always` 改为 `everysec`，牺牲少量可靠性来换取一定的性能提升。更多信息可参见[这里](https://redis.io/docs/manual/persistence)。
2. 测试中 Redis 和 MySQL 数据均仅在本地存储单副本，TiKV 和 etcd 数据会在三个节点间通过 Raft 协议存储三副本。

:::

以下提供了测试的具体细节。这些测试都运行在相同的对象存储（用来存放数据）、客户端和元数据节点上，只有元数据引擎不同。

## 测试环境

### JuiceFS 版本

1.1.0-beta1+2023-06-08.5ef17ba0

### 对象存储

Amazon S3

### 客户端节点

- Amazon c5.xlarge：4 vCPUs，8 GiB 内存，最高 10 Gigabit 网络
- Ubuntu 20.04.1 LTS

### 元数据节点

- Amazon c5d.xlarge：4 vCPUs，8 GiB 内存，最高 10 Gigabit 网络，100 GB SSD（为元数据引擎提供本地存储）
- Ubuntu 20.04.1 LTS
- SSD 数据盘被格式化为 ext4 文件系统并挂载到 `/data` 目录

### 元数据引擎

#### Redis

- 版本：[7.0.9](https://download.redis.io/releases/redis-7.0.9.tar.gz)
- 配置：
  - `appendonly`：`yes`
  - `appendfsync`：分别测试了 `always` 和 `everysec`
  - `dir`：`/data/redis`

#### MySQL

- 版本：8.0.25
- `/var/lib/mysql` 目录被绑定挂载到 `/data/mysql`

#### PostgreSQL

- 版本：15.3
- 数据目录被更改到 `/data/pgdata`

#### TiKV

- 版本：6.5.3
- 配置：
  - `deploy_dir`：`/data/tikv-deploy`
  - `data_dir`：`/data/tikv-data`

#### etcd

- 版本：3.3.25
- 配置：
  - `data-dir`：`/data/etcd`

#### FoundationDB

- 版本：6.3.23
- 配置：
  - `data-dir`：`/data/fdb`

## 测试工具

每种元数据引擎都会运行以下所有测试。

### Golang Benchmark

在源码中提供了简单的元数据基准测试：[`pkg/meta/benchmarks_test.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/benchmarks_test.go)

### JuiceFS Bench

JuiceFS 提供了一个基础的性能测试命令：

```bash
juicefs bench /mnt/jfs -p 4
```

### mdtest

- 版本：mdtest-3.3.0

在 3 个客户端节点上并发执行测试：

```bash
$ cat myhost
client1 slots=4
client2 slots=4
client3 slots=4
```

测试命令：

meta only

```shell
mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -b 3 -z 1 -I 100 -u -d /mnt/jfs
```

12000 * 100KiB files

```shell
mpirun --use-hwthread-cpus --allow-run-as-root -np 12 --hostfile myhost --map-by slot /root/mdtest -F -w 102400 -I 1000 -z 0 -u -d /mnt/jfs
```

### fio

- 版本：fio-3.28

```bash
fio --name=big-write --directory=/mnt/jfs --rw=write --refill_buffers --bs=4M --size=4G --numjobs=4 --end_fsync=1 --group_reporting
```

## 测试结果

### Golang Benchmark

- 展示了操作耗时（单位为 微秒/op），数值越小越好
- 括号内数字是该指标对比 Redis-Always 的倍数（`always` 和 `everysec` 均是 Redis 配置项 `appendfsync` 的可选值）
- 由于元数据缓存缘故，目前 `Read` 接口测试数据均小于 1 微秒，暂无对比意义

  |              | Redis-Always | Redis-Everysec | MySQL        | PostgreSQL   | TiKV       | etcd         | FoundationDB |
  |--------------|--------------|----------------|--------------|--------------|------------|--------------|--------------|
  | mkdir        | 558          | 468 (0.8)      | 2042 (3.7)   | 1076 (1.9)   | 1237 (2.2) | 1916 (3.4)   | 1842 (3.3)   |
  | mvdir        | 693          | 621 (0.9)      | 2693 (3.9)   | 1459 (2.1)   | 1414 (2.0) | 2486 (3.6)   | 1895 (2.7)   |
  | rmdir        | 717          | 648 (0.9)      | 3050 (4.3)   | 1697 (2.4)   | 1641 (2.3) | 2980 (4.2)   | 2088 (2.9)   |
  | readdir_10   | 280          | 288 (1.0)      | 1350 (4.8)   | 1098 (3.9)   | 995 (3.6)  | 1757 (6.3)   | 1744 (6.2)   |
  | readdir_1k   | 1490         | 1547 (1.0)     | 18779 (12.6) | 18414 (12.4) | 5834 (3.9) | 15809 (10.6) | 15276 (10.3) |
  | mknod        | 562          | 464 (0.8)      | 1547 (2.8)   | 849 (1.5)    | 1211 (2.2) | 1838 (3.3)   | 1763 (3.1)   |
  | create       | 570          | 455 (0.8)      | 1570 (2.8)   | 844 (1.5)    | 1209 (2.1) | 1849 (3.2)   | 1761 (3.1)   |
  | rename       | 728          | 627 (0.9)      | 2735 (3.8)   | 1478 (2.0)   | 1419 (1.9) | 2445 (3.4)   | 1911 (2.6)   |
  | unlink       | 658          | 567 (0.9)      | 2365 (3.6)   | 1280 (1.9)   | 1443 (2.2) | 2461 (3.7)   | 1940 (2.9)   |
  | lookup       | 173          | 178 (1.0)      | 557 (3.2)    | 375 (2.2)    | 608 (3.5)  | 1054 (6.1)   | 1029 (5.9)   |
  | getattr      | 87           | 86 (1.0)       | 530 (6.1)    | 350 (4.0)    | 306 (3.5)  | 536 (6.2)    | 504 (5.8)    |
  | setattr      | 471          | 345 (0.7)      | 1029 (2.2)   | 571 (1.2)    | 1001 (2.1) | 1279 (2.7)   | 1596 (3.4)   |
  | access       | 87           | 89 (1.0)       | 518 (6.0)    | 356 (4.1)    | 307 (3.5)  | 534 (6.1)    | 526 (6.0)    |
  | setxattr     | 393          | 262 (0.7)      | 992 (2.5)    | 534 (1.4)    | 800 (2.0)  | 717 (1.8)    | 1300 (3.3)   |
  | getxattr     | 84           | 87 (1.0)       | 494 (5.9)    | 333 (4.0)    | 303 (3.6)  | 529 (6.3)    | 511 (6.1)    |
  | removexattr  | 215          | 96 (0.4)       | 697 (3.2)    | 385 (1.8)    | 1007 (4.7) | 1336 (6.2)   | 1597 (7.4)   |
  | listxattr_1  | 85           | 87 (1.0)       | 516 (6.1)    | 342 (4.0)    | 303 (3.6)  | 531 (6.2)    | 515 (6.1)    |
  | listxattr_10 | 87           | 91 (1.0)       | 561 (6.4)    | 383 (4.4)    | 322 (3.7)  | 565 (6.5)    | 529 (6.1)    |
  | link         | 680          | 545 (0.8)      | 2435 (3.6)   | 1375 (2.0)   | 1732 (2.5) | 3058 (4.5)   | 2402 (3.5)   |
  | symlink      | 580          | 448 (0.8)      | 1785 (3.1)   | 954 (1.6)    | 1224 (2.1) | 1897 (3.3)   | 1764 (3.0)   |
  | newchunk     | 0            | 0 (0.0)        | 1 (0.0)      | 1 (0.0)      | 1 (0.0)    | 1 (0.0)      | 2 (0.0)      |
  | write        | 553          | 369 (0.7)      | 2352 (4.3)   | 1183 (2.1)   | 1573 (2.8) | 1788 (3.2)   | 1747 (3.2)   |
  | read_1       | 0            | 0 (0.0)        | 0 (0.0)      | 0 (0.0)      | 0 (0.0)    | 0 (0.0)      | 0 (0.0)      |
  | read_10      | 0            | 0 (0.0)        | 0 (0.0)      | 0 (0.0)      | 0 (0.0)    | 0 (0.0)      | 0 (0.0)      |

### JuiceFS Bench

|                  | Redis-Always     | Redis-Everysec   | MySQL           | PostgreSQL      | TiKV            | etcd            | FoundationDB    |
|------------------|------------------|------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
| Write big file   | 730.84 MiB/s     | 731.93 MiB/s     | 729.00 MiB/s    | 744.47 MiB/s    | 730.01 MiB/s    | 746.07 MiB/s    | 744.70 MiB/s    |
| Read big file    | 923.98 MiB/s     | 892.99 MiB/s     | 905.93 MiB/s    | 895.88 MiB/s    | 918.19 MiB/s    | 939.63 MiB/s    | 948.81 MiB/s    |
| Write small file | 95.20 files/s    | 109.10 files/s   | 82.30 files/s   | 86.40 files/s   | 101.20 files/s  | 95.80 files/s   | 94.60 files/s   |
| Read small file  | 1242.80 files/s  | 937.30 files/s   | 752.40 files/s  | 1857.90 files/s | 681.50 files/s  | 1229.10 files/s | 1301.40 files/s |
| Stat file        | 12313.80 files/s | 11989.50 files/s | 3583.10 files/s | 7845.80 files/s | 4211.20 files/s | 2836.60 files/s | 3400.00 files/s |
| FUSE operation   | 0.41 ms/op       | 0.40 ms/op       | 0.46 ms/op      | 0.44 ms/op      | 0.41 ms/op      | 0.41 ms/op      | 0.44 ms/op      |
| Update meta      | 2.45 ms/op       | 1.76 ms/op       | 2.46 ms/op      | 1.78 ms/op      | 3.76 ms/op      | 3.40 ms/op      | 2.87 ms/op      |

### mdtest

 展示了操作速率（每秒 OPS 数），数值越大越好

|                    | Redis-Always | Redis-Everysec | MySQL    | PostgreSQL | TiKV      | etcd     | FoundationDB |
|--------------------|--------------|----------------|----------|------------|-----------|----------|--------------|
| **EMPTY FILES**    |              |                |          |            |           |          |              |
| Directory creation | 4901.342     | 9990.029       | 1252.421 | 4091.934   | 4041.304  | 1910.768 | 3065.578     |
| Directory stat     | 289992.466   | 379692.576     | 9359.278 | 69384.097  | 49465.223 | 6500.178 | 17746.670    |
| Directory removal  | 5131.614     | 10356.293      | 902.077  | 1254.890   | 3210.518  | 1450.842 | 2460.604     |
| File creation      | 5472.628     | 9984.824       | 1326.613 | 4726.582   | 4053.610  | 1801.956 | 2908.526     |
| File stat          | 288951.216   | 253218.558     | 9135.571 | 233148.252 | 50432.658 | 6276.787 | 14939.411    |
| File read          | 64560.148    | 60861.397      | 8445.953 | 20013.027  | 18411.280 | 9094.627 | 11087.931    |
| File removal       | 6084.791     | 12221.083      | 1073.063 | 3961.855   | 3742.269  | 1648.734 | 2214.311     |
| Tree creation      | 80.121       | 83.546         | 34.420   | 61.937     | 77.875    | 56.299   | 74.982       |
| Tree removal       | 218.535      | 95.599         | 42.330   | 44.696     | 114.414   | 76.002   | 64.036       |
| **SMALL FILES**    |              |                |          |            |           |          |              |
| File creation      | 295.067      | 312.182        | 275.588  | 289.627    | 307.121   | 275.578  | 263.487      |
| File stat          | 54069.827    | 52800.108      | 8760.709 | 19841.728  | 14076.214 | 8214.318 | 10009.670    |
| File read          | 62341.568    | 57998.398      | 4639.571 | 19244.678  | 23376.733 | 5477.754 | 6533.787     |
| File removal       | 5615.018     | 11573.415      | 1061.600 | 3907.740   | 3411.663  | 1024.421 | 1750.613     |
| Tree creation      | 57.860       | 57.080         | 23.723   | 52.621     | 44.590    | 19.998   | 11.243       |
| Tree removal       | 96.756       | 65.279         | 23.227   | 19.511     | 27.616    | 17.868   | 10.571       |

### fio

|                 | Redis-Always | Redis-Everysec | MySQL     | PostgreSQL | TiKV      | etcd      | FoundationDB |
|-----------------|--------------|----------------|-----------|------------|-----------|-----------|--------------|
| Write bandwidth | 729 MiB/s    | 737 MiB/s      | 736 MiB/s | 768 MiB/s  | 731 MiB/s | 738 MiB/s | 745 MiB/s    |


================================================
FILE: docs/zh_cn/benchmark/performance_evaluation_guide.md
================================================
---
title: 性能评估指南
sidebar_position: 2
slug: /performance_evaluation_guide
---

在进行性能测试之前，最好写下该使用场景的大致描述，包括：

1. 对接的应用是什么？比如 Apache Spark、PyTorch 或者是自己写的程序等
2. 应用运行的资源配置，包括 CPU、内存、网络，以及节点规模
3. 预计的数据规模，包括文件数量和容量
4. 文件的大小和访问模式（大文件或者小文件，顺序读写或者随机读写）
5. 对性能的要求，比如每秒要写入或者读取的数据量、访问的 QPS 或者操作的延迟等

以上这些内容越清晰、越详细，就越容易制定合适的测试计划，以及需要关注的性能指标，来判断应用对存储系统各方面的需求，包括 JuiceFS 元数据配置、网络带宽要求、配置参数等。当然，在一开始就清晰地写出上面所有的内容并不容易，有些内容可以在测试过程中逐渐明确，**但是在一次完整的测试结束时，以上使用场景描述以及相对应的测试方法、测试数据、测试结果都应该是完整的**。

如果上面的内容还不明确，不要紧，JuiceFS 内置的测试工具可以一行命令得到单机基准性能的核心指标。同时本文还会介绍两个 JuiceFS 内置的性能分析工具，在做更复杂的测试时，这两个工具能帮你简单清晰的分析出 JuiceFS 性能表现背后的原因。

## 性能测试快速上手

以下示例介绍 JuiceFS 内置的 bench 工具的基本用法。

### 环境配置

- 测试主机：Amazon EC2 c5.xlarge 一台
- 操作系统：Ubuntu 20.04.1 LTS (Kernel `5.4.0-1029-aws`)
- 元数据引擎：Redis 6.2.3, 存储（dir）配置在系统盘
- 对象存储：Amazon S3
- JuiceFS version：0.17-dev (2021-09-23 2ec2badf)

### 注意事项

JuiceFS v1.0+ 默认启用了回收站，基准测试会在文件系统中创建和删除临时文件，这些文件最终会被转存到回收站 `.trash` 占用存储空间，为了避免这种情况，可以在基准测试之前关闭回收站 `juicefs config META-URL --trash-days 0`，详情参考[回收站](../security/trash.md)。

### `juicefs bench`

[`juicefs bench`](../reference/command_reference.mdx#bench) 命令可以帮助你快速完成单机性能测试，通过测试结果判断环境配置和性能表现是否正常。假设你已经把 JuiceFS 挂载到了测试机器的 `/mnt/jfs` 位置（如果在 JuiceFS 初始化、挂载方面需要帮助，请参考[创建文件系统](../getting-started/standalone.md#juicefs-format)），执行以下命令即可（推荐 `-p` 参数设置为测试机器的 CPU 核数）：

```bash
juicefs bench /mnt/jfs -p 4
```

测试结果以表格形式呈现，其中 `ITEM` 代表测试的项目，`VALUE` 代表每秒的处理能力（吞吐量、文件数、操作数等），`COST` 代表每个文件或操作所需的时间。

各项性能指标会显示为绿色、黄色或红色区分性能表现。若您的结果中有红色指标，请先检查相关配置，需要帮助可以在 [GitHub Discussions](https://github.com/juicedata/juicefs/discussions) 详细描述你的问题。

![bench](../images/bench-guide-bench.png)

`juicefs bench` 基准性能测试的具体流程如下（它的实现逻辑非常简单，有兴趣了解细节的可以直接看[源码](https://github.com/juicedata/juicefs/blob/main/cmd/bench.go))：

1. N 并发各写 1 个 1 GiB 的大文件，IO 大小为 1 MiB
2. N 并发各读 1 个之前写的 1 GiB 的大文件，IO 大小为 1 MiB
3. N 并发各写 100 个 128 KiB 的小文件，IO 大小为 128 KiB
4. N 并发各读 100 个之前写的 128 KiB 的小文件，IO 大小为 128 KiB
5. N 并发各 stat 100 个之前写的 128 KiB 的小文件
6. 清理测试用的临时目录

并发数 N 的值即由 `bench` 命令中的 `-p` 参数指定。

在这用 AWS 提供的几种常用存储类型做个性能比较：

- EFS 1TiB 容量时，读 150MiB/s，写 50MiB/s，价格是 $0.08/GB-month
- EBS st1 是吞吐优化型 HDD，最大吞吐 500MiB/s，最大 IOPS（1MiB I/O）500，最大容量 16TiB，价格是 $0.045/GB-month
- EBS gp2 是通用型 SSD，最大吞吐 250MiB/s，最大 IOPS（16KiB I/O）16000，最大容量 16TiB，价格是 $0.10/GB-month

不难看出，在上面的测试中，JuiceFS 的顺序读写能力明显优于 AWS EFS，吞吐能力也超过了常用的 EBS。但是写小文件的速度不算快，因为每写一个文件都需要将数据持久化到 S3 中，调用对象存储 API 通常有 10~30ms 的固定开销。

:::note 注
Amazon EFS 的性能与容量线性相关（[参考官方文档](https://docs.aws.amazon.com/efs/latest/ug/performance.html#performancemodes)），这样就不适合用在小数据量高吞吐的场景中。
:::

:::note 注
价格参考 [AWS 美东区（US East, Ohio Region）](https://aws.amazon.com/ebs/pricing/?nc1=h_ls)，不同 Region 的价格有细微差异。
:::

:::note 注
以上数据来自 [AWS 官方文档](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html)，性能指标为最大值，EBS 的实际性能与卷容量和挂载 EC2 实例类型相关，总的来说是越大容量，搭配约高配置的 EC2，得到的 EBS 性能越好，但不超过上面提到的最大值。
:::

### `juicefs objbench`

[`juicefs objbench`](../reference/command_reference.mdx#objbench) 命令可以运行一些关于对象存储的测试，用以评估其作为 JuiceFS 的后端存储时的运行情况。以测试 Amazon S3 为例：

```bash
juicefs objbench \
    --storage s3 \
    --access-key myAccessKey \
    --secret-key mySecretKey \
    https://mybucket.s3.us-east-2.amazonaws.com
```

测试结果如下图所示：

![JuiceFS Bench](../images/objbench.png)

其中，结果显示为 `not support` 代表所测试的对象存储不支持该项功能。

#### 测试流程

首先会对对象存储的接口进行功能测试，以下为测试用例：

1. 创建 bucket
2. 上传对象
3. 下载对象
4. 下载不存在的对象
5. 获取对象部分内容
6. 获取对象元信息
7. 删除对象
8. 删除不存在对象
9. 列举对象
10. 上传大对象
11. 上传空对象
12. 分块上传
13. 更改文件拥有者与所属组（需要 `root` 权限运行）
14. 更改文件权限
15. 更改文件的 mtime（最后修改时间）

然后进行性能测试：

1. 将 `--small-objects` 个 `--small-object-size` 大小的对象，以 `--threads` 个并发上传
2. 下载步骤 1 中上传的对象并检查内容
3. 将 `--big-object-size` 大小的对象按照 `--block-size` 的大小拆分后以 `--threads` 并发度上传
4. 下载步骤 3 中上传的对象并检查内容，然后清理步骤 3 上传到对象存储的所有对象
5. 以 `--threads` 并发度列举对象存储中所有的对象 100 次
6. 以 `--threads` 并发度获取步骤 1 中上传的所有对象的元信息
7. 以 `--threads` 并发度更改步骤 1 中上传的所有对象的 mtime（最后修改时间）
8. 以 `--threads` 并发度更改步骤 1 中上传的所有对象的权限
9. 以 `--threads` 并发度更改步骤 1 中上传的所有对象的拥有者与所属组（需要 `root` 权限运行）
10. 以 `--threads` 并发度删除步骤 1 中上传的所有对象

最后清理测试的文件。

## 性能观测和分析工具

接下来介绍两个性能观测和分析工具，是 JuiceFS 测试、使用、调优过程中必备的利器。

### `juicefs stats`

[`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) 命令是一个实时统计 JuiceFS 性能指标的工具，类似 Linux 系统的 `dstat` 命令，可以实时显示 JuiceFS 客户端的指标变化。执行 `juicefs bench` 时，在另一个会话中执行以下命令：

```bash
juicefs stats /mnt/jfs --verbosity 1
```

结果如下，可以将其与上述基准测试流程对照来看，更易理解：

![bench-guide-stats](../images/bench-guide-stats.png)

其中各项指标具体含义参考 [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats)。

### `juicefs profile`

[`juicefs profile`](../administration/fault_diagnosis_and_analysis.md#profile) 命令可以基于[访问日志](../administration/fault_diagnosis_and_analysis.md#access-log)进行性能数据统计，来直观了解 JuiceFS 的运行情况。执行 `juicefs bench` 时，在另一个会话中执行以下命令：

```bash
cat /mnt/jfs/.accesslog > juicefs.accesslog
```

其中 `.accesslog` 是一个虚拟文件，它平时不会产生任何数据，只有在读取（如执行 `cat`）时才会有 JuiceFS 的访问日志输出。结束后使用 <kbd>Ctrl</kbd> + <kbd>C</kbd> 结束 `cat` 命令，并运行：

```bash
juicefs profile juicefs.accesslog --interval 0
```

其中 `--interval` 参数设置访问日志的采样间隔，设为 0 时用于快速重放一个指定的日志文件，生成统计信息，如下图所示：

![bench-guide-profile](../images/bench-guide-profile.png)

从之前基准测试流程描述可知，本次测试过程一共创建了 `(1 + 100) * 4 = 404` 个文件，每个文件都经历了「创建 → 写入 → 关闭 → 打开 → 读取 → 关闭 → 删除」的过程，因此一共有：

- 404 次 `create`，`open` 和 `unlink` 请求
- 808 次 `flush` 请求：每当文件关闭时会自动调用一次 `flush`
- 33168 次 `write`/`read` 请求：每个大文件写入了 1024 个 1 MiB IO，而在 FUSE 层请求的默认最大值为 128 KiB，也就是说每个应用 IO 会被拆分成 8 个 FUSE 请求，因此一共有 `(1024 * 8 + 100) * 4 = 33168` 个请求。读 IO 与之类似，计数也相同。

以上这些值均能与 `profile` 的结果完全对应上。另外，结果中还显示 `write` 的平均时延非常小（45 微秒），而主要耗时点在 `flush`。这是因为 JuiceFS 的 `write` 默认先写入内存缓冲区，在文件关闭时再调用 `flush` 上传数据到对象存储，与预期吻合。

## 其他测试工具配置示例

:::tip 提示
JuiceFS v1.0+ 默认启用了回收站，基准测试会在文件系统中创建和删除临时文件，这些文件最终会被转存到回收站 `.trash` 占用存储空间，为了避免这种情况，可以在基准测试之前关闭回收站 `juicefs config META-URL --trash-days 0`，详情参考[回收站](../security/trash.md)。
:::

### Fio 单机性能测试

Fio 是业界常用的一个性能测试工具，完成 JuiceFS bench 后可以用它来做更复杂的性能测试。

#### 环境配置

与 [JuiceFS Bench](#环境配置) 测试环境一致。

#### 测试任务

执行下面四个 Fio 任务，分别进行顺序写、顺序读、随机写、随机读测试。

顺序写

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=write --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

顺序读

```bash
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=read --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

随机写

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randwrite --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

随机读

```shell
fio --name=jfs-test --directory=/mnt/jfs --ioengine=libaio --rw=randread --bs=1m --size=1g --numjobs=4 --direct=1 --group_reporting
```

参数说明：

- `--name`：用户指定的测试名称，会影响测试文件名
- `--directory`：测试目录
- `--ioengine`：测试时下发 IO 的方式；通常用 libaio 即可
- `--rw`：常用的有 read，write，randread，randwrite，分别代表顺序读写和随机读写
- `--bs`：每次 IO 的大小
- `--size`：每个线程的 IO 总大小；通常就等于测试文件的大小
- `--numjobs`：测试并发线程数；默认每个线程单独跑一个测试文件
- `--direct`：在打开文件时添加 `O_DIRECT` 标记位，不使用系统缓冲，可以使测试结果更稳定准确

结果如下：

```bash
# Sequential
WRITE: bw=703MiB/s (737MB/s), 703MiB/s-703MiB/s (737MB/s-737MB/s), io=4096MiB (4295MB), run=5825-5825msec
READ: bw=817MiB/s (856MB/s), 817MiB/s-817MiB/s (856MB/s-856MB/s), io=4096MiB (4295MB), run=5015-5015msec

# Random
WRITE: bw=285MiB/s (298MB/s), 285MiB/s-285MiB/s (298MB/s-298MB/s), io=4096MiB (4295MB), run=14395-14395msec
READ: bw=93.6MiB/s (98.1MB/s), 93.6MiB/s-93.6MiB/s (98.1MB/s-98.1MB/s), io=4096MiB (4295MB), run=43773-43773msec
```

### Vdbench 多机性能测试

Vdbench 也是业界常见的文件系统评测工具，且很好地支持了多机并发测试。

#### 测试环境

与 [JuiceFS Bench](#环境配置) 测试环境类似，只是多开了两台同配置主机，一共三台。

#### 准备工作

需要在每个节点相同路径下安装 vdbench：

1. [官网](https://www.oracle.com/downloads/server-storage/vdbench-downloads.html)下载 50406 版本
2. 安装 Java：`apt-get install openjdk-8-jre`
3. 测试 vdbench 安装成功：`./vdbench -t`

然后，假设三个节点名称分别为 node0，node1 和 node2，则需在 node0 上创建配置文件，如下（测试大量小文件读写）：

```bash
$ cat jfs-test
hd=default,vdbench=/root/vdbench50406,user=root
hd=h0,system=node0
hd=h1,system=node1
hd=h2,system=node2

fsd=fsd1,anchor=/mnt/jfs/vdbench,depth=1,width=100,files=3000,size=128k,shared=yes

fwd=default,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd1,host=h0
fwd=fwd2,host=h1
fwd=fwd3,host=h2

rd=rd1,fwd=fwd*,fwdrate=max,format=yes,elapsed=300,interval=1
```

参数说明：

- `vdbench=/root/vdbench50406`：指定了 vdbench 工具的安装路径
- `anchor=/mnt/jfs/vdbench`：指定了每个节点上运行测试任务的路径
- `depth=1,width=100,files=3000,size=128k`：定义了测试任务文件树结构，即测试目录下再创建 100 个目录，每个目录内包含 3000 个 128 KiB 大小的文件，一共 30 万个文件
- `operation=read,xfersize=128k,fileio=random,fileselect=random`：定义了实际的测试任务，即随机选择文件下发 128 KiB 大小的读请求

结果如下：

```
FILE_CREATES        Files created:                              300,000        498/sec
READ_OPENS          Files opened for read activity:             188,317        627/sec
```

系统整体创建 128 KiB 文件速度为每秒 498 个，读取文件速度为每秒 627 个。

#### 其他参考示例

以下是一些本地简单评估文件系统性能时可用的配置文件，以供参考；具体测试集规模和并发数可根据实际情况调整。

##### 顺序读写大文件

文件大小均为 1GiB，其中 `fwd1` 是顺序写大文件，`fwd2` 是顺序读大文件。

```bash
$ cat local-big
fsd=fsd1,anchor=/mnt/jfs/local-big,depth=1,width=1,files=4,size=1g,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=1m,fileio=sequential,fileselect=sequential,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1
```

##### 随机读写小文件

文件大小均为 128KiB，其中 `fwd1` 是随机写小文件，`fwd2` 是随机读小文件，`fwd3` 是混合读写小文件（读写比 = 7:3）。

```bash
$ cat local-small
fsd=fsd1,anchor=/mnt/jfs/local-small,depth=1,width=20,files=2000,size=128k,openflags=o_direct

fwd=fwd1,fsd=fsd1,operation=write,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd2,fsd=fsd1,operation=read,xfersize=128k,fileio=random,fileselect=random,threads=4
fwd=fwd3,fsd=fsd1,rdpct=70,xfersize=128k,fileio=random,fileselect=random,threads=4

rd=rd1,fwd=fwd1,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd2,fwd=fwd2,fwdrate=max,format=restart,elapsed=120,interval=1
rd=rd3,fwd=fwd3,fwdrate=max,format=restart,elapsed=120,interval=1
```


================================================
FILE: docs/zh_cn/community/_roadmap.md
================================================
---
title: 路线图
sidebar_position: 3
---


================================================
FILE: docs/zh_cn/community/adopters.md
================================================
---
title: 使用者
sidebar_position: 1
slug: /adopters
---

| 公司／团队  |  行业 & 使用场景   |      案例       |
|-----------|------------------|-----------------|
| [之江实验室](https://www.zhejianglab.com)   | 研究机构，AI | [之江实验室：如何基于 JuiceFS 为超异构算力集群构建存储层](https://juicefs.com/zh-cn/blog/user-stories/high-performance-scale-out-heterogeneous-computing-power-cluster-storage)    |
| [人民大学](http://www.ruc.edu.cn)   |  大学，AI  |  [从 HPC 到 AI：探索文件系统的发展及性能评估](https://juicefs.com/zh-cn/blog/user-stories/hpc-ai-file-systems-performance-development)     |
| [中山大学](https://www.sysu.edu.cn)   |  大学，AI   |  [基于 JuiceFS 构建高校 AI 存储方案：高并发、系统稳定、运维简单](https://juicefs.com/zh-cn/blog/user-stories/juicefs-vs-nfs-ai-storage)  |
| [电子科技大学](https://www.uestc.edu.cn)    |  大学，AI   |       |
| [Character.AI](https://character.ai) | GenerativeAI |              |
| [BentoML](https://bentoml.com)   | GenerativeAI   |  [BentoML：如何使用 JuiceFS 加速大模型加载](https://juicefs.com/zh-cn/blog/user-stories/bentoml-use-juicefs-accelerate-large-model-loading)          |
| [NAVER](https://www.naver.com)  | 互联网服务，AI  |  [韩国国民搜索 NAVER：为 AI 平台引入存储方案 JuiceFS](https://juicefs.com/zh-cn/blog/user-stories/naver-storage-solution-juicefs-ai-platforms)  |
| [云知声](https://www.unisound.com)     |  AI  | [AI 场景存储优化：云知声超算平台基于 JuiceFS 的存储实践](https://juicefs.com/zh-cn/blog/juicefs-support-ai-storage-at-unisound)    |
| [地平线](https://horizon.ai)   | 汽车，AI     |         |
| [卓驭科技](https://www.zyt.com/zh)  | 汽车，AI  |        |
| [理想汽车](https://www.lixiang.com)   | 汽车，大数据，AI  | [JuiceFS 在理想汽车的使用和展望](https://juicefs.com/zh-cn/blog/li-auto-with-juicefs)，<br />[理想汽车：从 Hadoop 到云原生的演进与思考](https://juicefs.com/zh-cn/blog/liauto-case-hadoop-cloudnatrive)  |
| [蔚来汽车](https://www.nio.cn)  | 汽车，AI  |         |
| [上汽集团](https://www.saicmotor.com/chinese)   | 汽车，AI  | [上汽云 x JuiceFS：iGear 用了这个小魔法，模型训练速度提升 300%](https://juicefs.com/zh-cn/blog/performance-boost-3x-on-igear-platform)      |
| [Plus.AI](https://plus.ai) | 汽车，AI  | |
| [五菱汽车](https://wuling.com)   | 汽车，大数据     |          |
| [驭势科技](https://www.uisee.com)   | 汽车，AI        |       |
| [长安汽车梧桐车联](https://www.auto-pai.com)    | AI，大数据   |            |
| [小米](https://www.mi.com)    |  消费电子，AI  | [小米云原生文件存储平台化实践：支撑 AI 训练、大模型、容器平台多项业务](https://juicefs.com/zh-cn/blog/user-stories/cloud-native-file-storage-platform-as-ai-training-large-models-container-platforms)  |
| [vivo](https://www.vivo.com)   |  AI  | [vivo AI 计算平台的轩辕文件存储实践](https://www.infoq.cn/article/3oFSOWfYGsX5h7xzsIe6)     |
| [DJI 大疆创新](https://www.dji.com/cn)    |  消费电子，AI     |            |
| [安克创新](https://cn.anker-in.com)   | 消费电子，AI      |       |
| [OPPO](https://www.oppo.com)   | 消费电子，共享文件存储      |       |
| [顺丰速运](https://www.sf-express.com)   | 物流，AI，共享文件存储   |        |
| [思谋 SmartMore](https://cn.smartmore.com)   |  AI  | [思谋科技：构建易于运维的 AI 训练平台](https://juicefs.com/zh-cn/blog/user-stories/easy-operate-ai-training-platform-storage-selection)      |
| [旷视](https://megvii.com)     | AI     |            |
| [商汤科技](https://www.sensetime.com/cn)    |  AI  |           |
| [云从科技](https://www.cloudwalk.com)   | AI    |         |
| [思必驰](https://www.aispeech.com)   |  AI   |             |
| [Clobotics](https://clobotics.com)   | 机器人，AI   | [Clobotics 计算机视觉场景存储实践：多云架构、POSIX 全兼容、低运维的统一存储](https://juicefs.com/zh-cn/blog/user-stories/clobotics-posix-multi-cloud-storage)         |
| [刻行 coSence](https://www.coscene.io)   | 机器人，AI   | [机器人行业数据闭环实践：从对象存储到 JuiceFS](https://juicefs.com/zh-cn/blog/user-stories/data-object-storag--to-juicefs)   |
| [海柔创新](https://www.hairobotics.cn)   | 机器人，AI   | [海柔仿真系统存储实践：混合云架构下实现高可用与极简运维](https://juicefs.com/zh-cn/blog/user-stories/multi-cloud-storage-high-availability)   |
| [蝉妈妈数据](https://www.chanmama.com)  |  AI  |         |
| [酷家乐](https://www.kujiale.com)    |  AI    |       |
| [TP-LINK](https://www.tp-link.com)   | AI      |       |
| [Fal](https://fal.ai) | GenerativeAI    |           |
| [Lepton AI](https://www.lepton.ai) | GenerativeAI | [加速 AI 训推：Lepton AI 如何构建多租户、低延迟云存储平台](https://juicefs.com/zh-cn/blog/user-stories/lepton-ai-build-multi-tenant-low-latency-cloud-storage-platform)          |
| [Graviti Diffus](https://www.diffus.graviti.com) | GenerativeAI |      |
| [建信金融科技](https://www.ccbft.com)   | 金融科技，AI   |          |
| [平安银行](https://pingan.com)    | 金融科技，大数据  |         |
| [同盾](https://tongdun.cn)      |  金融科技，大数据    |         |
| [尧信](https://www.yaoxinhd.com)    |  金融科技，大数据，共享文件存储   |            |
| [米筐](https://www.ricequant.com)   |  金融科技，AI  |           |
| [移动云](https://ecloud.he.chinamobile.com)    |  AI，大数据   | [移动云使用 JuiceFS 支持 Apache HBase 增效降本的探索](https://juicefs.com/zh-cn/blog/juicefs-support-hbase-at-chinamobile-cloud)     |
| [中国电信](http://www.chinatelecom.com.cn)  | 大数据  | [存算分离实践：JuiceFS 在中国电信日均 PB 级数据场景的应用](https://juicefs.com/zh-cn/blog/user-stories/applicatio-of-juicefs-in-china-telecoms-daily-average-pb-data-scenario)   |
| [火山引擎](https://www.volcengine.com)   | 共享文件存储，特效渲染 | [JuiceFS 在火山引擎边缘计算的应用实践](https://juicefs.com/zh-cn/blog/user-stories/how-juicefs-accelerates-edge-rendering-performance-in-volcengine)     |
| [金山云](https://www.ksyun.com)   | AI，大数据   | [金山云：基于 JuiceFS 的 Elasticsearch 温冷热数据管理实践](https://juicefs.com/zh-cn/blog/user-stories/juicefs-elasticsearch-cold-heat-data-management)      |
| [腾讯](https://www.tencent.com)    | 互联网服务，AI   |       |
| [百度](https://home.baidu.com/home/index)    |  互联网服务，大数据   |        |
| [知乎](https://www.zhihu.com)   |  互联网服务，大数据  | [知乎 x JuiceFS：利用 JuiceFS 给 Flink 容器启动加速](https://juicefs.com/zh-cn/blog/zhihu-flink-with-juicefs)，<br />[利用 JuiceFS 动态注入 Protobuf JAR 包](https://zhuanlan.zhihu.com/p/586120009)，<br />[知乎：多云架构下大模型训练，如何保障存储稳定性](https://juicefs.com/zh-cn/blog/user-stories/data-storage-multi-cloud-zhihu-model-training-juicefs)    |
| [好未来](https://www.100tal.com)   |  互联网服务，AI  | [好未来：多云环境下基于 JuiceFS 建设低运维模型仓库](https://juicefs.com/zh-cn/blog/user-stories/multi-cloud-storage-juicefs-model-stroage)    |
| [Shopee](https://shopee.com)    |  电商，大数据  | [Shopee x JuiceFS：ClickHouse 冷热数据分离存储架构与实践](https://juicefs.com/zh-cn/blog/shopee-clickhouse-with-juicefs)       |
| [京东](https://jd.com)   | 电商，大数据   |       |
| [Grab](https://grab.com/sg)     |  出行服务，大数据   |         |
| [深势科技](https://www.dp.tech)   |  生物科技，AI  | [深势科技分享 AI 企业多云存储架构实践](https://juicefs.com/zh-cn/blog/dptech-ai-storage-in-multi-cloud-practice)    |
| [MemVerge](https://memverge.com)   | 生物科技，共享文件存储  |  [MemVerge：小文件写入性能 5 倍于 S3FS，JuiceFS 加速生信研究](https://juicefs.com/zh-cn/blog/user-stories/memverge-s3fs-juicefs)            |
| [百图生科](https://www.biotu.com)   | 生物科技，共享文件存储  |             |
| [MDI 生物实验室](https://mdibl.org) | 生物科技，高性能文件存储 |           |
| [劳伦斯伯克利实验室](https://www.lbl.gov) | 生物科技，高性能文件存储 |             |
| [美国自然历史博物馆](https://www.amnh.org) | 非盈利组织，高性能文件存储 |     |
| [阿拉贡国家实验室](https://www.anl.gov) | 非盈利组织，高性能文件存储 |     |
| [溯源精微](https://www.geneway.cn)   | 生物科技，共享文件存储           |              |
| [国家超级计算济南中心](https://www.nsccjn.cn)    | 超算，DevOps      |           |
| [网易游戏](https://game.163.com)   |  游戏，大数据，AI  | [网易互娱出海之旅：大数据平台上云架构设计与实践](https://juicefs.com/zh-cn/blog/user-stories/hadoop-compatible-storage-big-data-cloud-platform-s3)   |
| [米哈游](https://www.mihoyo.com)   | 游戏，共享文件存储   |       |
| [嘉谊互娱](http://www.joyient.com)   |  游戏，共享文件存储，特效渲染 |         |
| [一面数据](https://www.yimian.com.cn)    | 大数据   | [一面数据：Hadoop 迁移云上架构设计与实践](https://juicefs.com/zh-cn/blog/yimiancase)    |
| [携程旅行](https://www.ctrip.com)   |  互联网服务，大数据，AI   | [突破存储数据量限制，JuiceFS 在携程海量冷数据场景下的实践](https://juicefs.com/zh-cn/blog/xiecheng-case)，[稳定且高性价比的大模型存储：携程 10PB 级 JuiceFS 工程实践](https://juicefs.com/zh-cn/blog/user-stories/trip-10pb-level-llm-stroage-juicefs-practice)    |
| [同程旅行](https://ly.com)   |  互联网服务，共享文件存储  | [从 CephFS 到 JuiceFS：同程旅行亿级文件存储平台构建之路](https://juicefs.com/zh-cn/blog/user-stories/cephfs-vs-juicefs-draco-travel-file-storage)           |
| [贝壳](https://ke.com)   |  互联网服务，AI  |  [贝壳找房：为 AI 平台打造混合多云的存储加速底座](https://juicefs.com/zh-cn/blog/user-stories/beike-ai-platform-multi-cloud-storage)        |
| [Jerry](https://getjerry.com)  | 互联网服务，大数据  | [北美科技企业 Jerry：基于 JuiceFS 构建 ClickHouse 主从架构](https://juicefs.com/zh-cn/blog/user-stories/jerry-clickhouse-read-write-separation-juicefs-primary-replica-architecture)  |
| [航天宏图](https://www.piesat.cn)   |  遥感，共享文件存储    |                |
| [天桐互动](https://www.kuaidianyuedu.com)   |  AI，共享文件存储  |             |
| [视源股份](http://www.cvte.com)   |  共享文件存储  |         |
| [多点 DMALL](https://www.dmall.com)   |  零售，大数据   | [多点 DMALL：大数据存算分离下的存储架构探索与实践](https://juicefs.com/zh-cn/blog/user-stories/separation-of-storage--computing-building-cloud-native-big-data-platform)   |
| [百胜中国](https://www.yumchina.com)   |  零售，AI   |             |
| [酷数智能](http://www.kurudata.com)   | 共享文件存储    |       |
| [朗新集团](https://www.longshine.com)   | 大数据，共享文件存储   |           |
| [网易邮箱](https://mail.163.com)   |  互联网服务，大数据    |           |
| [南昌维网科技](https://www.vwell.cn)   | 共享数据存储     |         |
| [声网](https://www.agora.io/cn)   |  大数据    |            |
| [南京鹏云网络](https://www.pengyunnetwork.cn)  | 共享文件存储    |        |
| [聚云位智 LinkoopDB](http://www.datapps.cn)   |  大数据   |          |
| [国家天文科学数据中心](https://nadc.china-vo.org) |  共享文件存储   |        |
| [艾莎医学](https://www.ashermed.com)   |  共享文件存储    |          |
| [NodeReal](https://nodereal.io)    |  共享文件存储    |             |
| [不鸣科技](https://www.boomingtech.com)    |  共享文件存储   |            |
| [博依特科技](https://www.poi-t.com)   |  大数据     |          |
| [九曳供应链](https://www.jiuyescm.com)   | 大数据  |           |
| [摩登天空](https://www.modernsky.com/home)   | 共享文件存储  |       |
| [酷狗音乐](https://www.kugou.com)   | 共享文件存储   |       |
| [东方财富](https://www.eastmoney.com)   | 共享文件存储   |       |
| [Texas A&M 大学](https://www.tamu.edu) | 大学 |     |
| [Simon Fraser 大学](https://www.sfu.ca) | 大学 |     |
| [堪培拉大学](https://www.canberra.edu.au) | 大学 |     |

欢迎你在使用 JuiceFS 后，向大家分享你的使用经验，可以直接向这个列表提交 Pull Request，或者联系我们 [`hello@juicedata.io`](mailto:hello@juicedata.io)。


================================================
FILE: docs/zh_cn/community/articles.md
================================================
---
title: JuiceFS 文章合集
sidebar_position: 2
slug: /articles
---

JuiceFS 广泛适用于各种数据存储和共享场景，本页汇总来自世界各地用户使用 JuiceFS 的实践和相关技术文章，欢迎大家共同维护这个列表。

## AI

- [韩国国民搜索 NAVER：使用 JuiceFS 打通 Hadoop 与 Kubernetes 存储实践](https://juicefs.com/zh-cn/blog/user-stories/naver-juicefs-hadoop-kubernetes-storage)，2026-02-12，Nam Kyung-wan@NAVER
- [海量小文件 + 多云协同：地瓜机器人 JuiceFS 存储优化之路](https://juicefs.com/zh-cn/blog/user-stories/horizon-robotics-juicefs-small-file-multi-cloud-optimization)，2026-02-06，赵晗@地瓜机器人
- [3D-AIGC 存储架构演进：从 NFS、GlusterFS 到 JuiceFS](https://juicefs.com/zh-cn/blog/user-stories/3d-aigc-storage-evolution-juicefs)，2026-01-05，李威宇@光影焕像
- [JuiceFS + MinIO：Ariste AI 量化投资高性能存储实践](https://juicefs.com/zh-cn/blog/user-stories/juicefs-minio-ariste-ai-quant-storage)，2025-12-08，高玉堂@Ariste AI
- [NAS、对象存储与 JuiceFS：百亿量化基金的存储选型实践](https://juicefs.com/zh-cn/blog/solutions/quant-fund-storage-selection-nas-object-juicefs)，2025-11-20，蔡敏
- [基于 JuiceFS 构建 AI 推理：多模态复杂 I/O、跨云与多租户支持](https://juicefs.com/zh-cn/blog/solutions/juicefs-ai-inference-multi-modal-cross-cloud-multi-tenant)，2025-10-17，李少杰
- [九识智能：基于 JuiceFS 的自动驾驶多云亿级文件存储](https://juicefs.com/zh-cn/blog/user-stories/intsig-juicefs-autonomous-driving-multi-cloud-storage)，2025-09-24，邓君宇@九识智能
- [稿定科技：多云架构下的 AI 存储挑战与 JuiceFS 实践](https://juicefs.com/zh-cn/blog/user-stories/gaoding-ai-storage-challenges-multi-cloud-juicefs)，2025-08-08，可加@稿定科技
- [从资源闲置到弹性高吞吐，JuiceFS 如何构建 70GB/s 吞吐的缓存池？](https://juicefs.com/zh-cn/blog/solutions/building-high-throughput-cache-pool-resilience-with-juicefs)，2025-07-25，蔡敏
- [多模态“卷王”阶跃星辰：如何利用 JuiceFS 打造高效经济的大模型存储平台](https://juicefs.com/zh-cn/blog/user-stories/stepfun-ai-use-juicefs-create-multimodal-learning-storage-platform)，2025-07-23，缪昌新@阶跃星辰
- [合合信息：基于 JuiceFS 构建统一存储，支撑 PB 级 AI 训练](https://juicefs.com/zh-cn/blog/user-stories/intsig-use-juicefs-build-unified-storage-support-pb-ai-training)，2025-07-17，唐义凡@合合信息
- [中国科学院计算所：从 NFS 到 JuiceFS，大模型训推平台存储演进之路](https://juicefs.com/zh-cn/blog/user-stories/nfs-vs-juicefs-llm-storage)，2025-05-14，孙玮
- [百图生科：基于 JuiceFS 构建生命科学大模型存储平台，成本降 90%](https://juicefs.com/zh-cn/blog/user-stories/biomap-juicefs-building-llm-storage)，2025-05-07，郑泽东@百图生科
- [稳定且高性价比的大模型存储：携程 10PB 级 JuiceFS 工程实践](https://juicefs.com/zh-cn/blog/user-stories/trip-10pb-level-llm-stroage-juicefs-practice)，2025-03-10，吴松林@携程
- [加速 AI 训推：Lepton AI 如何构建多租户、低延迟云存储平台](https://juicefs.com/zh-cn/blog/user-stories/lepton-ai-build-multi-tenant-low-latency-cloud-storage-platform)，2025-01-17，丁聪@Lepton AI
- [多云架构，JuiceFS 如何实现一致性与低延迟的数据分发？](https://juicefs.com/zh-cn/blog/solutions/juicefs-multi-cloud-consistency-low-latency)，2025-01-10，蔡敏
- [从 CephFS 到 JuiceFS：同程旅行亿级文件存储平台构建之路](https://juicefs.com/zh-cn/blog/user-stories/cephfs-vs-juicefs-draco-travel-file-storage)，2024-12-13，位传海@同程旅行
- [vivo 轩辕文件系统：AI 计算平台存储性能优化实践](https://juicefs.com/zh-cn/blog/user-stories/vivo-ai)，2024-10-25，于相洋@vivo
- [大模型存储选型 & JuiceFS 在关键环节性能详解](https://juicefs.com/zh-cn/blog/solutions/large-model-storage-performance-juicefs)，2024-10-09，李少杰
- [MiniMax：如何基于 JuiceFS 构建高性能、低成本的大模型 AI 平台？](https://juicefs.com/zh-cn/blog/user-stories/minimax-juicefs-ai)，2024-08-30
- [JuiceFS 在多云架构中加速大模型推理](https://juicefs.com/zh-cn/blog/solutions/data-storage-multi-cloud-model-training-juicefs)，2024-08-23，高昌健
- [基于 JuiceFS 构建高校 AI 存储方案：高并发、系统稳定、运维简单](https://juicefs.com/zh-cn/blog/user-stories/juicefs-vs-nfs-ai-storage)，2024-06-26，徐国昊@中山大学
- [贝壳找房：为 AI 平台打造混合多云的存储加速底座](https://juicefs.com/zh-cn/blog/user-stories/beike-ai-platform-multi-cloud-storage)，2024-06-12，王天庆@贝壳找房
- [北美科技企业 Jerry：基于 JuiceFS 构建 ClickHouse 主从架构](https://juicefs.com/zh-cn/blog/user-stories/jerry-clickhouse-read-write-separation-juicefs-primary-replica-architecture)，2024-05-17，马涛@Jerry
- [大模型存储实践：性能、成本与多云](https://juicefs.com/zh-cn/blog/solutions/large-model-storage-performance-multi-cloud)，2024-04-07，苏锐
- [知乎：多云架构下大模型训练，如何保障存储稳定性](https://juicefs.com/zh-cn/blog/user-stories/data-storage-multi-cloud-zhihu-model-training-juicefs)，2024-03-28，王新
- [BentoML：如何使用 JuiceFS 加速大模型加载](https://juicefs.com/zh-cn/blog/user-stories/bentoml-use-juicefs-accelerate-large-model-loading)，2024-02-21，管锡鹏
- [韩国国民搜索 NAVER：为 AI 平台引入存储方案 JuiceFS](https://juicefs.com/zh-cn/blog/user-stories/naver-storage-solution-juicefs-ai-platforms)，2023-12-28，Nam Kyung-wan@NAVER
- [机器人行业数据闭环实践：从对象存储到 JuiceFS](https://juicefs.com/zh-cn/blog/user-stories/data-object-storag--to-juicefs)，2023-12-13，宋巨超@刻行
- [JuiceFS 在自动驾驶行业多云架构中的实践](https://juicefs.com/zh-cn/blog/user-stories/data-storage-multi-cloud-autonomous-driving-juicefs)，2023-10-27
- [构建易于运维的 AI 训练平台：存储选型与最佳实践](https://juicefs.com/zh-cn/blog/user-stories/easy-operate-ai-training-platform-storage-selection)，2023-08-04，孙冀川@思谋科技
- [之江实验室：如何基于 JuiceFS 为超异构算力集群构建存储层](https://juicefs.com/zh-cn/blog/user-stories/high-performance-scale-out-heterogeneous-computing-power-cluster-storage)，2023-06-09，洪晨@之江实验室
- [加速 AI 训练，如何在云上实现灵活的弹性吞吐](https://juicefs.com/zh-cn/blog/solutions/accelerate-ai-training-flexible-elastic-throughput-cloud)，2023-05-06，苏锐
- [如何借助分布式存储 JuiceFS 加速 AI 模型训练](https://juicefs.com/zh-cn/blog/usage-tips/how-to-use-juicefs-to-speed-up-ai-model-training)，2023-04-25，高昌健
- [云原生数据交付平台 Kuda 在 AI 场景下的模型分发实践](https://xie.infoq.cn/article/7b41c7ab9e8bdf51e9910b8a9)，2023-01-30，Geek_c4ea78
- [vivo AI 计算平台的轩辕文件存储实践](https://www.infoq.cn/article/3oFSOWfYGsX5h7xzsIe6)，2022-10-18，彭毅格@vivo AI 计算平台团队
- [深势科技分享 AI 企业多云存储架构实践](https://juicefs.com/zh-cn/blog/user-stories/dptech-ai-storage-in-multi-cloud-practice)，2022-07-06，李样兵@深势科技
- [AI 场景存储优化：云知声超算平台基于 JuiceFS 的存储实践](https://juicefs.com/zh-cn/blog/user-stories/juicefs-support-ai-storage-at-unisound)，2022-06-28，吕冬冬@云知声
- [上汽云 x JuiceFS：iGear 用了这个小魔法，模型训练速度提升 300%](https://juicefs.com/zh-cn/blog/user-stories/performance-boost-3x-on-igear-platform)，2022-01-27，上汽云 iGear
- [PaddlePaddle x JuiceFS : 全新缓存组件，大幅加速云上飞桨分布式训练作业](https://juicefs.com/zh-cn/blog/solutions/juicefs-helps-paddlepaddle-boosting-performance)，2022-01-06，百度 PaddlePaddle 团队
- [如何在 Kubernetes 集群中玩转 Fluid + JuiceFS](https://juicefs.com/zh-cn/blog/solutions/fluid-with-juicefs)，2021-12-01，吕冬冬@云知声 & 朱唯唯@Juicedata
- [百亿级小文件存储，JuiceFS 在自动驾驶行业的最佳实践](https://juicefs.com/zh-cn/blog/user-stories/ten-billion-level-small-files-storage-juicefs-best-practice-in-the-autonomous-driving-industry)，2021-10-28，高昌健
- [初探云原生下的 AI 分布式文件系统-JuiceFS](https://mp.weixin.qq.com/s/AiI0lVgFwycmK9Rl-6KW4w)，2021-08-18，屈骏@梯度科技
- [如何借助 JuiceFS 为 AI 模型训练提速 7 倍](https://juicefs.com/blog/cn/posts/how-to-use-juicefs-to-speed-up-ai-model-training-by-7-times)

## 大数据

- [基于 JuiceFS 的大数据平台上云：存储成本省 85%，性能媲美 HDFS](https://juicefs.com/zh-cn/blog/user-stories/hdfs-to-object-storage-juicefs)，2024-01-10，JuiceFS 资深用户
- [多点 DMALL：大数据存算分离下的存储架构探索与实践](https://juicefs.com/zh-cn/blog/user-stories/separation-of-storage--computing-building-cloud-native-big-data-platform), 2023-08-16，李铭@多点
- [网易互娱出海之旅：大数据平台上云架构设计与实践](https://juicefs.com/zh-cn/blog/user-stories/hadoop-compatible-storage-big-data-cloud-platform-s3)，2023-08-09，柯维鸿@网易互娱
- [云上大数据存储：探究 JuiceFS 与 HDFS 的异同](https://juicefs.com/zh-cn/blog/engineering/similarities-and-differences-between-hdfs-and-juicefs-structures)，2023-04-04，汤友棚
- [Protobuf 在知乎大数据场景的应用，利用 JuiceFS 动态注入 JAR 包](https://zhuanlan.zhihu.com/p/586120009)，2022-11-23，胡梦宇@知乎
- [金山云：基于 JuiceFS 的 Elasticsearch 温冷热数据管理实践](https://juicefs.com/zh-cn/blog/user-stories/juicefs-elasticsearch-cold-heat-data-management)，2022-11-17，侯学峰@金山云
- [JuiceFS 替代 HDFS，苦 HDFS 小文件久矣](https://zhuanlan.zhihu.com/p/569586606)，2022-10-08，久耶供应链 大数据总监
- [JuiceFS 在 Elasticsearch/ClickHouse 温冷数据存储中的实践](https://juicefs.com/zh-cn/blog/solutions/juicefs-elasticsearch-clickhouse-hot-cold-data-storage)，2022-09-30，高昌健
- [从 Hadoop 到云原生，大数据平台如何做存算分离](https://juicefs.com/zh-cn/blog/solutions/hadoop-to-cloud-native-separation-of-compute-and-storage-for-big-data-platform)，2022-09-14，苏锐
- [理想汽车：从 Hadoop 到云原生的演进与思考](https://juicefs.com/zh-cn/blog/user-stories/li-auto-case-hadoop-cloud-native)，2022-08-30，聂磊@理想汽车
- [一面数据：Hadoop 迁移云上架构设计与实践](https://juicefs.com/zh-cn/blog/user-stories/yimiancase)，2022-07-28，刘畅&李阳良@一面数据
- [移动云使用 JuiceFS 支持 Apache HBase 增效降本的探索](https://juicefs.com/zh-cn/blog/user-stories/juicefs-support-hbase-at-chinamobile-cloud)，2022-05-31，陈海峰@移动云
- [JuiceFS 在数据湖存储架构上的探索](https://juicefs.com/zh-cn/blog/solutions/juicefs-exploration-on-data-lake-storage-architecture)，2022-04-28，高昌健
- [JuiceFS 在理想汽车的使用和展望](https://juicefs.com/zh-cn/blog/user-stories/li-auto-with-juicefs)，2022-01-21，聂磊@理想汽车
- [JuiceFS + HDFS 权限问题定位](https://mp.weixin.qq.com/s/9mIMPuljL-UxP9t7-3dKxw)，2021-12-31，李阳良@一面数据
- [知乎 x JuiceFS：利用 JuiceFS 给 Flink 容器启动加速](https://juicefs.com/zh-cn/blog/user-stories/zhihu-flink-with-juicefs)，2021-11-22，胡梦宇@知乎
- [Elasticsearch 存储成本省 60%，稿定科技干货分享](https://juicefs.com/zh-cn/blog/user-stories/gaoding-with-juicefs)，2021-10-09，稿定 SRE 团队
- [Shopee x JuiceFS：ClickHouse 冷热数据分离存储架构与实践](https://juicefs.com/zh-cn/blog/user-stories/shopee-clickhouse-with-juicefs)，2021-10-09，Teng@Shopee
- [JuiceFS on AWS EMR](https://www.youtube.com/watch?v=PFNOcqiW4-M&t=3s), Youtube video, Pahud Dev
- [JuiceFS 加速 Spark Shuffle](https://mp.weixin.qq.com/s/JGa2eYqM8db_OMU7SzZw8A)，2021-03-09，RespectM
- [JuiceFS 如何帮助趣头条超大规模 HDFS 降负载](https://juicefs.com/blog/cn/posts/qutoutiao-big-data-platform-user-case)
- [环球易购数据平台如何做到既提速又省钱？](https://juicefs.com/blog/cn/posts/globalegrow-big-data-platform-user-case)
- [JuiceFS 在大搜车数据平台的实践](https://juicefs.com/blog/cn/posts/juicefs-practice-in-souche)
- [使用 AWS Cloudformation 在 Amazon EMR 中一分钟配置 JuiceFS](https://aws.amazon.com/cn/blogs/china/use-aws-cloudformation-to-configure-juicefs-in-amazon-emr-in-one-minute)
- [使用 JuiceFS 在云上优化 Kylin 4.0 的存储性能](https://juicefs.com/blog/cn/posts/optimize-kylin-on-juicefs)
- [ClickHouse 存算分离架构探索](https://juicefs.com/blog/cn/posts/clickhouse-disaggregated-storage-and-compute-practice)
- [存算分离实践：JuiceFS 在中国电信日均 PB 级数据场景的应用](https://juicefs.com/zh-cn/blog/user-stories/applicatio-of-juicefs-in-china-telecoms-daily-average-pb-data-scenario)

## 云原生 & Kubernetes

- [加速 AI 训推：Lepton AI 如何构建多租户、低延迟云存储平台](https://juicefs.com/zh-cn/blog/user-stories/lepton-ai-build-multi-tenant-low-latency-cloud-storage-platform)，2025-01-17，丁聪@Lepton AI
- [海柔仿真系统存储实践：混合云架构下实现高可用与极简运维](https://juicefs.com/zh-cn/blog/user-stories/multi-cloud-storage-high-availability)，2024-11-08，吴森栋@海柔创新
- [好未来：多云环境下基于 JuiceFS 建设低运维模型仓库](https://juicefs.com/zh-cn/blog/user-stories/multi-cloud-storage-juicefs-model-stroage)，2024-11-06，贺龙华@好未来
- [小米云原生文件存储平台化实践：支撑 AI 训练、大模型、容器平台多项业务](https://juicefs.com/zh-cn/blog/user-stories/cloud-native-file-storage-platform-as-ai-training-large-models-container-platforms)，2023-09-22，孙佳朋@小米
- [大模型训练：K8s 环境中数千节点存储最佳实践](https://juicefs.com/zh-cn/blog/usage-tips/large-model-storage-kubernetes)，2024-09-25，朱唯唯
- [Clobotics 计算机视觉场景存储实践：多云架构、POSIX 全兼容、低运维的统一存储](https://juicefs.com/zh-cn/blog/user-stories/clobotics-posix-multi-cloud-storage)，2024-08-30，Jonnas@Clobotics
- [如何在 Kubernetes 中使用 ClickHouse 和 JuiceFS](https://juicefs.com/zh-cn/blog/usage-tips/kubernetes-clickhouse-juicefs)，2024-08-02，Vitaliy Zakaznikov
- [Kubernetes 数据持久化：从零开始使用 JuiceFS CSI Driver](https://juicefs.com/zh-cn/blog/usage-tips/kubernetes-juicefs-csi-driver)，2023-12-11，于鸿儒
- [从本地到云端：豆瓣如何使用 JuiceFS 实现统一的数据存储](https://juicefs.com/zh-cn/blog/user-stories/scalable-computing-unified-data-storage-ops-cloud-spark-k8s-juicefs)，2023-05-10，曹丰宇@豆瓣
- [Sidecar-详解 JuiceFS CSI Driver 新模式](https://juicefs.com/zh-cn/blog/usage-tips/explain-in-detail-juicefs-csi-driver-sidecar)，2023-02-22，朱唯唯
- [存储更弹性，详解 Fluid“ECI 环境数据访问”新功能](https://juicefs.com/zh-cn/blog/solutions/fluid-eci-juicefs)，2022-09-05，朱唯唯
- [基于 JuiceFS 的 KubeSphere DevOps 项目数据迁移方案](https://mp.weixin.qq.com/s/RgUHRUrL0u-J9nVqwOfS8Q)，2022-08-04，尹珉@数跑科技
- [JuiceFS 在火山引擎边缘计算的应用实践](https://juicefs.com/zh-cn/blog/user-stories/how-juicefs-accelerates-edge-rendering-performance-in-volcengine)，2023-02-17
，何兰州
- [使用 KubeSphere 应用商店 5 分钟内快速部署 JuiceFS](https://juicefs.com/zh-cn/blog/solutions/kubesphere-with-juicefs)，2021-11-19，尹珉@杭州数跑科技 & 朱唯唯@Juicedata
- [JuiceFS CSI Driver 的最佳实践](https://juicefs.com/zh-cn/blog/engineering/csi-driver-best-practices)，2021-11-08，朱唯唯
- [JuiceFS CSI Driver v0.10 全新架构解读](https://juicefs.com/zh-cn/blog/engineering/juicefs-csi-driver-v010)，2021-07-28，朱唯唯

## 数据共享

- [Ollama + JuiceFS：一次拉取，到处运行](https://juicefs.com/zh-cn/blog/usage-tips/ollama-juicefs)，2024-09-09，朱唯唯
- [Conda + JuiceFS：增强 AI 开发环境共享能力](https://juicefs.com/zh-cn/blog/usage-tips/conda-juicefs-enhance-ai)，2024-12-04，于鸿儒
- [云上使用 Stable Diffusion，模型数据如何共享和存储？](https://juicefs.com/zh-cn/blog/usage-tips/share-store-model-data-stable-diffusion-cloud)，2023-06-16，于鸿儒
- [基于 JuiceFS 搭建 Milvus 分布式集群](https://juicefs.com/blog/cn/posts/build-milvus-distributed-cluster-based-on-juicefs)
- [如何解决 NAS 单点故障还顺便省了 90% 的成本？](https://juicefs.com/blog/cn/posts/modao-replace-nas-with-juicefs)

## 数据备份、迁移与恢复

- [JuiceFS v1.3-Beta1：一亿文件备份分钟级完成，性能优化全解析](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v13-beta1-backup)，2025-05-21，黄杰烽
- [基于 JuiceFS 的低成本 Elasticsearch 云上备份存储](https://juicefs.com/zh-cn/blog/user-stories/low-cost-elasticsearch-cloud-backup-storage-juicefs)，2023-11-15，uuwang（正能量云）@杭州火石创造
- [突破存储数据量限制，JuiceFS 在携程海量冷数据场景下的实践](https://juicefs.com/zh-cn/blog/user-stories/xiecheng-case)，2022-08-29，妙成 & 小峰
- [40+ 倍提升，详解 JuiceFS 元数据备份恢复性能优化方法](https://juicefs.com/zh-cn/blog/engineering/juicefs-load-and-dump-optimization)，2022-07-13，执剑
- [利用 JuiceFS 把 MySQL 备份验证性能提升 10 倍](https://juicefs.com/blog/cn/posts/optimize-xtrabackup-prepare-by-oplog)
- [跨云数据搬迁利器：Juicesync](https://juicefs.com/blog/cn/posts/juicesync)
- [下厨房基于 JuiceFS 的 MySQL 备份实践](https://juicefs.com/blog/cn/posts/xiachufang-mysql-backup-practice-on-juicefs)
- [如何用 JuiceFS 归档备份 NGINX 日志](https://juicefs.com/blog/cn/posts/backup-nginx-logs-on-juicefs)

## 原理解析

- [JuiceFS sync 原理解析与性能优化，企业级数据同步利器](https://juicefs.com/zh-cn/blog/engineering/juicefs-sync-principle-performance-optimization)，2025-11-26，执剑
- [深入解析 JuiceFS 垃圾回收机制](https://juicefs.com/zh-cn/blog/engineering/juicefs-gc-mechanism)，2025-10-30，许誉超
- [JuiceFS writeback：写加速机制与适用场景解析](https://juicefs.com/zh-cn/blog/solutions/juicesfs-writeback-analysis)，2025-08-25，蔡敏
- [JuiceFS on Windows: 首个 Beta 版的探索与优化原理](https://juicefs.com/zh-cn/blog/engineering/juicefs-on-windows-beta)，2025-08-04，陈杰
- [JuiceFS v1.3-Beta2：Apache Ranger 集成与权限控制原理](https://juicefs.com/zh-cn/blog/release-notes/juicefs-1-3-ranger)，2025-06-06，汤友棚
- [深度解析 JuiceFS 权限管理：Linux 多种安全机制全兼容](https://juicefs.com/zh-cn/blog/engineering/linux-file-system-juicefs-access-management)，2025-06-12，黄杰烽
- [JuiceFS v1.3-Beta1：一亿文件备份分钟级完成，性能优化全解析](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v13-beta1-backup)，2025-05-21，黄杰烽
- [代码级解析：JuiceFS 元数据、数据存储设计原理](https://juicefs.com/zh-cn/blog/engineering/juicefs-metadata-data-stroage-designed)，2024-11-25，Arthur
- [JuiceFS CSI：Mount Pod 的平滑升级及其实现原理](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-csi-mount-pod-smooth-upgrade)，2024-10-30，朱唯唯
- [一文详解 JuiceFS 读性能：预读、预取、缓存、FUSE 和对象存储](https://juicefs.com/zh-cn/blog/engineering/juicefs-read-performance)，2024-07-26，莫飞虎
- [平滑升级功能详解，不停服即可更新](https://juicefs.com/zh-cn/blog/engineering/smooth-upgrade)，2024-05-07，执剑
- [JuiceFS 目录配额功能设计详解](https://juicefs.com/zh-cn/blog/engineering/design-juicefs-directory-quotas)，2023-10-09，Sandy
- [JuiceFS CSI Driver 架构设计详解](https://juicefs.com/zh-cn/blog/engineering/juicefs-csi-driver-arch-design)，2022-03-23，朱唯唯
- [JuiceFS 数据加密原理](https://juicefs.com/zh-cn/blog/engineering/juicefs-encryption)，2021-12-23，Sandy

## 教程、使用指南、评测及其他

- [JuiceFS 企业版 5.3 特性详解：单文件系统支持超 5,000 亿文件，首次引入 RDMA](https://juicefs.com/zh-cn/blog/release-notes/juicefs-enterprise-5-3-500b-files-rdma-support)，2026-01-29，Sandy
- [仅两台缓存节点，如何支撑 1.45TB/s 大吞吐业务 #JuiceFS 优化实践](https://juicefs.com/zh-cn/blog/solutions/how-2-cache-nodes-support-tbs-throughput)，2026-01-16，蔡敏
- [从 MLPerf Storage v2.0 看 AI 训练中的存储性能与扩展能力](https://juicefs.com/zh-cn/blog/engineering/juicefs-mlperf-storage-v2-ai-training-storage-performance)，2025-09-17，莫飞虎
- [实现 TB 级聚合带宽，JuiceFS 分布式缓存网络优化实践](https://juicefs.com/zh-cn/blog/engineering/tb-bandwidth-juicefs-distributed-cache-optimization)，2025-09-03，莫飞虎
- [3000 台 JuiceFS Windows 客户端性能评估](https://juicefs.com/zh-cn/blog/solutions/juicefs-windows--performance-test)，2025-08-06，蔡敏
- [探索 LanceDB 在多种存储方案下的查询效率](https://juicefs.com/zh-cn/blog/solutions/lancedb-query-performance-across-storage-solutions)，2025-07-30，白伯纯
- [JuiceFS 社区版 V1.3 正式发布：支持 Python SDK、亿级备份加速、SQL 和 Windows 全面优化](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v13-ga)，2025-07-08
- [Lustre 与 JuiceFS：架构设计、文件分布与特性比较](https://juicefs.com/zh-cn/blog/engineering/lustre-vs-juicefs)，2025-06-18，刘庆
- [JuiceFS 企业版 5.2：迈入千亿文件时代，稳定性与性能再升级，首次支持 Windows 客户端](https://juicefs.com/zh-cn/blog/release-notes/juicefs-enterprise-edition-v52)，2025-05-28
- [JuiceFS v1.3-beta1：新增 Python SDK，特定场景性能 3 倍于 FUSE](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v13-beta1-python-sdk)，2025-05-09，莫飞虎
- [JuiceFS v1.3-beta1：全面优化 SQL 数据库支持，十亿级元数据管理新选项](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v13-beta1-sql)，2025-04-23，楼方鑫
- [DeepSeek 3FS 与 JuiceFS：架构与特性比较](https://juicefs.com/zh-cn/blog/engineering/deepseek-3fs-vs-juicefs)，2025-03-18，刘庆
- [FUSE，从内核到用户态文件系统的设计之路](https://juicefs.com/zh-cn/blog/engineering/fuse-file-system-design)，2025-02-27，许誉超
- [告别“服务器繁忙”：JuiceFS 助力打造专属 DeepSeek 牧场](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-deepseek--storage)，2025-02-08，朱唯唯
- [缓存管理自动化：JuiceFS 企业版 Cache Group Operator 新特性发布](https://juicefs.com/zh-cn/blog/release-notes/juicefs-cache-group-operator)，2024-12-26，张旭辉
- [代码级解析：JuiceFS 元数据、数据存储设计原理](https://juicefs.com/zh-cn/blog/engineering/juicefs-metadata-data-stroage-designed)，2024-11-25，Arthur
- [使用 JuiceFS 快照功能实现数据库发布与端到端测试](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-snapshot-database-test)，2024-11-15，马涛@Jerry
- [详解 JuiceFS 在多云架构下的数据同步与一致性](https://juicefs.com/zh-cn/blog/solutions/juicefs-mirror)，2024-10-18
- [全新 JuiceFS Python SDK 快速上手](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-python-sdk)，2024-10-14，于鸿儒
- [Hugging Face + JuiceFS：多用户多节点环境下提升模型加载效率](https://juicefs.com/zh-cn/blog/usage-tips/huggingface-juicefs)，2024-09-29，于鸿儒
- [JuiceFS 企业版 5.1：新增可写镜像、Python SDK 多项特性，强化 AI 场景支持](https://juicefs.com/zh-cn/blog/release-notes/juicefs-enterprise-edition-v51)，2024-09-14
- [性能、成本与 POSIX 兼容性比较：JuiceFS vs EFS vs FSx for Lustre](https://juicefs.com/zh-cn/blog/engineering/juicefs-vs-efs-fsx-for-lustre)，2024-09-04，白伯纯
- [如何判断数据库和对象存储是否被 JuiceFS 使用？](https://juicefs.com/zh-cn/blog/usage-tips/database-object-storage-used-by-juicefs)，2024-08-16，于鸿儒
- [MemVerge：小文件写入性能 5 倍于 S3FS，JuiceFS 加速生信研究](https://juicefs.com/zh-cn/blog/user-stories/memverge-s3fs-juicefs)，2024-07-24，Jon Jiang@MemVerge
- [JuiceFS 直连 NFS 新功能介绍，赋能 NAS 进行 AI 训练](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-nfs-nas-ai)，2024-07-19，于鸿儒
- [SeaweedFS + TiKV 部署保姆级教程](https://juicefs.com/zh-cn/blog/usage-tips/seaweedfs-tikv)，2024-07-12，杨进豪@思谋科技
- [JuiceFS 社区版 v1.2 发布，新增企业级权限管理、平滑升级功能](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v12)，2024-06-21
- [JuiceFS S3 Gateway 新功能上手指南](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-s3-gateway)，2024-06-05，于鸿儒
- [JuiceFS POSIX ACL 权限管理上手指南](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-posix-acl-permission-management-guide)，2024-05-23，于鸿儒
- [详解 JuiceFS sync 新功能，选择性同步增强与多场景性能优化](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-sync)，2024-05-15，执剑
- [JuiceFS v1.2-beta 1: ACL 功能全解析，更精细的权限控制](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v12-beta-1-acl)，2024-04-26，黄杰烽
- [JuiceFS v1.2-beta1，Gateway 升级，多用户场景权限管理更灵活](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v12-beta1-gateway)，2024-04-22，执剑
- [如何使用 Grafana 监控文件系统状态](https://juicefs.com/zh-cn/blog/usage-tips/use-grafana-monitor-file-system-status)，2024-04-12，于鸿儒
- [在 Google Colab 中使用 JuiceFS](https://juicefs.com/zh-cn/blog/community/google-colab-juicefs)，2024-03-22，Jet
- [从 HPC 到 AI：探索文件系统的发展及性能评估](https://juicefs.com/zh-cn/blog/user-stories/hpc-ai-file-systems-performance-development)，2024-03-06，鲁蔚征
- [千卡利用率超 98%，详解 JuiceFS 在权威 AI 测试中的实现策略](https://juicefs.com/zh-cn/blog/engineering/juicefs-mlperf-test)，2024-02-28，莫飞虎
- [极限挑战：使用 Go 打造百亿级文件系统的实践之旅](https://juicefs.com/zh-cn/blog/engineering/go-build-billion-file-system)，2024-02-02，Sandy
- [详解新功能 JuiceFS CSI Dashboard：简化云上环境的问题排查流程](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-csi-dashboard)，2023-12-29，李晨曦
- [JuiceFS 用户必备的 6 个技巧](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-user-skills)，2023-11-22，于鸿儒
- [手把手教你搭建 Ceph 集群、对接 JuiceFS 文件系统](https://juicefs.com/zh-cn/blog/usage-tips/ceph-juicefs)，2023-11-20
- [JuiceFS 企业版 5.0 新特性速览](https://juicefs.com/zh-cn/blog/release-notes/juicefs-enterprise-edition-v5)，2023-11-17
- [POSIX 真的不适合对象存储吗](https://juicefs.com/zh-cn/blog/engineering/posix-object-store-suitable-file-system)，2023-10-24，于鸿儒
- [浅析 GlusterFS 与 JuiceFS 的架构异同](https://juicefs.com/zh-cn/blog/engineering/similarities-and-differences-between-glusterfs-and-juicefs-structures)，2023-08-23，Sandy
- [如何基于 JuiceFS 配置 Samba 和 NFS 共享？](https://juicefs.com/zh-cn/blog/usage-tips/configure-samba-and-nfs-shares-based-juicefs)，2023-08-04，于鸿儒
- [从架构到特性：JuiceFS 企业版首次全面解析](https://juicefs.com/zh-cn/blog/solutions/juicefs-enterprise-edition-features-vs-community-edition)，2023-06-06，高昌健
- [浅析三款大规模分布式文件系统架构设计](https://juicefs.com/zh-cn/blog/engineering/large-scale-distributed-filesystem-comparison)，2023-03-08，高昌健
- [浅析 SeaweedFS 与 JuiceFS 架构异同](https://juicefs.com/zh-cn/blog/engineering/similarities-and-differences-between-seaweedfs-and-juicefs-structures)，2023-02-10，陈杰
- [分布式文件系统 JuiceFS 测试总结](https://mp.weixin.qq.com/s/XFWQASQFt5FISip-mrYG4Q)，2022-09-13，邹秋波
- [JuiceFS 元数据引擎选型指南](https://juicefs.com/zh-cn/blog/usage-tips/juicefs-metadata-engine-selection-guide)，2022-10-12，Sandy
- [GitHub Codespaces 上分离计算和存储？ #JuiceFS 花式玩法#](https://mp.weixin.qq.com/s/geoYkruj6lkXOns7bib-qA)，2022-08-19，张俊帆
- [浅析 Redis 作为 JuiceFS 元数据引擎的优劣势](https://juicefs.com/zh-cn/blog/usage-tips/introduce-redis-as-juicefs-metadata-engine)，2022-07-22，高昌健
- [如何使用 etcd 实现分布式 /etc 目录](https://juicefs.com/zh-cn/blog/usage-tips/make-distributed-etc-directory-with-etcd-and-juicefs)，2022-06-23，朱唯唯
- [社区投稿｜小团队如何妙用 JuiceFS](https://mp.weixin.qq.com/s/AAw1I6f36h1pZjLELtQCow)，2022-04-01，timfeirg
- [在 Windows 上如何后台运行 JuiceFS](https://mp.weixin.qq.com/s/nMqCuit4zRoNCK4m-b0hxA)，2022-03-10，秦牧羊
- [JuiceFS 导出/导入元数据的优化之路](https://www.youtube.com/watch?v=MDMitDtLly4), Youtube Video
- [初探 JuiceFS](https://mp.weixin.qq.com/s/jTBAcmUiBMBvTutdOUHpcA)，2021-11-28，ahnselina
- [JuiceFS 源码阅读 - 上](https://mp.weixin.qq.com/s/mdqFJLpaJ249rUUEnRiP3Q)，2021-06-24，秦牧羊
- [JuiceFS 你应该知道的一些事](https://mp.weixin.qq.com/s/6ylBmUXy_3aQggznl65nHg)，2021-01-15，祝威廉@Kyligence

## 内容收录

如果你也想把自己的 JuiceFS 应用方案添加到这份案例列表中，可以采用以下几种投稿方式：

### GitHub 投稿

你可以通过 GitHub 创建本仓库的分支，将你的案例网页链接添加到相应的分类中，提交 Pull Request 申请，等待审核和分支合并。

### 社交媒体投稿

你可以加入 JuiceFS 官方的 [Slack 频道](https://go.juicefs.com/slack)，任何一位工作人员都可以接洽案例投稿事宜。


================================================
FILE: docs/zh_cn/community/integrations.md
================================================
---
title: 社区集成
sidebar_position: 2
slug: /integrations
---

## SDK

- [旷视科技](https://megvii.com) 团队贡献了 [Python SDK](https://github.com/megvii-research/juicefs-python)。

## AI

- [云知声](https://www.unisound.com) 团队参与开发 [Fluid](https://github.com/fluid-cloudnative/fluid) JuiceFSRuntime 缓存引擎，具体请参考[文档](https://github.com/fluid-cloudnative/fluid/blob/master/docs/zh/samples/juicefs_runtime.md) 。
- [PaddlePaddle](https://github.com/paddlepaddle/paddle) 团队已将 JuiceFS 缓存加速特性集成到 [Paddle Operator](https://github.com/PaddleFlow/paddle-operator) 中，具体请参考[文档](https://github.com/PaddleFlow/paddle-operator/blob/sampleset/docs/zh_CN/ext-overview.md)。
- 通过 JuiceFS 可以轻松搭建一个 [Milvus](https://milvus.io) 向量搜索引擎，Milvus 团队已经撰写了官方 [案例](https://zilliz.com/blog/building-a-milvus-cluster-based-on-juicefs) 与 [教程](https://tutorials.milvus.io/en-juicefs/index.html?index=..%2F..index#0)。

## 大数据

- 大数据 OLAP 分析引擎 [Apache Kylin 4.0](http://kylin.apache.org) 可以使用 JuiceFS 在所有公有云上轻松部署存储计算分离架构的集群，请看 [视频分享](https://www.bilibili.com/video/BV1c54y1W72S) 和 [案例文章](https://juicefs.com/zh-cn/blog/optimize-kylin-on-juicefs)。
- [Apache Hudi](https://hudi.apache.org) 自 v0.10.0 版本开始支持 JuiceFS，你可以参考[官方文档](https://hudi.apache.org/docs/jfs_hoodie)了解如何配置 JuiceFS。

## DevOps

- [Terraform Provider for JuiceFS](https://github.com/toowoxx/terraform-provider-juicefs) 由 Toowoxx IT GmbH 贡献，他们是一家来自德国的 IT 服务公司。

## Alfred

JuiceFS 文档站集成了 Alfred，可以快速搜索 JuiceFS 文档。

![JuiceFS Alfred Workflow](../images/workflow-root.png)

只需在 Alfred 中输入关键字（默认：jfs）并提供查询即可查看 JuiceFS 文档的即时搜索结果。

### 安装

安装 Alfred 5 的 JuiceFS workflow： [下载最新版本](https://github.com/zwwhdls/juicefs-alfred-workflow/releases/download/v0.2.0/JuiceFS.Search.alfredworkflow)

### 使用

可以搜索 JuiceFS 的所有文档，包括社区、企业和 CSI：

```
# 查询 JuiceFS 社区版文档
jfs ce <search>
# 查询 JuiceFS 企业版文档
jfs ee <search>
# 查询 JuiceFS CSI 文档
jfs csi <search>
```

![JuiceFS Alfred Workflow demo](../images/workflow-demo.gif)

### Workflow 可配置的变量

- `API_KEY`：JuiceFS 文档使用的 algolia 的 API 密钥，使用默认值就可以。
- `LANGUAGE`：要搜索的 JuiceFS 文档的语言（en/zh），默认为 en。
- `HITS_PER_PAGE`：每次搜索的点击量，默认值为 10。

![JuiceFS Alfred Workflow configuration](../images/configuration.png)


================================================
FILE: docs/zh_cn/community/usage_tracking.md
================================================
---
title: 用量上报
sidebar_position: 4
---

JuiceFS 默认会收集并上报 **「匿名」** 的使用数据。这些数据仅仅包含核心指标（如版本号、文件系统大小），不会包含任何用户信息或者敏感数据。你可以查看[这里](https://github.com/juicedata/juicefs/blob/main/pkg/usage/usage.go)检查相关代码。

这些数据帮助我们理解社区如何使用这个项目。你可以简单地通过 `--no-usage-report` 选项关闭用量上报：

```
juicefs mount --no-usage-report
```


================================================
FILE: docs/zh_cn/deployment/_share_via_nfs.md
================================================
---
sidebar_label: 配置 NFS 共享
sidebar_position: 5
---
# 通过 NFS 共享 JuiceFS 存储


================================================
FILE: docs/zh_cn/deployment/_share_via_smb.md
================================================
---
sidebar_label: 配置 SMB 共享
sidebar_position: 6
---
# 通过 SMB 共享 JuiceFS 存储


================================================
FILE: docs/zh_cn/deployment/automation.md
================================================
---
title: 自动化部署
sidebar_position: 7
---

面对大量节点需要安装并挂载 JuiceFS 时，可以用本章介绍的方法进行自动化部署。

下方示范仅用于挂载，因此你需要提前[创建好 JuiceFS 文件系统](../getting-started/standalone.md#juicefs-format)。

## Ansible

使用 [Ansible](https://ansible.com) 在本机挂载 JuiceFS 文件系统的 playbook 样例如下：

```yaml
- hosts: localhost
  tasks:
    - set_fact:
        # 根据实际情况修改
        meta_url: sqlite3:///tmp/myjfs.db
        jfs_path: /jfs
        jfs_pkg: /tmp/juicefs-ce.tar.gz
        jfs_bin_dir: /usr/local/bin

    - get_url:
        # 根据实际情况替换成需要的下载链接
        url: https://d.juicefs.com/juicefs/releases/download/v1.0.2/juicefs-1.0.2-linux-amd64.tar.gz
        dest: "{{jfs_pkg}}"

    - ansible.builtin.unarchive:
        src: "{{jfs_pkg}}"
        dest: "{{jfs_bin_dir}}"
        include:
          - juicefs

    - name: Create symbolic for fstab
      ansible.builtin.file:
        src: "{{jfs_bin_dir}}/juicefs"
        dest: "/sbin/mount.juicefs"
        state: link

    - name: Mount JuiceFS and create fstab entry
      mount:
        path: "{{jfs_path}}"
        src: "{{meta_url}}"
        fstype: juicefs
        opts: _netdev
        state: mounted
```


================================================
FILE: docs/zh_cn/deployment/hadoop_java_sdk.md
================================================
---
title: 在 Hadoop 生态使用 JuiceFS
sidebar_position: 3
slug: /hadoop_java_sdk
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

JuiceFS 提供与 HDFS 接口[高度兼容](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/filesystem/introduction.html)的 Java 客户端，Hadoop 生态中的各种应用都可以在不改变代码的情况下，平滑地使用 JuiceFS 存储数据。

## 环境要求

### 1. Hadoop 及相关组件

JuiceFS Hadoop Java SDK 同时兼容 Hadoop 2.x、Hadoop 3.x，以及 Hadoop 生态中的各种主流组件。

### 2. 用户权限

JuiceFS 默认使用本地的「用户／UID」及「用户组／GID」映射，在分布式环境下使用时，为了避免权限问题，请参考[文档](../administration/sync_accounts_between_multiple_hosts.md)将需要使用的「用户／UID」及「用户组／GID」同步到所有 Hadoop 节点。也可以通过定义一个全局的用户和用户组文件使得集群中的所有节点共享权限配置，相关配置请查看[这里](#其它配置)。

### 3. 文件系统

通过 JuiceFS Java 客户端为 Hadoop 生态提供存储，需要提前创建 JuiceFS 文件系统。部署 Java 客户端时，在配置文件中指定已创建文件系统的元数据引擎地址。

创建文件系统可以参考 [JuiceFS 快速上手指南](../getting-started/installation.md)。

:::note 注意
如果要在分布式环境中使用 JuiceFS，创建文件系统时，请合理规划要使用的对象存储和数据库，确保它们可以被每个集群节点正常访问。
:::

### 4. 内存资源

根据计算任务（如 Spark executor）的读写负载，JuiceFS Hadoop Java SDK 可能需要额外使用 4 * [`juicefs.memory-size`](#io-配置) 的堆外内存用来加速读写性能。默认情况下，建议为计算任务至少配置 1.2GB 的堆外内存。

### 5. Java 运行时版本

JuiceFS Hadoop Java SDK 默认使用 JDK 8 编译，如果需要在高版本的 Java 运行时中使用（如 Java 17），需在 JVM 参数中增加以下选项以允许使用反射 API：

```shell
--add-exports=java.base/sun.nio.ch=ALL-UNNAMED
```

更多关于以上选项的说明请参考[官方文档](https://docs.oracle.com/en/java/javase/17/migrate/migrating-jdk-8-later-jdk-releases.html#GUID-7BB28E4D-99B3-4078-BDC4-FC24180CE82B)。

## 安装与编译客户端

### 安装预编译客户端

请参考[「安装」](../getting-started/installation.md#install-the-pre-compiled-client)文档了解如何下载预编译的 JuiceFS Hadoop Java SDK。

### 手动编译客户端

:::note 注意
不论为哪个系统环境编译客户端，编译后的 JAR 文件都为相同的名称，且只能部署在匹配的系统环境中，例如在 Linux 中编译则只能用于 Linux 环境。另外，由于编译的包依赖 glibc，建议尽量使用低版本的系统进行编译，这样可以获得更好的兼容性。
:::

编译依赖以下工具：

- [Go](https://golang.org) 1.15+（中国用户建议使用 [Goproxy China 镜像加速](https://github.com/goproxy/goproxy.cn)）
- JDK 8+
- [Maven](https://maven.apache.org) 3.3+（中国用户建议使用[阿里云镜像加速](https://maven.aliyun.com)）
- Git
- make
- GCC 5.4+

#### Linux 和 macOS

克隆仓库：

```shell
git clone https://github.com/juicedata/juicefs.git
```

进入目录，执行编译：

```shell
cd juicefs/sdk/java
make
```

:::note 注意
如果使用 Ceph 的 RADOS 作为 JuiceFS 的存储引擎，需要先安装 `librados-dev` 包。
:::

```shell
cd juicefs/sdk/java
make ceph
```

编译完成后，可以在 `sdk/java/target` 目录中找到编译好的 `JAR` 文件，包括两个版本：

- 包含第三方依赖的包：`juicefs-hadoop-X.Y.Z.jar`
- 不包含第三方依赖的包：`original-juicefs-hadoop-X.Y.Z.jar`

建议使用包含第三方依赖的版本。

#### Windows

用于 Windows 环境的客户端需要在 Linux 或 macOS 系统上通过交叉编译的方式获得，编译依赖 [mingw-w64](https://www.mingw-w64.org)，需要提前安装。

与编译面向 Linux 和 macOS 客户端的步骤相同，比如在 Ubuntu 系统上，先安装 `mingw-w64` 包，解决依赖问题：

```shell
sudo apt install mingw-w64
```

克隆并进入 JuiceFS 源代码目录，执行以下代码进行编译：

```shell
cd juicefs/sdk/java
```

```shell
make win
```

## 部署客户端

让 Hadoop 生态各组件能够正确识别 JuiceFS，需要进行以下配置：

1. 将编译好的 JAR 文件和 `$JAVA_HOME/lib/tools.jar` 放置到组件的 `classpath` 内，常见大数据平台和组件的安装路径见下表。
2. 将 JuiceFS 相关配置写入配置文件（通常是 `core-site.xml`），详见[客户端配置参数](#客户端配置参数)。

建议将 JAR 文件放置在一个统一的位置，其他位置通过符号链接进行调用。

### 大数据平台

| 名称           | 安装路径                                                                                                                                                                                                                                                                                                                   |
| ----           | ----                                                                                                                                                                                                                                                                                                                       |
| CDH            | `/opt/cloudera/parcels/CDH/lib/hadoop/lib`<br></br>`/opt/cloudera/parcels/CDH/spark/jars`<br></br>`/var/lib/impala`                                                                                                                                                                                                                  |
| HDP            | `/usr/hdp/current/hadoop-client/lib`<br></br>`/usr/hdp/current/hive-client/auxlib`<br></br>`/usr/hdp/current/spark2-client/jars`                                                                                                                                                                                                     |
| Amazon EMR     | `/usr/lib/hadoop/lib`<br></br>`/usr/lib/spark/jars`<br></br>`/usr/lib/hive/auxlib`                                                                                                                                                                                                                                                   |
| 阿里云 EMR     | `/opt/apps/ecm/service/hadoop/*/package/hadoop*/share/hadoop/common/lib`<br></br>`/opt/apps/ecm/service/spark/*/package/spark*/jars`<br></br>`/opt/apps/ecm/service/presto/*/package/presto*/plugin/hive-hadoop2`<br></br>`/opt/apps/ecm/service/hive/*/package/apache-hive*/lib`<br></br>`/opt/apps/ecm/service/impala/*/package/impala*/lib` |
| 腾讯云 EMR     | `/usr/local/service/hadoop/share/hadoop/common/lib`<br></br>`/usr/local/service/presto/plugin/hive-hadoop2`<br></br>`/usr/local/service/spark/jars`<br></br>`/usr/local/service/hive/auxlib`                                                                                                                                              |
| UCloud UHadoop | `/home/hadoop/share/hadoop/common/lib`<br></br>`/home/hadoop/hive/auxlib`<br></br>`/home/hadoop/spark/jars`<br></br>`/home/hadoop/presto/plugin/hive-hadoop2`                                                                                                                                                                             |
| 百度云 EMR     | `/opt/bmr/hadoop/share/hadoop/common/lib`<br></br>`/opt/bmr/hive/auxlib`<br></br>`/opt/bmr/spark2/jars`                                                                                                                                                                                                                              |

### 社区开源组件

| 名称        | 安装路径                                                                                    |
|-----------|-----------------------------------------------------------------------------------------|
| Hadoop    | `${HADOOP_HOME}/share/hadoop/common/lib/`, `${HADOOP_HOME}/share/hadoop/mapreduce/lib/` |
| Spark     | `${SPARK_HOME}/jars`                                                                    |
| Presto    | `${PRESTO_HOME}/plugin/hive-hadoop2`                                                    |
| Trino     | `${TRINO_HOME}/plugin/hive`                                                             |
| Flink     | `${FLINK_HOME}/lib`                                                                     |
| StarRocks | `${StarRocks_HOME}/fe/lib/`, `${StarRocks_HOME}/be/lib/hadoop/common/lib`               |

### 客户端配置参数

请参考以下表格设置 JuiceFS 文件系统相关参数，并写入配置文件，一般是 `core-site.xml`。

#### 核心配置

| 配置项                           | 默认值                       | 描述                                                                                                                                                                                                 |
| -------------------------------- | ---------------------------- | ------------------------------------------------------------                                                                                                                                         |
| `fs.jfs.impl`                    | `io.juicefs.JuiceFileSystem` | 指定要使用的存储实现，默认使用 `jfs://` 作为 scheme。如想要使用其它 scheme（例如 `cfs://`），则修改为 `fs.cfs.impl` 即可。无论使用的 scheme 是什么，访问的都是 JuiceFS 中的数据。                    |
| `fs.AbstractFileSystem.jfs.impl` | `io.juicefs.JuiceFS`         | 指定要使用的存储实现，默认使用 `jfs://` 作为 scheme。如想要使用其它 scheme（例如 `cfs://`），则修改为 `fs.AbstractFileSystem.cfs.impl` 即可。无论使用的 scheme 是什么，访问的都是 JuiceFS 中的数据。 |
| `juicefs.meta`                   |                              | 指定预先创建好的 JuiceFS 文件系统的元数据引擎地址。可以通过 `juicefs.{vol_name}.meta` 格式为客户端同时配置多个文件系统。具体请参考[「多文件系统配置」](#多文件系统配置)。                            |

#### 缓存配置

| 配置项                          | 默认值    | 描述                                                                                                                                                                                                                                                                                                                                                            |
|------------------------------|--------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `juicefs.cache-dir`          |        | 设置本地缓存目录，可以指定多个文件夹，用冒号 `:` 分隔，也可以使用通配符（比如 `*` ）。**请预先创建好这些目录，并给予 `0777` 权限，便于多个应用共享缓存数据。**                                                                                                                                                                                                                                                                    |
| `juicefs.cache-size`         | 0      | 设置本地缓存目录的容量，单位 MiB，默认为 0，即不开启缓存。如果配置了多个缓存目录，该值代表所有缓存目录容量的总和。                                                                                                                                                                                                                                                                                                  |
| `juicefs.cache-full-block`   | `true` | 是否缓存所有读取的数据块，`false` 表示只缓存随机读的数据块。                                                                                                                                                                                                                                                                                                                            |
| `juicefs.free-space`         | 0.1    | 本地缓存目录的最小可用空间比例，默认保留 10% 剩余空间。                                                                                                                                                                                                                                                                                                                                |
| `juicefs.open-cache`         | 0      | 缓存打开的文件元数据（单位：秒），0 表示关闭                                                                                                                                                                                                                                                                                                                                       |
| `juicefs.attr-cache`         | 0      | 目录和文件属性缓存的过期时间（单位：秒）                                                                                                                                                                                                                                                                                                                                          |
| `juicefs.entry-cache`        | 0      | 文件项缓存的过期时间（单位：秒）                                                                                                                                                                                                                                                                                                                                              |
| `juicefs.dir-entry-cache`    | 0      | 目录项缓存的过期时间（单位：秒）                                                                                                                                                                                                                                                                                                                                              |
| `juicefs.discover-nodes-url` |        | 指定发现集群缓存节点列表的方式，每 10 分钟刷新一次。<br/><br/><ul><li>YARN：`yarn`</li><li>Spark Standalone：`http://spark-master:web-ui-port/json/`</li><li>Spark ThriftServer：`http://thrift-server:4040/api/v1/applications/`</li><li>Presto：`http://coordinator:discovery-uri-port/v1/service/presto/`</li><li>文件系统：`jfs://{VOLUME}/etc/nodes`，需手动建立此文件，并将节点的主机名一条一行写入此文件</li></ul> |

#### I/O 配置

| 配置项                      | 默认值     | 描述                     |
|--------------------------|---------|------------------------|
| `juicefs.max-uploads`    | 20      | 上传数据的最大连接数             |
| `juicefs.max-downloads`  | 200     | 下载连接的最大数量       |
| `juicefs.max-deletes`    | 10      | 删除数据的最大连接数             |
| `juicefs.get-timeout`    | 5       | 下载一个对象的超时时间，单位为秒。      |
| `juicefs.put-timeout`    | 60      | 上传一个对象的超时时间，单位为秒。      |
| `juicefs.memory-size`    | 300     | 读写数据的缓冲区最大空间，单位为 MiB。  |
| `juicefs.prefetch`       | 1       | 预读数据块的线程数              |
| `juicefs.upload-limit`   | 0       | 上传带宽限制，单位为 Mbps，默认不限制。 |
| `juicefs.download-limit` | 0       | 下载带宽限制，单位为 Mbps，默认不限制。 |
| `juicefs.io-retries`     | 10      | IO 失败重试次数              |
| `juicefs.writeback`      | `false` | 是否后台异步上传数据             |

#### 其它配置

| 配置项                       | 默认值          | 描述                                                                                                          |
|---------------------------|--------------|-------------------------------------------------------------------------------------------------------------|
| `juicefs.bucket`          |              | 为对象存储指定跟格式化时不同的访问地址                                                                                         |
| `juicefs.debug`           | `false`      | 是否开启 debug 日志                                                                                               |
| `juicefs.access-log`      |              | 访问日志的路径。需要所有应用都有写权限，可以配置为 `/tmp/juicefs.access.log`。该文件会自动轮转，保留最近 7 个文件。                                    |
| `juicefs.superuser`       | `hdfs`       | 超级用户                                                                                                        |
| `juicefs.supergroup`      | `supergroup` | 超级用户组                                                                                                       |
| `juicefs.users`           | `null`       | 用户名以及 UID 列表文件的地址，比如 `jfs://name/etc/users`。文件格式为 `<username>:<UID>`，一行一个用户。                                |
| `juicefs.groups`          | `null`       | 用户组、GID 以及组成员列表文件的地址，比如 `jfs://name/etc/groups`。文件格式为 `<group-name>:<GID>:<username1>,<username2>`，一行一个用户组。 |
| `juicefs.umask`           | `null`       | 创建文件和目录的 umask 值（如 `0022`），如果没有此配置，默认值是 `fs.permissions.umask-mode`。                                        |
| `juicefs.push-gateway`    |              | [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) 地址，格式为 `<host>:<port>`。                 |
| `juicefs.push-auth`       |              | [Prometheus 基本认证](https://prometheus.io/docs/guides/basic-auth)信息，格式为 `<username>:<password>`。              |
| `juicefs.push-graphite`   |              | [Graphite](https://graphiteapp.org) 地址，格式为 `<host>:<port>`。                                                 |
| `juicefs.push-interval`   | 10           | 指标推送的时间间隔，单位为秒。                                                                                             |
| `juicefs.push-labels`     |              | 指标额外标签，格式为 `key1:value1;key2:value2`。                                                                       |
| `juicefs.fast-resolve`    | `true`       | 是否开启快速元数据查找（通过 Redis Lua 脚本实现）                                                                              |
| `juicefs.no-usage-report` | `false`      | 是否上报数据。仅上版本号等使用量数据，不包含任何用户信息。                                                                               |
| `juicefs.block.size`      | `134217728`  | 单位为字节，同 HDFS 的 `dfs.blocksize`，默认 128 MB                                                                    |
| `juicefs.file.checksum`   | `false`      | DistCp 使用 `-update` 参数时，是否计算文件 Checksum                                                                     |
| `juicefs.no-bgjob`        | `false`      | 是否关闭后台任务（清理、备份等）                                                                                            |
| `juicefs.backup-meta`     | 3600         | 自动将 JuiceFS 元数据备份到对象存储间隔（单位：秒），设置为 0 关闭自动备份                                                                 |
|`juicefs.backup-skip-trash`| `false`      | 备份元数据时忽略回收站中的文件和目录。                                                                                         |
| `juicefs.heartbeat`       | 12           | 客户端和元数据引擎之间的心跳间隔（单位：秒），建议所有客户端都设置一样                                                                         |
| `juicefs.skip-dir-mtime`  | 100ms        | 修改父目录 mtime 间隔。                                                                                             |
| `juicefs.subdir`          |              | 仅允许访问此目录的子路径。可以指定多个路径，使用逗号分隔。所有其他路径，包括根目录或同级目录，都将被拒绝访问。                                           |

#### 多文件系统配置

当需要同时使用多个 JuiceFS 文件系统时，上述所有配置项均可对特定文件系统进行指定，只需要将文件系统名字放在配置项的中间，比如下面示例中的 `jfs1` 和 `jfs2`：

```xml
<property>
  <name>juicefs.jfs1.meta</name>
  <value>redis://jfs1.host:port/1</value>
</property>
<property>
  <name>juicefs.jfs2.meta</name>
  <value>redis://jfs2.host:port/1</value>
</property>
```

#### 配置示例

以下是一个常用的配置示例，请替换 `juicefs.meta` 配置中的 `{HOST}`、`{PORT}` 和 `{DB}` 变量为实际的值。

```xml
<property>
  <name>fs.jfs.impl</name>
  <value>io.juicefs.JuiceFileSystem</value>
</property>
<property>
  <name>fs.AbstractFileSystem.jfs.impl</name>
  <value>io.juicefs.JuiceFS</value>
</property>
<property>
  <name>juicefs.meta</name>
  <value>redis://{HOST}:{PORT}/{DB}</value>
</property>
<property>
  <name>juicefs.cache-dir</name>
  <value>/data*/jfs</value>
</property>
<property>
  <name>juicefs.cache-size</name>
  <value>1024</value>
</property>
<property>
  <name>juicefs.access-log</name>
  <value>/tmp/juicefs.access.log</value>
</property>
```

## Hadoop 环境配置

请参照前述各项配置表，将配置参数加入到 Hadoop 配置文件 `core-site.xml` 中。

### CDH6

如果使用的是 CDH 6 版本，除了修改 `core-site` 外，还需要通过 YARN 服务界面修改 `mapreduce.application.classpath`，增加：

```shell
$HADOOP_COMMON_HOME/lib/juicefs-hadoop.jar
```

### HDP

除了修改 `core-site` 外，还需要通过 MapReduce2 服务界面修改配置 `mapreduce.application.classpath`，在末尾增加（变量无需替换）：

```shell
/usr/hdp/${hdp.version}/hadoop/lib/juicefs-hadoop.jar
```

### Flink

将配置参数加入 `conf/flink-conf.yaml`。如果只是在 Flink 中使用 JuiceFS, 可以不在 Hadoop 环境配置 JuiceFS，只需要配置 Flink 客户端即可。

#### 在阿里云实时平台 Flink SQL 使用 JuiceFS

1. 创建 Maven 项目，根据 Flink 不同版本引入如下依赖

   ```xml
   <dependencies>
       <dependency>
           <groupId>io.juicefs</groupId>
           <artifactId>juicefs-hadoop</artifactId>
           <version>{JUICEFS_HADOOP_VERSION}</version>
       </dependency>

       <!-- for flink-1.13 -->
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-runtime-blink_2.12</artifactId>
           <version>1.13.5</version>
           <scope>provided</scope>
       </dependency>

       <!-- for flink-1.15 -->
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-common</artifactId>
           <version>1.15.2</version>
       <scope>provided</scope>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-connector-files</artifactId>
           <version>1.15.2</version>
           <scope>provided</scope>
       </dependency>
   </dependencies>
   ```

2. 创建一个 Java class

   ```java
   public class JuiceFileSystemTableFactory extends FileSystemTableFactory {
     @Override
     public String factoryIdentifier() {
       return "juicefs";
     }
   }
   ```

3. Flink table connector 是使用 Java’s Service Provider Interfaces (SPI) 加载自定义实现。
在 resources 按照如下结构创建文件

   ```
   ## for flink-1.13
   src/main/resources
   ├── META-INF
   │   └── services
   │        └── org.apache.flink.table.factories.Factory
   ```

   `org.apache.flink.table.factories.Factory` 文件内容：

   ```
   {YOUR_PACKAGE}.JuiceFileSystemTableFactory
   ```

4. 将填写有 JuiceFS 配置的 core-site.xml 放到 src/main/resources 内：

   ```xml
   <configuration>
       <property>
           <name>fs.juicefs.impl</name>
           <value>io.juicefs.JuiceFileSystem</value>
       </property>
       <property>
           <name>juicefs.meta</name>
           <value>redis://xxx.redis.rds.aliyuncs.com:6379/0</value>
       </property>
       ...
   </configuration>
   ```

   :::note 注意
   由于 `jfs://` scheme 被阿里其他文件系统占用，所以需要配置 `fs.juicefs.impl` 类为 JuiceFS 的实现类，并在后续路径使用 `juicefs://` 协议。
   :::

5. 打包，确保 JAR 内包含 resources 目录下内容
6. 通过阿里云实时计算平台控制台->应用->作业开发->connectors 界面上传 JAR 文件
7. 测试，将如下 SQL 上线运行，可以在 JuiceFS 的 `tmp/tbl` 目录下发现写入内容

   ```sql
   CREATE TEMPORARY TABLE datagen_source(
     name VARCHAR
   ) WITH (
     'connector' = 'datagen',
     'number-of-rows' = '100'
   );

   CREATE TEMPORARY TABLE jfs_sink (name string)
   with (
       'connector' = 'juicefs', 'path' = 'juicefs://{VOL_NAME}/tmp/tbl', 'format' = 'csv'
   );

   INSERT INTO jfs_sink
   SELECT
     name
   from datagen_source;
   ```

### Hudi

:::note 注意
Hudi 自 v0.10.0 版本开始支持 JuiceFS，请确保使用正确的版本。
:::

请参考[「Hudi 官方文档」](https://hudi.apache.org/docs/jfs_hoodie)了解如何配置 JuiceFS。

### Kafka Connect

可以使用 Kafka Connect 和 HDFS Sink Connector（[HDFS 2](https://docs.confluent.io/kafka-connect-hdfs/current/overview.html)、[HDFS 3](https://docs.confluent.io/kafka-connect-hdfs3-sink/current/overview.html)）将数据落盘存储到 JuiceFS。

首先需要将 JuiceFS 的 SDK 添加到 Kafka Connect 的 `classpath` 内，如 `/usr/share/java/confluentinc-kafka-connect-hdfs/lib`。

在新建 Connect Sink 任务时，做如下配置：

- 指定 `hadoop.conf.dir` 为包含 `core-site.xml` 配置文件的目录，若没有运行在 Hadoop 环境，可创建一个单独目录，如 `/usr/local/juicefs/hadoop`，然后将与 JuiceFS 相关的配置添加到 `core-site.xml`。
- 指定 `store.url` 为以 `jfs://` 开头的路径

举例：

```ini
# 省略其他配置项...
hadoop.conf.dir=/path/to/hadoop-conf
store.url=jfs://path/to/store
```

### HBase

JuiceFS 适合存储 HBase 的 HFile，但不适合用来保存它的事务日志（WAL），因为将日志持久化到对象存储的时间会远高于持久化到 HDFS 的 DataNode 的内存中。

建议部署一个小的 HDFS 集群来存放 WAL，HFile 文件则存储在 JuiceFS 上。

#### 新建 HBase 集群

修改 `hbase-site.xml` 配置：

```xml title="hbase-site.xml"
<property>
  <name>hbase.rootdir</name>
  <value>jfs://{vol_name}/hbase</value>
</property>
<property>
  <name>hbase.wal.dir</name>
  <value>hdfs://{ns}/hbase-wal</value>
</property>
```

#### 修改原有 HBase 集群

除了修改上述配置项外，由于 HBase 集群已经在 ZooKeeper 里存储了部分数据，为了避免冲突，有以下两种方式解决：

1. 删除原集群

   通过 ZooKeeper 客户端删除 `zookeeper.znode.parent` 配置的 znode（默认 `/hbase`）。

   :::note 注意
   此操作将会删除原有 HBase 上面的所有数据
   :::

2. 使用新的 znode

   保留原 HBase 集群的 znode，以便后续可以恢复。然后为 `zookeeper.znode.parent` 配置一个新的值：

   ```xml title="hbase-site.xml"
   <property>
     <name>zookeeper.znode.parent</name>
     <value>/hbase-jfs</value>
   </property>
   ```

### 重启服务

当需要使用以下组件访问 JuiceFS 数据时，需要重启相关服务。

:::note 注意
在重启之前需要保证 JuiceFS 配置已经写入配置文件，通常可以查看机器上各组件配置的 `core-site.xml` 里面是否有 JuiceFS 相关配置。
:::

| 组件名 | 服务名                     |
| ------ | -------------------------- |
| Hive   | HiveServer<br />Metastore  |
| Spark  | ThriftServer               |
| Presto | Coordinator<br />Worker    |
| Impala | Catalog Server<br />Daemon |
| HBase  | Master<br />RegionServer   |

HDFS、Hue、ZooKeeper 等服务无需重启。

若访问 JuiceFS 出现 `Class io.juicefs.JuiceFileSystem not found` 或 `No FilesSystem for scheme: jfs` 错误，请参考 [FAQ](#faq)。

### 回收站

JuiceFS Hadoop Java SDK 同样也有和 HDFS 一样的回收站功能，需要通过设置 `fs.trash.interval` 和 `fs.trash.checkpoint.interval` 开启，请参考 [HDFS 文档](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html#File_Deletes_and_Undeletes)了解更多信息。

## 环境验证

JuiceFS Java 客户端部署完成以后，可以采用以下方式验证部署是否成功。

### Hadoop CLI

```bash
hadoop fs -ls jfs://{JFS_NAME}/
```

:::info 说明
这里的 `JFS_NAME` 是创建 JuiceFS 文件系统时指定的名称。
:::

### Hive

```sql
CREATE TABLE IF NOT EXISTS person
(
  name STRING,
  age INT
) LOCATION 'jfs://{JFS_NAME}/tmp/person';
```

### Java/Scala 项目

1. 新增 Maven 或 Gradle 依赖：

   <Tabs>
     <TabItem value="maven" label="Maven">

   ```xml
   <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
       <version>{HADOOP_VERSION}</version>
       <scope>provided</scope>
   </dependency>
   <dependency>
       <groupId>io.juicefs</groupId>
       <artifactId>juicefs-hadoop</artifactId>
       <version>{JUICEFS_HADOOP_VERSION}</version>
       <scope>provided</scope>
   </dependency>
   ```

     </TabItem>
     <TabItem value="gradle" label="Gradle">

   ```groovy
   dependencies {
     implementation 'org.apache.hadoop:hadoop-common:${hadoopVersion}'
     implementation 'io.juicefs:juicefs-hadoop:${juicefsHadoopVersion}'
   }
   ```

     </TabItem>
   </Tabs>

2. 使用以下示例代码验证：

<!-- autocorrect: false -->
   ```java
   package demo;

   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.FileStatus;
   import org.apache.hadoop.fs.FileSystem;
   import org.apache.hadoop.fs.Path;

   public class JuiceFSDemo {
       public static void main(String[] args) throws Exception {
           Configuration conf = new Configuration();
           conf.set("fs.jfs.impl", "io.juicefs.JuiceFileSystem");
           conf.set("juicefs.meta", "redis://127.0.0.1:6379/0");  // JuiceFS 元数据引擎地址
           Path p = new Path("jfs://{JFS_NAME}/");  // 请替换 {JFS_NAME} 为正确的值
           FileSystem jfs = p.getFileSystem(conf);
           FileStatus[] fileStatuses = jfs.listStatus(p);
           // 遍历 JuiceFS 文件系统并打印文件路径
           for (FileStatus status : fileStatuses) {
               System.out.println(status.getPath());
           }
       }
   }
   ```
<!-- autocorrect: true -->

## 监控指标收集

请查看[「监控」](../administration/monitoring.md)文档了解如何收集及展示 JuiceFS 监控指标

## 从 HDFS 迁移数据到 JuiceFS

从 HDFS 迁移数据到 JuiceFS，一般是使用 DistCp 来拷贝数据，它支持数据校验 (Checksum) 来保证数据的正确性。

DistCp 是使用 HDFS 的 `getFileChecksum()` 接口来获得文件的校验码，然后对比拷贝后的文件的校验码来确保数据是一样的。

Hadoop 默认使用的 Checksum 算法是 MD5-MD5-CRC32, 严重依赖 HDFS 的实现细节。它是根据文件目前的分块形式，使用 MD5-CRC32 算法汇总每一个数据块的 Checksum（把每一个 64K 的 block 的 CRC32 校验码汇总，再算一个 MD5），然后再用 MD5 计算校验码。如果 HDFS 集群的分块大小不同，就没法用这个算法进行比较。

为了兼容 HDFS，JuiceFS 也实现了该 MD5-MD5-CRC32 算法，它会将文件的数据读一遍，用同样的算法计算得到一个 checksum，用于比较。

因为 JuiceFS 是基于对象存储实现的，后者已经通过多种 Checksum 机制保证了数据完整性，JuiceFS 默认没有启用上面的 Checksum 算法，需要通过 `juicefs.file.checksum` 配置来启用。

因为该算法依赖于相同的分块大小，需要通过 `juicefs.block.size` 配置将分块大小设置为跟 HDFS 一样（默认值是 `dfs.blocksize`，它的默认值是 128MB）。

另外，HDFS 里支持给每一个文件设置不同的分块大小，而 JuiceFS 不支持，如果启用 Checksum 校验的话会导致拷贝部分文件失败（因为分块大小不同），JuiceFS Hadoop Java SDK 对 DistCp 打了一个热补丁（需要 `tools.jar`）来跳过这些分块不同的文件（不做比较，而不是抛异常）。

## 基准测试

以下提供了一系列方法，使用 JuiceFS 客户端内置的压测工具，对已经成功部署了客户端环境进行性能测试。

### 1. 本地测试

#### 元数据性能

- **create**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench create -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

  此命令会 create 10000 个空文件

- **open**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench open -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

  此命令会 open 10000 个文件，并不读取数据

- **rename**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench rename -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

- **delete**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench delete -files 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench -local
  ```

- **参考值**

  | 操作   | TPS  | 时延（ms） |
  | ------ | ---- | ----       |
  | create | 644  | 1.55       |
  | open   | 3467 | 0.29       |
  | rename | 483  | 2.07       |
  | delete | 506  | 1.97       |

#### I/O 性能

- **顺序写**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -write -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local
  ```

- **顺序读**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -read -size 20000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO -local
  ```

  如果多次运行此命令，可能会出现数据被缓存到了系统缓存而导致读取速度非常快，只需清除 JuiceFS 的本地磁盘缓存即可

- **参考值**

  | 操作   | 吞吐（MB/s） |
  | ------ | ----         |
  | write  | 647          |
  | read   | 111          |

如果机器的网络带宽比较低，则一般能达到网络带宽瓶颈

### 2. 分布式测试

以下命令会启动 MapReduce 分布式任务程序对元数据和 IO 性能进行测试，测试时需要保证集群有足够的资源能够同时启动所需的 map 任务。

本项测试使用的计算资源：

- **服务器**：3 台 4 核 32 GB 内存的云服务器，突发带宽 5Gbit/s。
- **数据库**：阿里云 Redis 5.0 社区 4G 主从版

#### 元数据性能

- **create**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench create -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  此命令会启动 10 个 map task，每个 task 有 10 个线程，每个线程会创建 1000 个空文件，总共 100000 个空文件

- **open**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench open -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  此命令会启动 10 个 map task，每个 task 有 10 个线程，每个线程会 open 1000 个文件，总共 open 100000 个文件

- **rename**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench rename -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  此命令会启动 10 个 map task，每个 task 有 10 个线程，每个线程会 rename 1000 个文件，总共 rename 100000 个文件

- **delete**

  ```shell
  hadoop jar juicefs-hadoop.jar nnbench delete -maps 10 -threads 10 -files 1000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/NNBench
  ```

  此命令会启动 10 个 map task，每个 task 有 10 个线程，每个线程会 delete 1000 个文件，总共 delete 100000 个文件

- **参考值**

  - 10 并发

    | 操作   | IOPS | 时延（ms） |
    | ------ | ---- | ----       |
    | create | 4178 | 2.2        |
    | open   | 9407 | 0.8        |
    | rename | 3197 | 2.9       |
    | delete | 3060 | 3.0        |

  - 100 并发

    | 操作   | IOPS  | 时延（ms） |
    | ------ | ----  | ----       |
    | create | 11773  | 7.9       |
    | open   | 34083 | 2.4        |
    | rename | 8995  | 10.8       |
    | delete | 7191  | 13.6       |

#### I/O 性能

- **连续写**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -write -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO
  ```

  此命令会启动 10 个 map task，每个 task 写入 10000MB 的数据

- **连续读**

  ```shell
  hadoop jar juicefs-hadoop.jar dfsio -read -maps 10 -size 10000 -baseDir jfs://{JFS_NAME}/tmp/benchmarks/DFSIO
  ```

  此命令会启动 10 个 map task，每个 task 读取 10000MB 的数据

- **参考值**

  | 操作   | 平均吞吐（MB/s） | 总吞吐（MB/s） |
  | ------ | ----             | ----           |
  | write  | 198              | 1835           |
  | read   | 124              | 1234           |

### 3. TPC-DS

测试数据集 100GB 规模，测试 Parquet 和 ORC 两种文件格式。

本次测试仅测试前 10 个查询。

使用 Spark Thrift JDBC/ODBC Server 开启 Spark 常驻进程，然后通过 Beeline 连接提交任务。

#### 测试硬件

| 节点类型 | 机器型号             | CPU  | 内存   | 磁盘                                            | 数量 |
| ------   | -------------------  | ---- | ------ | ----------------------------------              | ---- |
| Master   | 阿里云 ecs.r6.xlarge | 4    | 32GiB  | 系统盘：100GiB                                  | 1    |
| Core     | 阿里云 ecs.r6.xlarge | 4    | 32GiB  | 系统盘：100GiB<br />数据盘：500GiB 高效云盘 x 2 | 3    |

#### 软件配置

##### Spark Thrift JDBC/ODBC Server

```shell
${SPARK_HOME}/sbin/start-thriftserver.sh \
  --master yarn \
  --driver-memory 8g \
  --executor-memory 10g \
  --executor-cores 3 \
  --num-executors 3 \
  --conf spark.locality.wait=100 \
  --conf spark.sql.crossJoin.enabled=true \
  --hiveconf hive.server2.thrift.port=10001
```

##### JuiceFS 缓存配置

Core 节点的 2 块数据盘挂载在 `/data01` 和 `/data02` 目录下，`core-site.xml` 配置如下：

```xml
<property>
  <name>juicefs.cache-size</name>
  <value>200000</value>
</property>
<property>
  <name>juicefs.cache-dir</name>
  <value>/data*/jfscache</value>
</property>
<property>
  <name>juicefs.cache-full-block</name>
  <value>false</value>
</property>
<property>
  <name>juicefs.discover-nodes-url</name>
  <value>yarn</value>
</property>
<property>
  <name>juicefs.attr-cache</name>
  <value>3</value>
</property>
<property>
  <name>juicefs.entry-cache</name>
  <value>3</value>
</property>
<property>
  <name>juicefs.dir-entry-cache</name>
  <value>3</value>
</property>
```

#### 测试

任务提交的命令如下：

```shell
${SPARK_HOME}/bin/beeline -u jdbc:hive2://localhost:10001/${DATABASE} \
  -n hadoop \
  -f query{i}.sql
```

#### 结果

JuiceFS 可以使用本地磁盘作为缓存加速数据访问，以下数据是分别使用 Redis 和 TiKV 作为 JuiceFS 的元数据引擎跑 4 次后的结果（单位秒）。

##### ORC

| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS |
| ------- | --------------- | -------------- | ---- |
| q1      | 20              | 20             | 20   |
| q2      | 28              | 33             | 26   |
| q3      | 24              | 27             | 28   |
| q4      | 300             | 309            | 290  |
| q5      | 116             | 117            | 91   |
| q6      | 37              | 42             | 41   |
| q7      | 24              | 28             | 23   |
| q8      | 13              | 15             | 16   |
| q9      | 87              | 112            | 89   |
| q10     | 23              | 24             | 22   |

![orc](../images/spark_ql_orc.png)

##### Parquet

| Queries | JuiceFS (Redis) | JuiceFS (TiKV) | HDFS |
| ------- | --------------- | -------------- | ---- |
| q1      | 33              | 35             | 39   |
| q2      | 28              | 32             | 31   |
| q3      | 23              | 25             | 24   |
| q4      | 273             | 284            | 266  |
| q5      | 96              | 107            | 94   |
| q6      | 36              | 35             | 42   |
| q7      | 28              | 30             | 24   |
| q8      | 11              | 12             | 14   |
| q9      | 85              | 97             | 77   |
| q10     | 24              | 28             | 38   |

![parquet](../images/spark_sql_parquet.png)

## 使用 Apache Ranger 进行权限管控（ v1.3支持 ）

JuiceFS 当前支持对接 Apache Ranger 的 `HDFS` 模块进行路径的权限管控。仅 Hadoop Java SDK 支持该功能。

### 1. 相关配置

Apache Ranger 的配置统一放在 META 数据库内。可以通过以下方法开启 Ranger 的权限管控：

```shell
# format 的时候指定 ranger 配置
juicefs format META-URL NAME --ranger-rest-url http://localhost:6080 --ranger-service jfs

# 已有的文件系统增加 ranger 配置
juicefs config META-URL --ranger-rest-url http://localhost:6080 --ranger-service jfs

# 关闭 ranger
juicefs config META-URL --ranger-rest-url "" --ranger-service jfs ""
```

### 2. 环境及依赖

考虑到使用的方便性，JuiceFS 将 Ranger 所有依赖的包均打包到 JuiceFS 的 SDK 中。如果遇到 Apache Ranger 的版本冲突问题，可能需要修改版本重新编译。

### 3. 使用提示

#### 3.1 Ranger版本

当前代码测试基于`Ranger2.3`和`Ranger2.4`版本，因除`HDFS`模块鉴权外并未使用其他特性，理论上其他版本均适用。

#### 3.2 Ranger Audit

当前仅支持鉴权功能，`Ranger Audit`功能已关闭。

#### 3.3 Ranger其他参数

为提升使用效率，当前仅开放连接 Ranger 最核心的参数。

#### 3.4 安全性问题

因项目代码完全开源，无法避免用户通过替换`ranger-rest-url`等参数的方式扰乱安全管控。如需更严格的管控，建议自主编译代码，通过将相关安全参数进行加密处理等方式解决。

## FAQ

### 1. 出现 `Class io.juicefs.JuiceFileSystem not found` 异常

出现这个异常的原因是 `juicefs-hadoop.jar` 没有被加载，可以用 `lsof -p {pid} | grep juicefs` 查看 JAR 文件是否被加载。需要检查 JAR 文件是否被正确地放置在各个组件的 classpath 里面，并且保证 JAR 文件有可读权限。

另外，在某些发行版 Hadoop 环境中，需要修改 `mapred-site.xml` 中的 `mapreduce.application.classpath` 参数，添加 `juicefs-hadoop.jar` 的路径。

### 2. 出现 `No FilesSystem for scheme: jfs` 异常

出现这个异常的原因是 `core-site.xml` 配置文件中的 JuiceFS 配置没有被读取到，需要检查组件配置的 `core-site.xml` 中是否有 JuiceFS 相关配置。

### 3. JuiceFS 与 HDFS 的用户权限管理有何相同和不同之处？

JuiceFS 也是使用「用户／用户组」的方式管理文件权限，默认使用的是本地的用户和用户组。为了保证分布式计算时不同节点的权限统一，可以通过 `juicefs.users` 和 `juicefs.groups` 配置全局的「用户／UID」和「用户组／GID」映射。

### 4. 数据删除后都是直接存储在 JuiceFS 的 `.trash` 目录，虽然文件都在但是很难像 HDFS 那样简单通过 `mv` 命令就能恢复数据，是否有某种办法可以达到类似 HDFS 回收站的效果？

在 Hadoop 应用场景下，仍然保留了类似于 HDFS 回收站的功能。需要通过 `fs.trash.interval` 以及 `fs.trash.checkpoint.interval` 配置来显式开启，请参考[文档](#回收站)了解更多信息。

### 5. 设置 `juicefs.discover-nodes-url` 这个参数有什么好处？

在 HDFS 里面，每个数据块会有 [`BlockLocation`](https://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/BlockLocation.html) 信息，计算引擎会利用此信息尽量将计算任务调度到数据所存储的节点。JuiceFS 会通过一致性哈希算法为每个数据块计算出对应的 `BlockLocation`，这样第二次读取相同的数据时，计算引擎有可能将计算任务调度到相同的机器上，就可以利用第一次计算时缓存在本地磁盘的数据来加速数据访问。

此算法需要事先知道所有的计算节点信息，`juicefs.discover-nodes-url` 参数就是用来获得这些计算节点信息的。

### 6. 对于采用 Kerberos 认证的 CDH 集群，社区版 JuiceFS 目前能否支持呢？

不支持。JuiceFS 不会校验 Kerberos 用户的合法性，但是可以使用通过 Kerberos 认证的用户名。


================================================
FILE: docs/zh_cn/deployment/how_to_use_on_kubernetes.md
================================================
---
title: Kubernetes 使用 JuiceFS
sidebar_position: 2
slug: /how_to_use_on_kubernetes
---

JuiceFS 非常适合用作 Kubernetes 集群的存储层，阅读本文以了解如何使用。

## 以 `hostPath` 方式挂载 JuiceFS

如果你仅仅需要在 Kubernetes 容器中简单使用 JuiceFS，没有其他任何复杂要求（比如隔离性、权限控制），那么完全可以以 [`hostPath` 卷](https://kubernetes.io/zh-cn/docs/concepts/storage/volumes/#hostpath) 的方式使用 JuiceFS，搭建起来也十分简单：

1. 在 Kubernetes 节点上统一安装、挂载 JuiceFS，如果节点众多，考虑[自动化部署](./automation.md)。
1. 在 pod 定义中使用 `hostPath` 卷，直接将宿主机上的 JuiceFS 子目录挂载到容器中：

   ```yaml {8-16}
   apiVersion: v1
   kind: Pod
   metadata:
     name: juicefs-app
   spec:
     containers:
       - ...
         volumeMounts:
           - name: jfs-data
             mountPath: /opt/app-data
     volumes:
       - name: jfs-data
         hostPath:
           # 假设挂载点为 /jfs
           path: "/jfs/myapp/"
           type: Directory
   ```

相比以 CSI 驱动的方式来使用 JuiceFS，`hostPath` 更为简单直接，出问题也更易排查，但也要注意：

* 为求管理方便，一般所有容器都在使用同一个宿主机挂载点，缺乏隔离可能导致数据安全问题，未来也无法在不同应用中单独调整 JuiceFS 挂载参数。请谨慎评估。
* 所有节点都需要提前挂载 JuiceFS，因此集群加入新节点，需要在初始化流程里进行安装和挂载，否则新节点没有 JuiceFS 挂载点，容器将无法创建。
* 宿主机上的 JuiceFS 挂载进程所占用的系统资源（如 CPU、内存等）不受 Kubernetes 控制，有可能占用较多宿主机资源。可以考虑用 [`system-reserved`](https://kubernetes.io/zh-cn/docs/tasks/administer-cluster/reserve-compute-resources/#system-reserved) 来适当调整 Kubernetes 的系统资源预留值，为 JuiceFS 挂载进程预留更多资源。
* 如果宿主机上的 JuiceFS 挂载进程意外退出，将会导致应用 pod 无法正常访问挂载点，此时需要重新挂载 JuiceFS 文件系统并重建应用 pod。作为对比，JuiceFS CSI 驱动提供[「挂载点自动恢复」](https://juicefs.com/docs/zh/csi/recover-failed-mountpoint)功能来解决这个问题。
* 如果你使用 Docker 作为 Kubernetes 容器运行环境，最好令 JuiceFS 先于 Docker 启动，否则在节点重启的时候，偶尔可能出现容器启动时，JuiceFS 尚未挂载好的情况，此时便会因该依赖问题启动失败。以 systemd 为例，可以用下方 unit file 来配置启动顺序：

  ```systemd title="/etc/systemd/system/docker.service.d/override.conf"
  [Unit]
  # 请使用下方命令确定 JuiceFS 挂载服务的名称（例如 jfs.mount）：
  # systemctl list-units | grep "\.mount"
  After=network-online.target firewalld.service containerd.service jfs.mount
  ```

## JuiceFS CSI 驱动

在 Kubernetes 中使用 JuiceFS，请阅读[「JuiceFS CSI 驱动文档」](https://juicefs.com/docs/zh/csi/introduction)。

## 在容器中挂载 JuiceFS

某些情况下，你可能需要在容器中直接挂载 JuiceFS 存储，这需要在容器中使用 JuiceFS 客户端，你可以参考以下 `Dockerfile` 样本将 JuiceFS 客户端集成到应用镜像：

```dockerfile title="Dockerfile"
FROM alpine:latest
LABEL maintainer="Juicedata <https://juicefs.com>"

# Install JuiceFS client
RUN apk add --no-cache curl && \
  JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \
  wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  install juicefs /usr/bin && \
  rm juicefs "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" && \
  rm -rf /var/cache/apk/* && \
  apk del curl

ENTRYPOINT ["/usr/bin/juicefs", "mount"]
```

由于 JuiceFS 需要使用 FUSE 设备挂载文件系统，因此在创建 Pod 时需要允许容器在特权模式下运行：

```yaml {19-20}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-run
spec:
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: linuxserver/nginx
          ports:
            - containerPort: 80
          securityContext:
            privileged: true
```

:::caution 注意
容器启用 `privileged: true` 特权模式以后，就具备了访问宿主机所有设备的权限，即拥有了对宿主机内核的完全控制权限。使用不当会带来严重的安全隐患，请您在使用此方式之前进行充分的安全评估。
:::


================================================
FILE: docs/zh_cn/deployment/juicefs_on_docker.md
================================================
---
title: 在 Docker 中使用 JuiceFS
sidebar_position: 6
slug: /juicefs_on_docker
description: 在 Docker 中以不同方式使用 JuiceFS，包括卷映射、卷插件，以及容器中挂载。
---

在 Docker 中使用 JuiceFS 文件系统，可以通过卷插件或直接在容器中运行客户端。

## 使用卷插件 {#volume-plugin}

如果你对挂载管理有一定要求，比如希望通过 Docker 来管理挂载点，方便不同的应用容器使用不同的 JuiceFS 文件系统，则可以使用[卷插件](https://github.com/juicedata/docker-volume-juicefs)（Docker volume plugin）。

Docker 插件通常是以镜像形式提供的，[JuiceFS 卷插件镜像](https://hub.docker.com/r/juicedata/juicefs)中内置了 [JuiceFS 社区版](../introduction/README.md)和 [JuiceFS 云服务](https://juicefs.com/docs/zh/cloud)客户端，安装以后，便能够运行卷插件，在 Docker 中创建 JuiceFS Volume。

通过下面的命令安装插件，按照提示为 FUSE 提供必要的权限：

```shell
docker plugin install juicedata/juicefs
```

你可以使用以下命令管理卷插件：

```shell
# 停用插件
docker plugin disable juicedata/juicefs

# 升级插件（需先停用）
docker plugin upgrade juicedata/juicefs
docker plugin enable juicedata/juicefs

# 卸载插件
docker plugin rm juicedata/juicefs
```

### 创建存储卷 {#create-volume}

请将以下命令中的 `<VOLUME_NAME>`、`<META_URL>`、`<STORAGE_TYPE>`、`<BUCKET_NAME>`、`<ACCESS_KEY>`、`<SECRET_KEY>` 替换成你自己的文件系统配置。

```shell
docker volume create -d juicedata/juicefs \
  -o name=<VOLUME_NAME> \
  -o metaurl=<META_URL> \
  -o storage=<STORAGE_TYPE> \
  -o bucket=<BUCKET_NAME> \
  -o access-key=<ACCESS_KEY> \
  -o secret-key=<SECRET_KEY> \
  jfsvolume
```

对于已经预先创建好的文件系统，在用其创建卷插件时，只需指定文件系统名称和数据库地址，例如：

```shell
docker volume create -d juicedata/juicefs \
  -o name=<VOLUME_NAME> \
  -o metaurl=<META_URL> \
  jfsvolume
```

如果需要在挂载文件系统时传入额外的环境变量（比如 [Google 云](../reference/how_to_set_up_object_storage.md#google-cloud)），可以对上方命令追加类似 `-o env=FOO=bar,SPAM=egg` 的参数。

### 使用和管理 {#usage-and-management}

```shell
# 创建容器时挂载卷
docker run -it -v jfsvolume:/opt busybox ls /opt

# 卸载后，可以操作删除存储卷，注意这仅仅是删除 Docker 中的对应资源，并不影响 JuiceFS 中存储的数据
docker volume rm jfsvolume
```

### 在 Docker Compose 中使用卷插件  {#using-plugin-in-docker-compose}

下面是在 `docker compose` 中使用 JuiceFS 卷插件的示例：

```yaml
version: '3'
services:
busybox:
  image: busybox
  command: "ls /jfs"
  volumes:
    - jfsvolume:/jfs
volumes:
  jfsvolume:
    driver: juicedata/juicefs
    driver_opts:
      name: ${VOL_NAME}
      # 因为 SQLite 在插件容器本地路径创建数据库文件，
      # sqlite:// 将在服务重启时失败。
      # （详见 https://github.com/juicedata/docker-volume-juicefs/issues/37）
      metaurl: ${META_URL}
      storage: ${STORAGE_TYPE}
      bucket: ${BUCKET}
      access-key: ${ACCESS_KEY}
      secret-key: ${SECRET_KEY}
      # 如有需要，可以用 env 传入额外环境变量
      # env: FOO=bar,SPAM=egg
```

使用和管理：

```shell
# 启动服务
docker-compose up

# 关闭服务并从 Docker 中卸载 JuiceFS 文件系统
docker-compose down --volumes
```

### 卷插件问题排查 {#troubleshooting}

无法正常工作时，推荐先[升级卷插件](#volume-plugin)，然后根据问题情况查看日志。

* 收集 JuiceFS 客户端日志，日志位于 Docker volume plugin 容器内，需要进入容器采集：

  ```shell
  # 确认 docker plugins runtime 目录，根据实际情况可能与下方示范不同
  # ls 打印出来的目录就是容器目录，名称为容器 ID
  ls /run/docker/plugins/runtime-root/plugins.moby

  # 打印 plugin 容器信息
  # 如果打印出的容器列表为空，说明 plugin 容器创建失败
  # 阅读下方查看 plugin 启动日志继续排查
  runc --root /run/docker/plugins/runtime-root/plugins.moby list

  # 进入容器，打印日志
  runc --root /run/docker/plugins/runtime-root/plugins.moby exec 452d2c0cf3fd45e73a93a2f2b00d03ed28dd2bc0c58669cca9d4039e8866f99f cat /var/log/juicefs.log
  ```

  如果发现容器不存在（`ls` 发现目录为空），或者在最后打印日志的阶段发现 `juicefs.log` 不存在，那么多半是挂载本身就失败了，继续查看 plugin 自身的日志寻找原因。

* 收集 plugin 日志，以 systemd 为例：

  ```shell
  journalctl -f -u docker | grep "plugin="
  ```

  如果 plugin 调用 `juicefs` 发生错误，或者 plugin 自身报错，均会在日志里有所体现。

## 在容器中使用 JuiceFS 客户端 {#mount-juicefs-in-docker}

相比卷插件，直接在容器中使用 JuiceFS 客户端更加灵活，可以在容器中直接挂载 JuiceFS 文件系统，也可以通过 S3 Gateway、WebDAV 开放文件系统访问。

### 方式一：自行构建镜像

JuiceFS 客户端是一个独立的二进制程序，同时提供 AMD64 和 ARM64 架构的版本，可以在 Dockerfile 中定义下载安装 JuiceFS 客户端的命令，例如：

```Dockerfile
FROM ubuntu:22.04
...
# 使用官方一键安装脚本
RUN curl -sSL https://d.juicefs.com/install | sh - 
```

更多内容详见[「定制容器镜像」](https://juicefs.com/docs/zh/csi/guide/custom-image)。

### 方式二：使用官方维护的镜像

JuiceFS 官方维护的镜像 [`juicedata/mount`](https://hub.docker.com/r/juicedata/mount) ，可以通过 tag 指定所需要的版本。**社区版 tag 为 ce**，例如：latest、ce-v1.1.2、ce-nightly。`latest` 标签仅包含最新的社区版，`nightly` 标签指向最新的开发版本，详情查看 [Docker hub 的 tags 页面](https://hub.docker.com/r/juicedata/mount/tags)。

开始之前，你需要先准备好[对象存储](../reference/how_to_set_up_object_storage.md)和[元数据引擎](../reference/how_to_set_up_metadata_engine.md)。

#### 创建文件系统

通过一个临时容器创建文件系统，例如：

```sh
docker run --rm \
    juicedata/mount:ce-v1.1.2 juicefs format \
    --storage s3 \
    --bucket https://xxx.your-s3-endpoint.com \
    --access-key=ACCESSKEY \
    --secret-key=SECRETKEY \
    rediss://user:password@xxx.your-redis-server.com:6379/1 myjfs
```

请将 `--storage`、`--bucket`、`--access-key`、`--secret-key` 以及元数据引擎的 URL 替换成你自己的配置。

#### 直接在容器中挂载文件系统

创建一个容器并将 JuiceFS 文件系统到挂载到容器中，例如：

```sh
docker run --privileged --name myjfs \
    juicedata/mount:ce-v1.1.2 juicefs mount \
    rediss://user:password@xxx.your-redis-server.com:6379/1 /mnt
```

请将元数据引擎的 URL 替换成你自己的配置，`/mnt` 是挂载点，可以根据需要修改。由于需要使用 FUSE，所以还需要 `--privileged` 权限。

#### 通过 Docker Compose 挂载文件系统

下面是一个使用 Docker Compose 的示例，请将元数据引擎的 URL 和挂载点替换成你自己的配置。

```yaml
version: "3"
services:
    busybox:
      image: busybox
      command: "ls /jfs"
      volumes:
        - ./mnt:/jfs
      depends_on:
        juicefs:
          condition: service_healthy

    juicefs:
      image: juicedata/mount:ce-v1.1.2
      container_name: myjfs
      volumes:
        - ./mnt:/mnt:rw,rshared
      cap_add:
        - SYS_ADMIN
      devices:
        - /dev/fuse
      security_opt: 
        - apparmor:unconfined
      command: ["juicefs", "mount", "rediss://user:password@xxx.your-redis-server.com:6379/1", "/mnt"]
      restart: unless-stopped
      healthcheck:
        test: ["CMD-SHELL", "cat /mnt/.control"]
        interval: 60s
        retries: 5
        start_period: 30s
        timeout: 10s
```

在容器中，JuiceFS 文件系统挂载到了 `/mnt` 目录，又通过配置文件中的 volumes 部分将容器中的 `/mnt` 映射到宿主机的 `./mnt` 目录，这样就可以实现在宿主机直接访问容器中挂载的 JuiceFS 文件系统。同时通过 depends_on 和 volumes 的结合可以将目录再次挂载进其余容器中使用

#### 通过 S3 Gateway 开放文件系统访问

下面是一个将 JuiceFS 以 S3 Gateway 方式开放访问的示例，请将 `MINIO_ROOT_USER`、`MINIO_ROOT_PASSWORD`、元数据引擎的 URL、监听的地址和端口号替换成你自己的配置。

```yaml
version: "3"
services:
    s3-gateway:
      image: juicedata/mount:ce-v1.1.2
      container_name: juicefs-s3-gateway
      environment:
        - MINIO_ROOT_USER=your-username
        - MINIO_ROOT_PASSWORD=your-password
      ports:
        - "9090:9090"
      command: ["juicefs", "gateway", "rediss://user:password@xxx.your-redis-server.com:6379/1", "0.0.0.0:9090"]
      restart: unless-stopped
```

使用宿主机的 `9090` 端口即可打开 S3 Gateway 的控制台，用相同的地址通过 S3 客户端或者 SDK 读写 JuiceFS 文件系统。


================================================
FILE: docs/zh_cn/deployment/nfs.md
================================================
---
title: 创建 NFS 共享
sidebar_position: 9
description: 本文介绍如何通过 NFS 共享 JuiceFS 文件系统中的目录。
---

NFS（Network File System）是一种网络文件共享协议，允许不同计算机之间通过网络共享文件和目录。它最初由 Sun Microsystems 开发，是一种在 Unix 和类 Unix 系统之间进行文件共享的标准方式。NFS 协议允许客户端像访问本地文件系统一样访问远程文件系统，从而实现透明的远程文件访问。

当需要将 JuiceFS 文件系统中的目录通过 NFS 共享时，只需使用 `juicefs mount` 命令挂载，然后使用 JuiceFS 挂载点或子目录创建 NFS 共享即可。

:::note
`juicefs mount` 以 FUSE 接口的形式挂载为本地的用户态文件系统，与本地文件系统在形态和用法上无异，因此可以直接被用于创建 NFS 共享。
:::

## 第 1 步：安装 NFS

配置 NFS 共享需要分别在服务端和客户端安装相应的软件包，以 Ubuntu/Debian 系统为例：

### 1. 服务端安装

创建 NFS 共享的主机（JuiceFS 文件系统也挂载在该服务器上）。

```shell
sudo apt install nfs-kernel-server
```

### 2. 客户端安装

所有需要访问 NFS 的 Linux 主机都需要安装客户端。

```shell
sudo apt install nfs-common
```

## 第 2 步：创建共享

这里假设 JuiceFS 在服务端系统的挂载点是 `/mnt/myjfs`，比如要将其中的 `media` 子目录设置为 NFS 共享，可以在服务端系统的 `/etc/exports` 文件中添加如下配置：

```
"/mnt/myjfs/media" *(rw,sync,no_subtree_check,fsid=1)
```

NFS 共享配置的语法为：

```
<Share Path> <Allowed IPs>(options)
```

比如要将这个共享设置为仅允许 `192.168.1.0/24` 这个 IP 段的主机挂载且避免挤压 root 权限，则可以修改为：

```
"/mnt/myjfs/media" 192.168.1.0/24(rw,async,no_subtree_check,no_root_squash,fsid=1)
```

### 共享选项说明

**其中涉及的共享选项：**

- `rw`：代表允许读和写，如果只允许读则使用 `ro`。
- `sync` 与 `async`：`sync` 为同步写入，当向 NFS 共享写入文件时，客户端会等待服务端确认数据写入成功后再进行后续操作。`async` 为异步写入，写入操作是异步的，在写数据到 NFS 共享时，客户端不会等待服务器确认是否成功写入，而是立即执行后续操作。
- `no_subtree_check`：禁用子目录检查，这将允许客户端挂载共享目录的父目录和子目录，会降低一些安全性但能提高 NFS 的兼容性。也可以设置为 `subtree_check` 来启用子目录检查，这样仅允许客户端挂载共享目录和它的子目录。
- `no_root_squash`：用于控制客户端 root 用户访问 NFS 共享时的身份映射行为。默认情况下，客户端以 root 身份挂载 NFS 共享时，服务端会将其映射为非特权用户（通常是 nobody 或 nfsnobody），这被称为 root 挤压。设置该选项后，则取消这种权限挤压，从而让客户端拥有服务端相同的 root 用户权限。该选项有一定安全风险，建议谨慎使用。
- `fsid`：文件系统标识符，用于在 NFS 上标识不同的文件系统。在 NFSv4 中，NFS 的根目录所在的文件系统被定义为 fsid=0，其他文件系统需要在它之下且编号唯一。在这里，JuiceFS 就是一个外挂的 FUSE 文件系统，因此需要给它设置一个唯一的标识。

### async 与 sync 模式的选择

对于 NFS 共享而言，sync（同步写入）模式可以提高数据的可靠性，但总是需要等待服务器确认成功写入才会执行下一个操作，这势必会导致写入速度降低。对于 JuiceFS 这种基于云上对象存储的文件系统，还需要进一步考虑网络延时的影响，使用 sync 模式往往会导致较低的写入性能。

通常情况下，在使用 JuiceFS 创建 NFS 共享时，建议将写入模式设置为 async（异步写入），从而避免损失写入性能。如果为了保证数据可靠性而必须使用 sync 模式时，建议为 JuiceFS 设置容量充足的高性能 SSD 磁盘作为本地缓存，并开启 writeback 写缓存模式。


================================================
FILE: docs/zh_cn/deployment/production_deployment_recommendations.md
================================================
---
sidebar_position: 1
slug: /production_deployment_recommendations
description: 本文面向即将把 JuiceFS 部署到生产环境的用户参考，提供一系列环境配置建议。
---

# 生产环境部署建议

本文档提供在生产环境中部署 JuiceFS 社区版的建议，主要涉及监控指标收集、元数据自动备份、回收站配置、客户端后台任务、客户端日志滚动和命令行自动补全等方面，以确保文件系统的稳定性和可靠性。

## 监控指标收集与可视化

务必收集 JuiceFS 客户端的监控指标，并通过 Grafana 可视化，以便实时监控文件系统的性能和健康状态。具体请参考[文档](../administration/monitoring.md)。

## 元数据自动备份

:::tip 提示
元数据自动备份是自 JuiceFS v1.0.0 版本开始加入的特性
:::

元数据对 JuiceFS 文件系统非常关键，一旦丢失或损坏将可能影响大批文件甚至整个文件系统。因此必须对元数据进行定期备份。

元数据自动备份特性默认开启，备份间隔为 1 小时，备份的元数据会经过压缩后存储至对应的对象存储中（与文件系统的数据隔离）。备份由 JuiceFS 客户端执行，备份期间会导致其 CPU 和内存使用量上升，默认情况下可认为会在所有客户端中随机选择一个执行备份操作。

特别注意默认情况下当文件系统的**文件数达到一百万**时，元数据自动备份功能将会关闭，需要配置一个更大的备份间隔（`--backup-meta` 选项）才会再次开启。备份间隔每个客户端独立配置，设置 `--backup-meta 0` 则表示关闭元数据自动备份特性。

:::note 注意
备份元数据所需的时间取决于具体的元数据引擎，不同元数据引擎会有不同的性能表现。
:::

有关元数据自动备份的详细介绍请参考[文档](../administration/metadata_dump_load.md#backup-automatically)，你也可以手动备份元数据。除此之外，也请遵照你所使用的元数据引擎的运维建议对数据进行定期备份。

## 回收站

:::tip 提示
回收站是自 JuiceFS v1.0.0 版本开始加入的特性
:::

回收站默认开启，文件被删除后的保留时间默认配置为 1 天，可以有效防止数据被误删除时造成的数据丢失风险。

不过回收站开启以后也可能带来一些副作用，如果应用需要经常删除文件或者频繁覆盖写文件，会导致对象存储使用量远大于文件系统用量。这本质上是因为 JuiceFS 客户端会将对象存储上被删除的文件或者覆盖写时产生的需要垃圾回收的数据块持续保留一段时间。因此，在部署 JuiceFS 至生产环境时就应该考虑好合适的回收站配置，回收站保留时间可以通过以下方式配置（如果将 `--trash-days` 设置为 `0` 则表示关闭回收站特性）：

- 新建文件系统：通过 `juicefs format` 的 `--trash-days <value>` 选项设置
- 已有文件系统：通过 `juicefs config` 的 `--trash-days <value>` 选项修改

有关回收站的详细介绍请参考[文档](../security/trash.md)。

## 客户端后台任务

JuiceFS 文件系统通过客户端维护后台任务，可以自动执行清理待删除文件和对象、清理回收站中的过期文件和碎片、清理长时间未响应的客户端会话等任务等。

同一个 JuiceFS 文件系统的所有客户端在运行过程中共享一个后台任务集，每个任务定时执行，且具体执行的客户端随机选择。具体的后台任务包括：

1. 清理待删除的文件和对象
2. 清理回收站中的过期文件和碎片
3. 清理长时间未响应的客户端会话
4. 自动备份元数据

由于这些任务执行时会占用一定资源，因此可以为业务较繁重的客户端配置 `--no-bgjob` 选项来禁止其参与后台任务。

:::note 注意
请保证至少有一个 JuiceFS 客户端可以执行后台任务
:::

## 客户端日志滚动

当后台运行 JuiceFS 挂载点时，客户端默认会将日志输出到本地文件中。取决于挂载文件系统时的运行用户，本地日志文件的路径稍有区别。root 用户对应的日志文件路径是 `/var/log/juicefs.log`，非 root 用户的日志文件路径是 `$HOME/.juicefs/juicefs.log`。

本地日志文件默认不会滚动，生产环境中为了确保日志文件不占用过多磁盘空间需要手动配置。以下是一个日志滚动的示例配置：

```text title="/etc/logrotate.d/juicefs"
/var/log/juicefs.log {
    daily
    rotate 7
    compress
    delaycompress
    missingok
    notifempty
    copytruncate
}
```

通过 `logrotate -d` 命令可以验证配置文件的正确性：

```shell
logrotate -d /etc/logrotate.d/juicefs
```

有关日志滚动配置的详细介绍请参考[文档](https://linux.die.net/man/8/logrotate)。

## 命令行自动补全

JuiceFS 为 Bash 和 Zsh 提供了命令行自动补全脚本，方便在命令行中使用 `juicefs` 命令，具体请参考[文档](../reference/command_reference.mdx#auto-completion)。


================================================
FILE: docs/zh_cn/deployment/python_sdk.md
================================================
---
title: Python SDK
sidebar_position: 6
---

JuiceFS 社区版从 v1.3.0 引入 Python SDK，适合无法使用 FUSE 挂载的容器化或虚拟化环境使用。并且 Python SDK 实现了 fsspec 的接口规范，可方便的对接 Ray 等框架。

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114471129321725&bvid=BV1Xu5NzQEiG&cid=29850536628&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 编译

你可以在当前工作环境中直接编译 Python SDK，也可以使用 Docker 容器进行编译。两种方式都需要先克隆仓库并进入 SDK 所在目录。

```bash
# 克隆 JuiceFS 仓库
git clone https://github.com/juicedata/juicefs.git
# 进入 JuiceFS 目录
cd juicefs/sdk/python
```

### 直接编译

直接编译需要 `go1.20+` 和 `python3` 环境。

#### 第一步：编译 libjfs.so

```bash
go build -buildmode c-shared -ldflags="-s -w" -o juicefs/juicefs/libjfs.so ../java/libjfs
```

编译产生的 `libjfs.so` 和 `libjfs.h` 文件在 `sdk/python/juicefs/juicefs` 目录下。

#### 第二步：编译 Python SDK

```bash
cd juicefs && python3 -m build -w
```

编译好的 Python SDK 会在 `juicefs/sdk/python/dist` 目录下，文件名为 `juicefs-1.3.0-py3-none-any.whl`。

### Docker 编译

使用 Docker 容器编译需要当前系统安装了 `Docker`、`make` 和 `go1.20+` 环境。

#### 第一步：构建 Docker 镜像

```bash
# For arm64
make arm-builder

# For amd64
make builder
```

#### 第二步：编译 Python SDK

```bash
make juicefs
```

编译好的 Python SDK 会在 `juicefs/sdk/python/dist` 目录下，文件名为 `juicefs-1.3.0-py3-none-any.whl`。

### 编译报错处理

如果在编译时遇到 `sed: 1: "juicefs/setup.py": invalid command code j` 的错误，可以尝试将 `Makefile` 中 `sed` 相关的命令注释掉。

## 安装与使用

### 安装 SDK

将编译好的 `juicefs-1.3.0-py3-none-any.whl` 文件拷贝到目标机器上，使用 `pip` 安装：

```bash
pip install juicefs-1.3.0-py3-none-any.whl
```

### 准备文件系统

:::tip
JuiceFS 的 Python SDK 暂不支持格式化文件系统，因此在使用之前请确保已经预先创建了 JuiceFS 文件系统。
:::

假设这里已经有一个预先创建好的名称为 `myfs` 的文件系统，元数据引擎 URL 为 `redis://192.168.1.8/0`。

### 使用 Client

`Client` 类的实现与 Python 的 io 模块类似。

可以使用以下代码实例化一个 JuiceFS 客户端，`name` 参数为文件系统名称，`meta` 参数为元数据引擎的 URL。其中，`name`参数必须存在，但允许使用空字符串或 `None`。

```python
from juicefs import Client

# 创建 JuiceFS 客户端
jfs = Client(name='', meta='redis://192.168.1.8/0')

# 列出目录中的文件
jfs.listdir('/')
```

### 使用 fsspec

JuiceFS 的 Python SDK 还支持 `fsspec` 接口来操作 JuiceFS 文件系统。

```bash
# 安装 fsspec
pip install fsspec
```

`fsspec` 的使用方式与 `Client` 类类似，只是需要指定 `jfs` 或 `juicefs` 作为文件系统类型。

```python
import fsspec
from juicefs.spec import JuiceFS

jfs = fsspec.filesystem('jfs', name='', meta='redis://192.168.1.8/0')

# 列出目录中的文件
jfs.ls('/')
```

### 获取帮助信息

可以使用 `help()` 函数获取类和方法的帮助信息。

```python
import juicefs

help(juicefs.Client)
```

也可以使用 `dir()` 函数获取类和方法的列表。

```python
import juicefs

dir(juicefs.Client)
```


================================================
FILE: docs/zh_cn/deployment/samba.md
================================================
---
title: 创建 Samba 共享
sidebar_position: 8
description: 本文介绍如何通过 Samba 共享 JuiceFS 文件系统中的目录。
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

Samba 是一个开源的软件套件，它实现了 SMB/CIFS（Server Message Block / Common Internet File System）协议，该协议是 Windows 系统中常用的文件共享协议。通过 Samba，可以在 Linux/Unix 服务器上创建共享目录，允许 Windows 计算机通过网络访问和使用这些共享资源。

在安装了 Samba 的 Linux 系统上通过编辑 `smb.conf` 配置文件即可将本地目录创建成为共享文件夹，Windows 和 macOS 系统使用文件管理器就可以直接访问读写，Linux 需要安装 Samba 客户端访问。

当需要将 JuiceFS 文件系统中的目录通过 Samba 共享时，只需使用 `juicefs mount` 命令挂载，然后使用 JuiceFS 挂载点或子目录创建 Samba 共享即可。

:::note
`juicefs mount` 以 FUSE 接口的形式挂载为本地的用户态文件系统，与本地文件系统在形态和用法上无异，因此可以直接被用于创建 Samba 共享。
:::

## 第 1 步：安装 Samba

主流 Linux 发行版的包管理器都会提供 Samba：

<Tabs>
<TabItem value="debian" label="Debian 及衍生版本">

```shell
sudo apt install samba
```

</TabItem>
    <TabItem value="redhat" label="RHEL 及衍生版本">

```shell
sudo dnf install samba
```

</TabItem>
</Tabs>

如果需要配置 AD/DC，还需要安装其他的软件包，详情参考 [Samba 官方安装指南](https://wiki.samba.org/index.php/Distribution-specific_Package_Installation)。

## 第 2 步：启用 JuiceFS 的扩展属性支持

根据 [Samba 官方文档](https://wiki.samba.org/index.php/File_System_Support#File_systems_without_xattr_support)，建议使用支持扩展属性（xattr）的文件系统，JuiceFS 文件系统需要在挂载时使用 `--enable-xattr` 选项来启用扩展属性，例如：

```shell
sudo juicefs mount -d --enable-xattr sqlite3://myjfs.db /mnt/myjfs
```

对于通过 `/etc/fstab` 配置自动挂载的情况，可以在挂载选项部分添加 `enable-xattr` 选项，例如：

```ini
# <元数据引擎 URL> <挂载点> <文件系统类型> <挂载选项>
redis://127.0.0.1:6379/0 /mnt/myjfs juicefs _netdev,max-uploads=50,writeback,cache-size=1024000,enable-xattr 0 0
```

### 知识拓展：Samba 为什么需要文件系统支持扩展属性？

Samba 是一个基于 Linux/Unix 的软件，用途是面向 Windows 系统提供文件共享。由于 Windows 系统中很多文件和目录具有附加元数据（文件作者、关键字、图标位置等），这些信息通常是 POSIX 文件系统之外，需要以 xattr 的形式存储在 Windows 中的。为了保证这类文件可以正确的保存在 Linux 系统中，因此 Samba 建议使用支持扩展属性的文件系统创建共享。

## 第 3 步：创建 Samba 共享

假设 JuiceFS 的挂载点是 `/mnt/myjfs`，比如要把其中的 `media` 目录创建成为 Samba 共享，可以这样配置：

```ini
[Media]
    path = /mnt/myjfs/media
    guest ok = no
    read only = no
    browseable = yes
```

## 面向 macOS 的共享

苹果 macOS 系统支持直接访问 Samba 共享，与 Windows 类似，macOS 也存在一些额外的元数据（图标位置、Spotlight 搜索等）需要通过 xattr 来保存，Samba 4.9 及以上版本默认开启了对苹果系统的扩展属性支持。

如果 [Samba 版本低于 4.9](https://wiki.samba.org/index.php/Configure_Samba_to_Work_Better_with_Mac_OS_X)，需要在 Samba 的 [global] 全局配置部分添加 `ea support = yes` 选项来启用面向苹果系统的扩展属性支持，编辑配置文件 `/etc/samba/smb.conf`，例如：

```ini
[global]
    workgroup = SAMBA
    security = user
    passdb backend = tdbsam
    ea support = yes
```

## Samba 的用户管理

Samba 有一套自己的用户数据库，它与操作系统用户之间是独立的，但是 Samba 共享的是系统中的目录，因此必须有恰当的用户权限才能读写。

### 创建 Samba 用户

在为 Samba 创建用户时，要求该用户必须是系统中已经存在的用户，系统会自动进行映射，从而让 Samba 用户具有同名系统用户的权限。

- 如果系统中已存在该用户，假设该账户是 herald，则这样创建 Samba 账户：

    ```shell
    sudo smbpasswd -a herald
    ```

    根据命令提示设置密码即可，Samba 账户可以设置与系统用户不同的密码。

- 如果需要创建一个新的用户，以创建一个名为 `abc` 的用户为例，则这样操作：
    1. 创建用户：

        ```shell
        sudo adduser abc
        ```

    2. 创建同名的 Samba 用户：

        ```shell
        sudo smbpasswd -a abc
        ```

### 查看已创建的 Samba 用户

`pdbedit` 是一个 Samba 自带的用于管理 Samba 用户数据库的工具，可以使用该工具来列出所有已创建的 Samba 用户：

```shell
sudo pdbedit -L
```

它会列出所有已创建的 Samba 用户列表，包括用户名、用户的 SID（Security Identifier）和所属的组等信息。

## 扩展阅读

[《如何基于 JuiceFS 配置 Samba 和 NFS 共享》](https://juicefs.com/zh-cn/blog/usage-tips/configure-samba-and-nfs-shares-based-juicefs)，这篇文章介绍了如何使用 Cockpit 在浏览器中以图形化界面方式来管理 Samba 和 NFS 共享。


================================================
FILE: docs/zh_cn/deployment/webdav.md
================================================
---
title: 配置 WebDAV 服务
sidebar_position: 5
---

WebDAV 是 HTTP 协议的扩展，是一种便于多用户间协同编辑和管理网络上的文档的共享协议。很多涉及文件编辑和同步的工具、macOS Finder 以及一些 Linux 发行版的文件管理器都内置了 WebDAV 客户端支持。

JuiceFS 支持通过 WebDAV 协议挂载访问，对于 macOS 以及其他没有原生 FUSE 支持的操作系统，通过 WebDAV 协议访问 JuiceFS 文件系统是非常方便的。

## 前置条件

在配置 WebDAV 服务之前，你需要预先[创建一个 JuiceFS 文件系统](../getting-started/standalone.md#juicefs-format)。

## 匿名 WebDAV

对于单机或内网等安全不敏感的环境中，可以配置不带身份认证的匿名 WebDAV，命令格式如下：

```shell
juicefs webdav META-URL LISTENING-ADDRESS:PORT
```

例如，为一个 JuiceFS 文件系统启用 WebDAV 协议访问：

```shell
sudo juicefs webdav sqlite3://myjfs.db 192.168.1.8:80
```

WebDAV 服务需要通过设定的监听地址和端口进行访问，如上例中使用了内网的 IP 地址 `192.168.1.8`，以及标准的 Web 端口号 `80`，访问时无需指定端口，直接访问 `http://192.168.1.8` 即可。

如果使用了其他端口号，则需要在地址中明确指定，例如，监听 `9007` 端口，访问地址则应该用 `http://192.168.1.8:9007`。

:::tip 提示
当使用 macOS 的 Finder 访问匿名 WebDAV 时，不要使用「客人」身份。请使用「注册用户」身份，用户名可以输入任意字符，密码可以为空，然后直接连接即可。
:::

## 带身份认证的 WebDAV

:::info 说明
JuiceFS v1.0.3 及之前的版本不支持身份认证功能
:::

JuiceFS 的 WebDAV 身份认证功能需要通过环境变量设置用户名（`WEBDAV_USER`）和密码（`WEBDAV_PASSWORD`），例如：

```shell
export WEBDAV_USER=user
export WEBDAV_PASSWORD=mypassword
sudo juicefs webdav sqlite3://myjfs.db 192.168.1.8:80
```

## 启用 HTTPS 支持

JuiceFS 支持配置通过 HTTPS 协议保护的 WebDAV 服务，通过 `--cert-file` 和 `--key-file` 选项指定证书和私钥，既可以使用受信任的数字证书颁发机构 CA 签发的证书，也可以使用 OpenSSL 创建自签名证书。

### 自签名证书

这里使用 OpenSSL 创建私钥和证书：

1. 生成服务器私钥

   ```shell
   openssl genrsa -out client.key 4096
   ```

2. 生成证书签名请求（CSR）

   ```shell
   openssl req -new -key client.key -out client.csr
   ```

3. 使用 CSR 签发证书

   ```shell
   openssl x509 -req -days 365 -in client.csr -signkey client.key -out client.crt
   ```

以上三条命令会在当前目录产生以下文件：

- `client.key`：服务器私钥
- `client.csr`：证书签名请求文件
- `client.crt`：自签名证书

创建 WebDAV 服务时需要使用 `client.key` 和 `client.crt`，例如：

```shell
sudo juicefs webdav \
   --cert-file ./client.crt \
   --key-file ./client.key \
   sqlite3://myjfs.db 192.168.1.8:443
```

启用了 HTTPS 支持，监听的端口号可以改为 HTTPS 的标准端口号 `443`，然后改用 `https://` 协议头，访问时无需指定端口号，例如：`https://192.168.1.8`。

同样地，设置了非 HTTPS 标准端口号，应该在访问地址中明确指定，例如，设置了监听 `9999` 端口，访问地址应使用 `https://192.168.1.8:9999`。


================================================
FILE: docs/zh_cn/development/contributing_guide.md
================================================
---
title: 贡献指南
sidebar_position: 1
description: JuiceFS 是开源软件，代码由全球开发者共同贡献和维护，您可以参考本文了解参与开发的流程和注意事项。
---

## 基本准则 {#guidelines}

- 在开始修复功能或错误之前，请先通过 GitHub、Slack 等渠道与我们沟通。此步骤的目的是确保没有其他人已经在处理它，如有必要，我们将要求您创建一个 GitHub issue。
- 在开始贡献前，使用 GitHub issue 来讨论功能实现并与核心开发者达成一致。
- 如果这是一个重大的特性更新，写一份设计文档来帮助社区理解你的动机和解决方案。
- 对于首次贡献者来说，找到合适 issue 的好方法是使用标签 ["kind/good-first-issue"](https://github.com/juicedata/juicefs/labels/kind%2Fgood-first-issue) 或 ["kind/help-wanted"](https://github.com/juicedata/juicefs/labels/kind%2Fhelp-wanted) 搜索未解决的问题。

## 代码风格 {#coding-style}

- 我们遵循 ["Effective Go"](https://go.dev/doc/effective_go) 和 ["Go Code Review Comments"](https://github.com/golang/go/wiki/CodeReviewComments)。
- 在提交前使用 `go fmt` 格式化你的代码。你可以在 [Go 的编辑器和 IDE](https://github.com/golang/go/wiki/IDEsAndTextEditorPlugins) 中找到支持 Go 的相关工具的信息。
- 每个新的源文件都必须以许可证头开始。
- 安装 [pre-commit](https://pre-commit.com) 并使用它来设置一个预提交钩子来进行静态分析。只需在仓库根目录下运行 `pre-commit install` 即可。

## 签署 CLA {#sign-the-cla}

在您为 JuiceFS 进行贡献之前，您需要签署[贡献者许可协议](https://cla-assistant.io/juicedata/juicefs)。当你第一次提交 PR 的时候，将有一个 CLA 助手指导你。

## 什么是好的 PR {#what-is-a-good-pr}

- 足够的单元测试
- 遵循编码风格
- 足够的行内注释
- 简要解释的提交内容

## 贡献流程 {#contribution-flow}

1. 基于主分支创建一个要贡献的主题分支。这个主分支通常是 `main` 分支；
1. 提交代码；
1. 确保提交消息的格式正确；
1. 将主题分支中的更改推到个人 fork 的仓库；
1. 提交一个 PR 到 [`juicedata/juicefs`](https://github.com/juicedata/juicefs/compare) 仓库。这个 PR 应该链接到你或其他人创建的一个 issue；
1. PR 在合并之前必须得到至少一个维护者的批准。


================================================
FILE: docs/zh_cn/development/internals.md
================================================
---
title: 内部实现
sidebar_position: 3
slug: /internals
---

本文介绍 JuiceFS 的实现细节，用来为开发者了解和贡献开源代码作参考。其中内容对应的 JuiceFS 代码版本为 v1.0.0，元数据版本为 v1。

在深入学习源码前，我们还推荐阅读：

* [JuiceFS 读写请求处理流程](../introduction/io_processing.md)
* 网易存储团队的工程师写的这几篇博客（注意文章内容可能与最新版本代码有出入，一切请以代码为准）：[JuiceFS 调研（基于开源版本代码）](https://aspirer.wang/?p=1560)、[JuiceFS 源码阅读 - 上](https://mp.weixin.qq.com/s/mdqFJLpaJ249rUUEnRiP3Q)、[JuiceFS 源码阅读 - 中](https://mp.weixin.qq.com/s/CLQbQ-cLLGFsShPKUrCUJg)。

## 关键词定义

高层概念：

- 文件系统（File System）：即 JuiceFS Volume，代表一个独立的命名空间。文件在同文件系统内可自由移动，不同文件系统之间则需要数据拷贝；
- 元数据引擎（Metadata Engine）：用来存储和管理文件系统元数据的组件，通常由支持事务的数据库担任。目前已支持的元数据引擎共有三大类：
  - Redis：Redis 及各种协议兼容的服务；
  - SQL：MySQL、PostgreSQL、SQLite 等；
  - TKV：TiKV、BadgerDB、etcd 等。
- 数据存储：用来存储和管理文件系统数据的组件，通常由对象存储担任，如 Amazon S3、Aliyun OSS 等；也可由能兼容对象存储语义的其他存储系统担任，如本地文件系统、Ceph RADOS、TiKV 等；
- JuiceFS 客户端（JuiceFS Client）：有多种形式，如挂载进程、S3 网关、WebDAV 服务器、Java SDK 等；
- 文件：本文中泛指所有类型的文件，包括普通文件、目录文件、链接文件、设备文件等；
- 目录：一种特殊的文件，用来组织文件树型结构，其内容是一组其他文件的索引。

底层概念（详见 [JuiceFS 读写请求处理流程](../introduction/io_processing.md)）：

- Chunk：对文件分割的逻辑单位，大小 64MiB。Chunk 的存在让 JuiceFS 在读取大文件时能快速定位，提升读取性能；
- Slice：数据写入的逻辑单位，每一次写入都会分配一个已有或新的 Slice，而在元数据中则在 Chunk 下维护着 Chunk Slice 列表。
- Block：文件分割后的实际最小存储单位，默认大小 4MiB。一个 Chunk 包含一个或多个 Slice，而一个 Slice 又包含一个或多个 Block。

## 代码结构 {#source-code-structure}

[JuiceFS 源码](https://github.com/juicedata/juicefs)的大体结构如下：

* [`cmd`](https://github.com/juicedata/juicefs/tree/main/cmd) 是代码结构总入口，所有相关功能都能在此找到入口，如 `juicefs format` 命令对应着 `cmd/format.go`；
* [`pkg`](https://github.com/juicedata/juicefs/tree/main/pkg) 是具体实现，核心逻辑都在其中：
  * `pkg/fuse/fuse.go` 是 FUSE 实现的入口，提供抽象 FUSE 接口；
  * `pkg/vfs` 是具体的 FUSE 接口实现，元数据请求会调用 `pkg/meta` 中的实现，读请求会调用 `pkg/vfs/reader.go`，写请求会调用 `pkg/vfs/writer.go`；
  * `pkg/meta` 目录中是所有元数据引擎的实现，其中：
    * `pkg/meta/interface.go` 是所有类型元数据引擎的接口定义
    * `pkg/meta/redis.go` 是 Redis 数据库的接口实现
    * `pkg/meta/sql.go` 是关系型数据库的接口定义及通用接口实现，特定数据库的实现在单独文件中（如 MySQL 的实现在 `pkg/meta/sql_mysql.go`）
    * `pkg/meta/tkv.go` 是 KV 类数据库的接口定义及通用接口实现，特定数据库的实现在单独文件中（如 TiKV 的实现在 `pkg/meta/tkv_tikv.go`）
  * `pkg/object` 是与各种对象存储对接的实现。
* [`sdk/java`](https://github.com/juicedata/juicefs/tree/main/sdk/java) 是 Hadoop Java SDK 的实现，底层依赖 `sdk/java/libjfs` 这个库（通过 JNI 调用）。

## FUSE 接口实现 {#fuse-interface-implementation}

JuiceFS 基于 [FUSE](https://en.wikipedia.org/wiki/Filesystem_in_Userspace)（Filesystem in Userspace）实现了一个用户态文件系统，FUSE 接口在 Linux 系统中的实现库 [`libfuse`](https://github.com/libfuse/libfuse) 提供两种 API：high-level API 和 low-level API，其中 high-level API 基于文件名和路径，low-level API 基于 inode。

JuiceFS 基于 low-level API 实现（事实上 JuiceFS 不依赖 `libfuse`，而是 [`go-fuse`](https://github.com/hanwen/go-fuse)），这是因为内核的 VFS 跟 FUSE 库交互就使用 low-level API。如果使用 high-level API 的话，其实是在 `libfuse` 内部做了 VFS 树的模拟，然后对外暴露基于路径的 API，这种模式适合元数据本身是基于路径提供的 API 的系统，比如 HDFS 或者 S3 之类。而如果元数据本身也是基于 inode 的目录树，这种 inode → path → inode 的反复转换就会影响性能（所以 HDFS 的 FUSE 接口实现性能都不好）。JuiceFS 的元数据是按照 inode 组织的，也直接提供基于 inode 的 API，那么使用 FUSE 的 low-level API 就非常简单和自然，性能也很好。

## 元数据结构

文件系统通常组织成树型结构，其中节点代表文件，边代表目录的包含关系。文件无法悬空停留，其（根目录除外）必然属于某个目录；目录可以包含一个或多个子文件。JuiceFS 中一共有十多种元数据结构，其中大部分用来维护文件树的组织关系和各个节点的属性，其余的用来管理系统配置，客户端会话和异步任务等。以下具体介绍所有的元数据结构。

### 通用结构

#### Setting

保存文件系统的格式化信息，在执行 `juicefs format` 命令时创建，后续可通过 `juicefs config` 命令修改其中的部分字段。结构具体如下：

```go
type Format struct {
    Name             string
    UUID             string
    Storage          string
    Bucket           string
    AccessKey        string `json:",omitempty"`
    SecretKey        string `json:",omitempty"`
    SessionToken     string `json:",omitempty"`
    BlockSize        int
    Compression      string `json:",omitempty"`
    Shards           int    `json:",omitempty"`
    HashPrefix       bool   `json:",omitempty"`
    Capacity         uint64 `json:",omitempty"`
    Inodes           uint64 `json:",omitempty"`
    EncryptKey       string `json:",omitempty"`
    KeyEncrypted     bool   `json:",omitempty"`
    TrashDays        int    `json:",omitempty"`
    MetaVersion      int    `json:",omitempty"`
    MinClientVersion string `json:",omitempty"`
    MaxClientVersion string `json:",omitempty"`
    EnableACL        bool
}
```

- Name：文件系统名称，在格式化时由用户指定
- UUID：文件系统的唯一 ID，在格式化时由系统自动生成
- Storage：用来保存数据的对象存储简称，如 `s3`、`oss` 等
- Bucket：对象存储的桶路径
- AccessKey：用来访问对象存储的 access key
- SecretKey：用来访问对象存储的 secret key
- SessionToken：用来访问对象存储的 session token，部分对象存储支持使用临时的 token 以获得有限时间的权限
- BlockSize：存储文件时拆分成的数据块大小，默认为 4 MiB
- Compression：数据块上传到对象存储前执行的压缩算法，默认为不压缩
- Shards：对象存储中分片桶的个数，默认为只有一个桶；当 Shards > 1 时，数据对象会随机哈希到 Shards 个桶中
- HashPrefix：是否为对象名称设置一个散列的前缀，默认为不设置
- Capacity：文件系统的总容量配额限制
- Inodes：文件系统的总文件数配额限制
- EncryptKey：数据对象的加密私钥，只要在开启了数据加密功能后才有用
- KeyEncrypted：保存的密钥是否处于加密状态，默认会将 SecretKey、EncryptKey 和 SessionToken 加密保存
- TrashDays：文件在回收站中被保留的天数，默认为 1 天
- MetaVersion：元数据结构的版本，目前为 V1（V0 和 V1 相同）
- MinClientVersion：允许连接的最小客户端版本，早于此版本的客户端会被拒绝连接
- MaxClientVersion：允许连接的最大客户端版本
- EnableACL: 是否开启 ACL 功能

此结构会序列化成 JSON 格式保存在元数据引擎中。

#### Counter

维护系统中的各个计数器值和一些后台任务的启动时间戳，具体有：

- usedSpace：文件系统的已使用容量
- totalInodes：文件系统的已使用文件数
- nextInode：下一个可用的 inode 号（Redis 中为当前已用的最大 inode 号）
- nextChunk：下一个可用的 sliceId（Redis 中为当前已用的最大 sliceId）
- nextSession：当前已用的最大 SID（sessionID）
- nextTrash：当前已用的最大 trash inode 号
- nextCleanupSlices：上一次检查清理残留 slices 的时间点
- lastCleanupSessions：上一次检查清理残留 stale sessions 的时间点
- lastCleanupFiles：上一次检查清理残留文件的时间点
- lastCleanupTrash：上一次检查清理回收站的时间点

#### Session

记录连接到此文件系统的客户端会话 ID 和其超时时间。每个客户端会定时发送心跳消息以更新超时时间，长时间未更新者会被其他客户端自动清理。

:::tip 注意
只读客户端无法写入元数据引擎，因此其会话**不会**被记录。
:::

#### SessionInfo

记录客户端会话的具体元信息，使其可以通过 `juicefs status` 命令查看。具体为：

```go
type SessionInfo struct {
    Version    string // JuiceFS 版本
    HostName   string // 主机名称
    MountPoint string // 挂载点路径。S3 网关和 WebDAV 服务分别为 "s3gateway" 和 "webdav"
    ProcessID  int    // 进程 ID
}
```

此结构会序列化成 JSON 格式保存在元数据引擎中。

#### Node

记录每个文件的属性信息，具体为：

```go
type Attr struct {
    Flags     uint8  // reserved flags
    Typ       uint8  // type of a node
    Mode      uint16 // permission mode
    Uid       uint32 // owner id
    Gid       uint32 // group id of owner
    Rdev      uint32 // device number
    Atime     int64  // last access time
    Mtime     int64  // last modified time
    Ctime     int64  // last change time for meta
    Atimensec uint32 // nanosecond part of atime
    Mtimensec uint32 // nanosecond part of mtime
    Ctimensec uint32 // nanosecond part of ctime
    Nlink     uint32 // number of links (sub-directories or hardlinks)
    Length    uint64 // length of regular file

    Parent    Ino  // inode of parent; 0 means tracked by parentKey (for hardlinks)
    Full      bool // the attributes are completed or not
    KeepCache bool // whether to keep the cached page or not

    AccessACL  uint32 // access ACL id (identical ACL rules share the same access ACL ID.)
    DefaultACL uint32 // default ACL id (default ACL and the access ACL share the same cache and store)
}
```

其中几个需要说明的字段：

- Atime/Atimensec：参考 [`--atime-mode`](../reference/command_reference.mdx#mount-metadata-options)
- Nlink：
  - 目录文件：初始值为 2（'.' 和 '..'），每有一个子目录 Nlink 值加 1
  - 其他文件：初始值为 1，每创建一个硬链接 Nlink 值加 1
- Length：
  - 目录文件：固定为 4096
  - 软链接（symbolic link）文件：为链接指向路径的字符串长度
  - 其他文件：为文件实际内容的长度

此结构一般会编码成二进制格式保存在元数据引擎中。

#### Edge

记录文件树中每条边的信息，具体为：

```
parentInode, name -> type, inode
```

其中 parentInode 是父目录的 inode 号，其他分别为子文件的名称、类型和 inode 号。

#### LinkParent

记录部分文件的父目录。绝大部分文件的父目录记在其属性的 Parent 字段中；但对于创建过硬链接的文件，其父目录可能有多个，此时会将 Parent 字段置 0，同时独立记录其所有父目录 inodes，具体为：

```
inode -> parentInode, links
```

其中 links 是 parentInode 的计数，因为一个目录中可以创建多个硬链接，这些硬连接共享 inode。

#### Chunk

记录每个 Chunk 的信息，具体为：

```
inode, index -> []Slices
```

其中 inode 是此 Chunk 所属文件的 inode 号，index 是其在这个文件所有 Chunks 中序号，从 0 开始。Chunk 值内容为一个 Slices 数组，每个 Slice 代表一段客户端写入的数据，并且按写入时间顺序 append 到这个数组中。当不同 Slices 之间有重叠时，以后加入的 Slice 为准。Slice 的具体结构为：

```go
type Slice struct {
    Pos  uint32 // Slice 在 Chunk 中的偏移位置
    ID   uint64 // Slice 的 ID，全局唯一
    Size uint32 // Slice 的总大小
    Off  uint32 // 有效数据在此 Slice 中的偏移位置
    Len  uint32 // 有效数据在此 Slice 中的大小
}
```

此结构会编码成二进制格式保存，占 24 个字节。

#### SliceRef

记录 Slice 的引用计数，具体为：

```
sliceId, size -> refs
```

由于绝大部分 Slice 的引用计数均为 1，为减少数据库中相关 entry 数量，在 Redis 和 TKV 中以实际值减 1 作为存储的计数值。这样，大部分的 Slice 对应 refs 值为 0，则不必在数据库中创建相关 entry。

#### Symlink

记录软链接文件的指向位置，具体为：

```
inode -> target
```

#### Xattr

记录文件相关的扩展属性（Key-Value 对），具体为：

```
inode, key -> value
```

#### Flock

记录文件相关的 BSD locks（flock），具体为：

```
inode, sid, owner -> ltype
```

其中 `sid` 为客户端会话 ID，`owner` 为一串数字，通常与进程相关联；`ltype` 为锁类型，可以为 'R' 或者 'W'。

#### Plock

记录文件相关的 POSIX record locks（fcntl），具体为：

```
inode, sid, owner -> []plockRecord
```

这里 plock 是一种更细粒度的锁，可以只锁定文件中的某一片段：

```go
type plockRecord struct {
    ltype uint32 // 锁类型
    pid   uint32 // 进程 ID
    start uint64 // 锁起始位置
    end   uint64 // 锁结束位置
}
```

此结构会编码成二进制格式保存，占 24 个字节。

#### DelFiles

记录待清理的文件列表。由于文件的数据清理是一个异步且可能长耗时的操作，可能被其他因素中断，因此会由此列表进行跟踪：

```
inode, length -> expire
```

其中 length 为文件长度，expire 为文件被删除的时间。

#### DelSlices

记录延迟删除的 Slices。当回收站功能开启时，因 Slice Compaction 功能删除的旧 Slices 会被保留与回收站配置相同的时间，以被在必要时可用来恢复数据。其内容为：

```
sliceId, deleted -> []slice
```

其中 sliceId 为 compact 后新 Slice 的 ID，deleted 为 compact 完成的时间戳，映射值为被 compacted 的所有旧 slice 列表，每个 slice 仅编码了 ID 和 size 信息：

```go
type slice struct {
    ID   uint64
    Size uint32
}
```

此结构会编码成二进制格式保存，占 12 个字节。

#### Sustained

记录会话中需临时保留的文件列表。当文件被删除时若其仍处于打开状态，则不能立即清理数据，而需要暂时保留直至其被关闭。

```
sid -> []inode
```

其中 `sid` 为会话 ID，映射值为暂时未删除的文件 inodes 列表。

### Redis

Redis 中 Key 的通用格式为 `${prefix}${JFSKey}`，其中：

- 在 Redis 非集群模式下 prefix 为空字符串，在集群模式中是一个大括号括起来的数据库编号，如 "{10}"
- JFSKey 是指 JuiceFS 不同数据结构的 Key，具体列举在后续小节中

在 Redis 的 Keys 中，如无特殊说明整数（包括 inode 号）都以十进制字符串表示。

#### Setting {#redis-setting}

- Key：`setting`
- Value Type：String
- Value：JSON 格式的文件系统格式化信息

#### Counter

- Key：计数器名称
- Value Type：String
- Value：计数器的值，实际均为整数

#### Session

- Key：`allSessions`
- Value Type：Sorted Set
- Value：所有连接此文件系统的非只读会话。在 Set 中：
  - Member：会话 ID
  - Score：此会话超时的时间点

#### SessionInfo

- Key：`sessionInfos`
- Value Type：Hash
- Value：所有非只读会话的基本元信息。在 Hash 中：
  - Key：会话 ID
  - Value：JSON 格式的会话信息

#### Node {#redis-node}

- Key：`i${inode}`
- Value Type：String
- Value：二进制编码的文件属性

#### Edge {#redis-edge}

- Key：`d${inode}`
- Value Type：Hash
- Value：此目录下的所有目录项。在 Hash 中：
  - Key：文件名称
  - Value：二进制编码的文件类型和 inode 号

#### LinkParent

- Key：`p${inode}`
- Value Type：Hash
- Value：此文件的所有父目录 inodes。在 Hash 中：
  - Key：父目录 inode
  - Value：此父目录 inode 的计数

#### Chunk {#redis-chunk}

- Key：`c${inode}_${index}`
- Value Type：list
- Value：Slices 列表，每个 Slice 均以二进制编码，各占 24 个字节

#### SliceRef {#sliceref}

- Key：`sliceRef`
- Value Type：Hash
- Value：所有需记录的 Slices 的计数值。在 Hash 中：
  - Key：`k${sliceId}_${size}`
  - Value：此 Slice 的引用计数值减 1（若引用计数为 1，则一般不创建对应 entry）

#### Symlink

- Key：`s${inode}`
- Value Type：String
- Value：符号链接指向的路径

#### Xattr

- Key：`x${inode}`
- Value Type：Hash
- Value：此文件的所有扩展属性。在 Hash 中：
  - Key：扩展属性名称
  - Value：扩展属性值

#### Flock

- Key：`lockf${inode}`
- Value Type：Hash
- Value：此文件的所有 flocks。在 Hash 中：
  - Key：`${sid}_${owner}`，owner 以十六进制表示
  - Value：锁类型，可能为 'R' 或者 'W'

#### Plock {#redis-plock}

- Key：`lockp${inode}`
- Value Type：Hash
- Value：此文件的所有 plocks。在 Hash 中：
  - Key：`${sid}_${owner}`，owner 以十六进制表示
  - Value：字节数组，其中每 24 字节对应一个 [plockRecord](#plock)

#### DelFiles

- Key：`delfiles`
- Value Type：Sorted Set
- Value：所有待清理的文件列表。在 Set 中：
  - Member：`${inode}:${length}`
  - Score：此文件加入集合的时间点

#### DelSlices {#redis-delslices}

- Key：`delSlices`
- Value Type：Hash
- Value：所有待清理的 Slices。在 Hash 中：
  - Key：`${sliceId}_${deleted}`
  - Value：字节数组，其中每 12 字节对应一个 [slice](#delslices)

#### Sustained

- Key：`session${sid}`
- Value Type：List
- Value：此会话中临时保留的文件列表。在 List 中：
  - Member：文件的 inode 号

### SQL

元数据按类型存储在不同的表中，每张表命名时以 `jfs_` 开头，跟上其具体的结构体名称组成表名，如 `jfs_node`。部分表中加入了 `bigserial` 类型的 `Id` 列作为主键，其仅用来确保每张表中都有主键，并不包含实际信息。

#### Setting {#sql-setting}

```go
type setting struct {
    Name  string `xorm:"pk"`
    Value string `xorm:"varchar(4096) notnull"`
}
```

固定只有一条 entry，Name 为 "format"，Value 为 JSON 格式的文件系统格式化信息。

#### Counter

```go
type counter struct {
    Name  string `xorm:"pk"`
    Value int64  `xorm:"notnull"`
}
```

#### Session

```go
type session2 struct {
    Sid    uint64 `xorm:"pk"`
    Expire int64  `xorm:"notnull"`
    Info   []byte `xorm:"blob"`
}
```

#### SessionInfo

没有独立的表，而是记在 `session2` 的 `Info` 列中。

#### Node {#sql-node}

```go
type node struct {
    Inode  Ino    `xorm:"pk"`
    Type   uint8  `xorm:"notnull"`
    Flags  uint8  `xorm:"notnull"`
    Mode   uint16 `xorm:"notnull"`
    Uid    uint32 `xorm:"notnull"`
    Gid    uint32 `xorm:"notnull"`
    Atime  int64  `xorm:"notnull"`
    Mtime  int64  `xorm:"notnull"`
    Ctime  int64  `xorm:"notnull"`
    Nlink  uint32 `xorm:"notnull"`
    Length uint64 `xorm:"notnull"`
    Rdev   uint32
    Parent Ino
    AccessACLId  uint32 `xorm:"'access_acl_id'"`
    DefaultACLId uint32 `xorm:"'default_acl_id'"`
}
```

大部分字段与 [Attr](#node) 相同，但时间戳使用了较低精度，其中 Atime/Mtime/Ctime 的单位为微秒。

#### Edge {#sql-edge}

```go
type edge struct {
    Id     int64  `xorm:"pk bigserial"`
    Parent Ino    `xorm:"unique(edge) notnull"`
    Name   []byte `xorm:"unique(edge) varbinary(255) notnull"`
    Inode  Ino    `xorm:"index notnull"`
    Type   uint8  `xorm:"notnull"`
}
```

#### LinkParent

没有独立的表，而是根据 `edge` 中的 `Inode` 索引找到所有 `Parent`。

#### Chunk {#sql-chunk}

```go
type chunk struct {
    Id     int64  `xorm:"pk bigserial"`
    Inode  Ino    `xorm:"unique(chunk) notnull"`
    Indx   uint32 `xorm:"unique(chunk) notnull"`
    Slices []byte `xorm:"blob notnull"`
}
```

Slices 是一段字节数组，每 24 字节对应一个 [Slice](#chunk)。

#### SliceRef

```go
type sliceRef struct {
    Id   uint64 `xorm:"pk chunkid"`
    Size uint32 `xorm:"notnull"`
    Refs int    `xorm:"notnull"`
}
```

#### Symlink

```go
type symlink struct {
    Inode  Ino    `xorm:"pk"`
    Target []byte `xorm:"varbinary(4096) notnull"`
}
```

#### Xattr

```go
type xattr struct {
    Id    int64  `xorm:"pk bigserial"`
    Inode Ino    `xorm:"unique(name) notnull"`
    Name  string `xorm:"unique(name) notnull"`
    Value []byte `xorm:"blob notnull"`
}
```

#### Flock

```go
type flock struct {
    Id    int64  `xorm:"pk bigserial"`
    Inode Ino    `xorm:"notnull unique(flock)"`
    Sid   uint64 `xorm:"notnull unique(flock)"`
    Owner int64  `xorm:"notnull unique(flock)"`
    Ltype byte   `xorm:"notnull"`
}
```

#### Plock {#sql-plock}

```go
type plock struct {
    Id      int64  `xorm:"pk bigserial"`
    Inode   Ino    `xorm:"notnull unique(plock)"`
    Sid     uint64 `xorm:"notnull unique(plock)"`
    Owner   int64  `xorm:"notnull unique(plock)"`
    Records []byte `xorm:"blob notnull"`
}
```

Records 是一段字节数组，每 24 字节对应一个 [plockRecord](#plock)。

#### DelFiles

```go
type delfile struct {
    Inode  Ino    `xorm:"pk notnull"`
    Length uint64 `xorm:"notnull"`
    Expire int64  `xorm:"notnull"`
}
```

#### DelSlices {#sql-delslices}

```go
type delslices struct {
    Id      uint64 `xorm:"pk chunkid"`
    Deleted int64  `xorm:"notnull"`
    Slices  []byte `xorm:"blob notnull"`
}
```

Slices 是一段字节数组，每 12 字节对应一个 [slice](#delslices)。

#### Sustained

```go
type sustained struct {
    Id    int64  `xorm:"pk bigserial"`
    Sid   uint64 `xorm:"unique(sustained) notnull"`
    Inode Ino    `xorm:"unique(sustained) notnull"`
}
```

### TKV

TKV（Transactional Key-Value Database）中 Key 的通用格式为 `${prefix}${JFSKey}`，其中：

- prefix 用来区分不同的文件系统，通常是 `${VolumeName}0xFD`，其中的 `0xFD` 作为特殊字节用来处理不同文件系统名称间存在包含关系的情况。此外，对于无法公用的数据库（如 BadgerDB）则直接使用空字符串作前缀
- JFSKey 是指 JuiceFS 为不同数据类型设计的 Key，具体列举在后续小节中

在 TKV 的 Keys 中，所有整数都以编码后的二进制形式存储：

- inode 和 counter value 占 8 个字节，使用**小端**编码
- SID、sliceId 和 timestamp 占 8 个字节，使用**大端**编码

#### Setting {#tkv-setting}

```
setting -> JSON 格式的文件系统格式化信息
```

#### Counter

```
C${name} -> counter value
```

#### Session

```
SE${sid} -> timestamp
```

#### SessionInfo

```
SI${sid} -> JSON 格式的会话信息
```

#### Node {#tkv-node}

```
A${inode}I -> encoded Attr
```

#### Edge {#tkv-edge}

```
A${inode}D${name} -> encoded {type, inode}
```

#### LinkParent

```
A${inode}P${parentInode} -> counter value
```

#### Chunk {#tkv-chunk}

```
A${inode}C${index} -> Slices
```

其中 index 占 4 个字节，使用**大端**编码。Slices 是一段字节数组，每 24 字节对应一个 [Slice](#chunk)。

#### SliceRef

```
K${sliceId}${size} -> counter value
```

其中 size 占 4 个字节，使用**大端**编码。

#### Symlink

```
A${inode}S -> target
```

#### Xattr

```
A${inode}X${name} -> xattr value
```

#### Flock

```
F${inode} -> flocks
```

其中 flocks 是一段字节数组，每 17 字节对应一个 flock：

```go
type flock struct {
    sid   uint64
    owner uint64
    ltype uint8
}
```

#### Plock {#tkv-plock}

```
P${inode} -> plocks
```

其中 plocks 是一段字节数组，对应的 plock 是变长的：

```go
type plock struct {
    sid     uint64
    owner     uint64
    size     uint32
    records []byte
}
```

其中 size 是 records 数组的长度，records 中每 24 字节对应一个 [plockRecord](#plock)。

#### DelFiles

```
D${inode}${length} -> timestamp
```

其中 length 占 8 个字节，使用**大端**编码。

#### DelSlices {#tkv-delslices}

```
L${timestamp}${sliceId} -> slices
```

其中 slices 是一段字节数组，每 12 字节对应一个 [slice](#delslices)。

#### Sustained

```
SS${sid}${inode} -> 1
```

这里 Value 值仅用来占位。

## 4 文件数据格式

### 根据路径查找文件

根据 [Edge](#edge) 的设计，元数据引擎中只记录了每个目录的直接子节点。当应用提供一个路径来访问文件时，JuiceFS 需要逐级查找。现在假设应用想打开文件 `/dir1/dir2/testfile`，则需要：

1. 在根目录（Inode 号固定为 1）的 Edge 结构中搜寻 name 为 "dir1" 的 entry，得到其 inode 号 N1
2. 在 N1 的 Edge 结构中搜寻 name 为 "dir2" 的 entry，得到其 inode 号 N2
3. 在 N2 的 Edge 结构中搜寻 name 为 "testfile" 的 entry，得到其 inode 号 N3
4. 根据 N3 搜寻其对应的 [Node](#node) 结构，得到该文件的相关属性

在以上步骤中，任何一步搜寻失败都会导致该路径指向的文件未找到。

### 文件数据拆分

上一节中，我们已经可以根据文件的路径找到此文件，并获取到其属性。根据文件属性中的 inode 和 size 字段，即可找到跟文件内容相关的元数据。现在假设有个文件的 inode 为 100，size 为 160 MiB，那么该文件一共有 `(size-1) / 64 MiB + 1 = 3` 个 Chunks，如下：

```
 File: |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|_ _ _ _ _ _ _ _|
Chunk: |<---        Chunk 0        --->|<---        Chunk 1        --->|<-- Chunk 2 -->|
```

在单机 Redis 中，这意味着有 3 个 [Chunk Keys](#chunk)，分别为 `c100_0`， `c100_1` 和 `c100_2`，每个 Key 对应一个 Slices 列表。这些 Slices 主要在数据写入时生成，可能互相之间有覆盖，也可能未完全填充满 Chunk。因此，在使用前需要顺序遍历这个 Slices 列表，并重新构建出最新版的数据分布，做到：

1. 有多个 Slice 覆盖的部分以最后加入的 Slice 为准
2. 没有被 Slice 覆盖的部分自动补零，用 sliceId = 0 来表示
3. 根据文件 size 截断 Chunk

现假设 Chunk 0 中有 3 个 Slices，分别为：

```go
Slice{pos: 10M, id: 10, size: 30M, off: 0, len: 30M}
Slice{pos: 20M, id: 11, size: 16M, off: 0, len: 16M}
Slice{pos: 16M, id: 12, size: 10M, off: 0, len: 10M}
```

图示如下（每个 '_' 表示 2 MiB）：

```
   Chunk: |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
Slice 10:           |_ _ _ _ _ _ _ _ _ _ _ _ _ _ _|
Slice 11:                     |_ _ _ _ _ _ _ _|
Slice 12:                 |_ _ _ _ _|

New List: |_ _ _ _ _|_ _ _|_ _ _ _ _|_ _ _ _ _|_ _|_ _ _ _ _ _ _ _ _ _ _ _|
               0      10      12         11    10             0
```

重构后的新列表包含且仅包含了此 Chunk 的最新数据分布，具体如下：

```go
Slice{pos:   0, id:  0, size: 10M, off:   0, len: 10M}
Slice{pos: 10M, id: 10, size: 30M, off:   0, len:  6M}
Slice{pos: 16M, id: 12, size: 10M, off:   0, len: 10M}
Slice{pos: 26M, id: 11, size: 16M, off:  6M, len: 10M}
Slice{pos: 36M, id: 10, size: 30M, off: 26M, len:  4M}
Slice{pos: 40M, id:  0, size: 24M, off:   0, len: 24M} // 实际这一段也会省去
```

### 数据对象

#### 对象命名 {#object-storage-naming-format}

Block 是 JuiceFS 管理数据的基本单元，其大小默认为 4 MiB，且可在文件系统格式化时配置，允许调整的区间范围为 [64 KiB, 16 MiB]。每个 Block 上传后即为对象存储中的一个对象，其命名格式为 `${fsname}/chunks/${hash}/${basename}`，其中：

- fsname 是文件系统名称
- “chunks”为固定字符串，代表 JuiceFS 的数据对象
- hash 是根据 basename 算出来的哈希值，起到一定的隔离管理的作用
- basename 是对象的有效名称，格式为 `${sliceId}_${index}_${size}`，其中：
  - sliceId 为该对象所属 Slice 的 ID，JuiceFS 中每个 Slice 都有一个全局唯一的 ID
  - index 是该对象在所属 Slice 中的序号，默认一个 Slice 最多能拆成 16 个 Blocks，因此其取值范围为 [0, 16)
  - size 是该 Block 的大小，默认情况下其取值范围为 (0, 4 MiB]

目前使用的 hash 算法有两种，以 basename 中的 sliceId 为参数，根据文件系统格式化时的 [HashPrefix](#setting) 配置选择：

```go
func hash(sliceId int) string {
    if HashPrefix {
        return fmt.Sprintf("%02X/%d", sliceId%256, sliceId/1000/1000)
    }
    return fmt.Sprintf("%d/%d", sliceId/1000/1000, sliceId/1000)
}
```

假设一个名为 `jfstest` 的文件系统中写入了一段连续的 10 MiB 数据，内部赋予的 SliceID 为 1，且未开启 HashPrefix，那么在对象存储中则会产生以下三个对象：

```
jfstest/chunks/0/0/1_0_4194304
jfstest/chunks/0/0/1_1_4194304
jfstest/chunks/0/0/1_2_2097152
```

类似地，现在以上一节的 64 MiB 的 Chunk 为例，它的实际数据分布如下：

```
 0 ~ 10M: 补零
10 ~ 16M: 10_0_4194304, 10_1_4194304(0 ~ 2M)
16 ~ 26M: 12_0_4194304, 12_1_4194304, 12_2_2097152
26 ~ 36M: 11_1_4194304(2 ~ 4M), 11_2_4194304, 11_3_4194304
36 ~ 40M: 10_6_4194304(2 ~ 4M), 10_7_2097152
40 ~ 64M: 补零
```

据此，客户端可以快速找到应用所需数据。例如，在 offset 为 10MiB 位置读取 8MiB 数据，会涉及 3 个对象，具体为：

- 从 `10_0_4194304` 读取整个对象，对应读取数据的 0 ～ 4 MiB
- 从 `10_1_4194304` 读取 0 ～ 2 MiB，对应读取数据的 4 ～ 6 MiB
- 从 `12_0_4194304` 读取 0 ～ 2 MiB，对应读取数据的 6 ～ 8 MiB

为方便直接查看文件内容对应的对象列表，JuiceFS 提供了 `info` 命令，如 `juicefs info /mnt/jfs/test.tmp`：

```bash
objects:
+------------+---------------------------------+----------+---------+----------+
| chunkIndex |            objectName           |   size   |  offset |  length  |
+------------+---------------------------------+----------+---------+----------+
|          0 |                                 | 10485760 |       0 | 10485760 |
|          0 | jfstest/chunks/0/0/10_0_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/10_1_4194304 |  4194304 |       0 |  2097152 |
|          0 | jfstest/chunks/0/0/12_0_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/12_1_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/12_2_2097152 |  2097152 |       0 |  2097152 |
|          0 | jfstest/chunks/0/0/11_1_4194304 |  4194304 | 2097152 |  2097152 |
|          0 | jfstest/chunks/0/0/11_2_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/11_3_4194304 |  4194304 |       0 |  4194304 |
|          0 | jfstest/chunks/0/0/10_6_4194304 |  4194304 | 2097152 |  2097152 |
|          0 | jfstest/chunks/0/0/10_7_2097152 |  2097152 |       0 |  2097152 |
|        ... |                             ... |      ... |     ... |      ... |
+------------+---------------------------------+----------+---------+----------+
```

表中空的 objectName 表示文件空洞，读取时均为 0。可以看到，输出结果与之前分析一致。

值得一提的是，这里的 size 是 Block 中原始数据的大小，而不是对象存储中实际对象的大小。默认情况下，原始数据拆分后直接写到对象存储，此时 size 与对象大小是相等的。但当开启了数据压缩或数据加密功能后，实际对象的大小会发生变化，此时其与 size 很可能不再相同。

#### 数据压缩

在文件系统格式化时可以通过 `--compress <value>` 参数配置压缩算法（支持 LZ4 和 zstd），使得此文件系统的所有数据 Block 会经过压缩后再上传到对象存储。此时对象名称仍与默认配置相同，且内容为原始数据经压缩算法后的结果，不携带任何其它元信息。因此，文件[文统格式化信息](#setting)中的压缩算法不允许修改，否则会导致读取已有数据失败。

#### 数据加密

在文件系统格式化时可以通过 `--encrypt-rsa-key <value>` 参数配置 RSA 私钥以开启[静态数据加密](../security/encryption.md)功能，使得此文件系统的所有数据 Block 会经过加密后再上传到对象存储。此时对象名称仍与默认配置相同，内容为一段 header 加上数据经加密算法后的结果。这段 header 里记录了用来解密的对称密钥以及随机种子，而对称密钥本身又经过 RSA 私钥加密。因此，文件[文统格式化信息](#setting)中的 RSA 私钥目前不允许修改，否则会导致读取已有数据失败。

:::note 备注
若同时开启压缩和加密，原始数据会先压缩再加密后上传到对象存储。
:::


================================================
FILE: docs/zh_cn/faq.md
================================================
---
title: 常见问题（FAQ）
slug: /faq
---

## 文档没能解答我的疑问

请首先尝试使用「Ask AI」功能（右下角），如果 AI 助手的回答有帮到你或者给了你错误的回答，欢迎在回答里给出你的反馈。或者使用文档搜索功能（右上角），尝试用不同的关键词进行检索。

如果以上方法依然未能解决你的疑问，可以加入 [JuiceFS 开源社区](https://juicefs.com/zh-cn/community)以寻求帮助。

## 一般问题

### JuiceFS 与 XXX 的区别是什么？

请查看[「同类技术对比」](introduction/comparison/juicefs_vs_alluxio.md)文档了解更多信息。

### 怎么升级 JuiceFS 客户端？

首先请卸载 JuiceFS 文件系统，然后使用新版本的客户端重新挂载。

### JuiceFS 的日志在哪里？

不同类型的 JuiceFS 客户端获取日志的方式也不同，详情请参考[「客户端日志」](administration/fault_diagnosis_and_analysis.md#client-log)文档。

### JuiceFS 是否可以直接读取对象存储中已有的文件？

不可以，JuiceFS 是一个用户态文件系统，虽然它通常使用对象存储作为数据存储层，但它并不是一般意义上的对象存储访问工具。可以查看[技术架构](introduction/architecture.md)文档了解详情。

如果你希望把对象存储 Bucket 中已有数据迁移到 JuiceFS，可以使用 [`juiceFS sync`](guide/sync.md)。

### 如何将多台服务器组合成一个 JuiceFS 文件系统来使用？

不可以，虽然 JuiceFS 支持使用本地磁盘或 SFTP 作为底层存储，但是它并不干预底层存储的逻辑结构管理。如果你希望把多台服务器的存储空间整合起来，可以考虑使用 MinIO 或 Ceph 创建对象存储集群，然后在其之上创建 JuiceFS 文件系统。

## 元数据相关问题

### 支持哨兵或者集群模式的 Redis 作为 JuiceFS 的元数据引擎吗？

支持，另外这里还有一篇 Redis 作为 JuiceFS 元数据引擎的[最佳实践文档](administration/metadata/redis_best_practices.md)可供参考。

## 对象存储相关问题

### 为什么不支持某个对象存储？

已经支持了绝大部分对象存储，参考这个[列表](reference/how_to_set_up_object_storage.md#supported-object-storage)。如果它跟 S3 兼容的话，也可以当成 S3 来使用。否则，请创建一个 issue 来增加支持。

### 为什么我在挂载点删除了文件，但是对象存储占用空间没有变化或者变化很小？

第一个原因是你可能开启了回收站特性。为了保证数据安全回收站默认开启，删除的文件其实被放到了回收站，实际并没有被删除，所以对象存储大小不会变化。回收站的保留时间可以通过 `juicefs format` 指定或者通过 `juicefs config` 修改。请参考[「回收站」](security/trash.md)文档了解更多信息。

第二个原因是 JuiceFS 是异步删除对象存储中的数据，所以对象存储的空间变化会慢一点。如果你需要立即清理对象存储中需要被删除的数据，可以尝试运行 [`juicefs gc`](reference/command_reference.mdx#gc) 命令。

### 关于 JuiceFS 的异步删除，具体流程是怎样的？

* ​**当未开启回收站时：​**
  - 系统会检查文件是否被其他程序使用：
    * 如果文件正在被使用，会标记为**”暂缓删除 (`sustained`)**"，等程序关闭文件后再处理
    * 如果文件没有被使用，会标记为**待删除 (`delfile`)**，尝试将其放入**删除队列 (`maxDeleting`)**
  
* ​**当开启回收站时：​**
  - 系统会在回收站中按照**当前时间（精确到小时）​** 创建子目录（如`2024-01-15-14`）
  - 待删除文件移动到对应时间目录中：
    * ​**所有 chunk 和 slice 数据均保持完整**
    * ​**仅元数据**中的父目录指向发生变化
    * 文件名会被**重新编码**以避免冲突
  - 后台任务根据保留天数清理过期文件：
    * 从**最老的目录开始**逐个清理
    * 方法：打上**待删除 (`delfile`)**，放入**删除队列 (`maxDeleting`)**
  
* ​**删除队列处理流程（异步清理）：​**
  1. ​**查找文件对应的所有 chunk 并删除**
  2. 删除 chunk 时会**减少其 slice 的引用计数**
  3. 当 slice 的引用计数减为零，成为 ​**`Pending Deleted Slices`**
  4. 后台清理对象存储中的这些数据片段

![JuiceFS-delete-file](./images/juicefs-delete-file.svg)

* 删除队列是有容量限制的，如果同时删除的文件过多，队列满后删除请求会先返回。然后由一个每小时工作一次的后台清理任务继续清理。它查找所有标记为**待删除 (`delfile`)**的文件，对其进行清理，清理方法和删除队列中的文件一致。
* 如果配置了 NoBGJob，每小时间隔的后台定时清理任务和回收站清理任务都会被禁用，删除文件后需要手动去回收站中进一步清理。
* 一种特殊情况是，当你手动直接删除回收站里的文件时，它可以确保以同步的方式插入到删除队列中，相对较快地回收对象存储空间，但是后续清理 chunk 的动作仍然是异步的。
* 关于 slice 引用计数：删除 chunk 和碎片整理 compact 会减少相关 slice 的引用计数，clone 和 copyFileRange 则会增加相关 slice 的引用计数。

### 为什么文件系统数据量与对象存储占用空间存在差异？ {#size-inconsistency}

* [JuiceFS 随机写](#random-write)会产生文件碎片，因此对象存储的占用空间大部分情况下是大于等于实际大小的，尤其是短时间内进行大量的覆盖写产生许多文件碎片后，这些碎片仍旧占用着对象存储的空间。不过也不必担心，因为在每次读／写文件的时候都会检查，并在后台任务进行该文件相关碎片的整理工作。你可以通过 [`juicefs gc —-compact -—delete`](./reference/command_reference.mdx#gc) 命令手动触发合并与回收。
* 如果开启了[「回收站」](./security/trash.md)功能，被删除的文件不会立刻清理，而是在回收站内保留指定时间后，才进行清理删除。
* 碎片被合并以后，失效的旧碎片也会在回收站中进行保留（但对用户不可见），过期时间也遵循回收站的设置。如果想要清理这些碎片，阅读[回收站和文件碎片](./security/trash.md#gc)。
* 如果文件系统开启了压缩功能（也就是 [`format`](./reference/command_reference.mdx#format) 命令的 `--compress` 参数，默认不开启），那么对象存储上存储的对象有可能比实际文件大小更小（取决于不同类型文件的压缩比）。
* 根据所使用对象存储的[存储类型](reference/how_to_set_up_object_storage.md#storage-class)不同，云服务商可能会针对某些存储类型设置最小计量单位。例如阿里云 OSS 低频访问存储的[最小计量单位](https://help.aliyun.com/document_detail/173534.html)是 64KB，如果单个文件小于 64KB 也会按照 64KB 计算。
* 对于自建对象存储，例如 MinIO，实际占用大小也受到[存储级别](https://github.com/minio/minio/blob/master/docs/erasure/storage-class/README.md)设置的影响。

### JuiceFS 支持使用对象存储中的某个目录作为 `--bucket` 选项的值吗？

到 JuiceFS 1.0 为止，还不支持该功能。

### JuiceFS 支持访问对象存储中已经存在的数据吗？

到 JuiceFS 1.0 为止，还不支持该功能。

### 一个文件系统可以绑定多个不同的对象存储吗（比如同时用 Amazon S3、GCS 和 OSS 组成一个文件系统）？

不支持。但在创建文件系统时可以设定关联同一个对象存储的多个 bucket，从而解决单个 bucket 对象数量限制的问题，例如，可以为一个文件系统关联多个 S3 Bucket。具体请参考 [`--shards`](./reference/command_reference.mdx#format) 选项的说明。

## 性能相关问题

### JuiceFS 的性能如何？

JuiceFS 是一个分布式文件系统，元数据访问的延时取决于挂载点到服务端之间 1 到 2 个网络来回（通常 1-3 ms），数据访问的延时取决于对象存储的延时 (通常 20-100 ms)。顺序读写的吞吐量可以到 50MiB/s 至 2800MiB/s（查看 [fio 测试结果](benchmark/fio.md)），取决于网络带宽以及数据是否容易被压缩。

JuiceFS 内置多级缓存（主动失效），一旦缓存预热好，访问的延时和吞吐量非常接近单机文件系统的性能（FUSE 会带来少量的开销）。

### JuiceFS 支持随机读写吗？原理如何？ {#random-write}

支持，包括通过 mmap 等进行的随机读写。目前 JuiceFS 主要是对顺序读写进行了大量优化，对随机读写的优化也在进行中。如果想要更好的随机读性能，建议关闭压缩（[`--compress none`](reference/command_reference.mdx#format)）。

JuiceFS 不将原始文件存入对象存储，而是将其按照某个大小（默认为 4MiB）拆分为 N 个数据块（Block）后，上传到对象存储，然后将数据块的 ID 存入元数据引擎。随机写的时候，逻辑上是要覆盖原本的内容，实际上是把**要覆盖的数据块**的元数据标记为旧数据，同时只上传随机写时产生的**新数据块**到对象存储，并将**新数据块**对应的元数据更新到元数据引擎中。

当读取被覆盖部分的数据时，根据**最新的元数据**，从随机写时上传的**新数据块**读取即可，同时**旧数据块**可能会被后台运行的垃圾回收任务自动清理。这样就将随机写的复杂度转移到读的复杂度上。

详见[「内部实现」](development/internals.md)与[「读写请求处理流程」](introduction/io_processing.md)。

### 怎么快速地拷贝大量小文件到 JuiceFS？

请在挂载时加上 [`--writeback` 选项](reference/command_reference.mdx#mount-data-cache-options)，它会先把数据写入本机的缓存，然后再异步上传到对象存储，会比直接上传到对象存储快很多倍。

请查看[「客户端写缓存」](guide/cache.md#client-write-cache)了解更多信息。

### JuiceFS 支持分布式缓存吗？

企业版支持，详见[「分布式缓存」](https://juicefs.com/docs/zh/cloud/guide/distributed-cache)。

## 访问相关问题

### 为什么同名用户在主机 X 上有权限访问 JuiceFS 的文件，在主机 Y 上访问该文件却没有权限？

虽然用户在主机 X 和主机 Y 上的用户名相同，但各自对应的 UID 或 GID 不相同。你可以使用 `id` 命令来查看用户的具体 UID 和 GID：

```bash
$ id alice
uid=1201(alice) gid=500(staff) groups=500(staff)
```

阅读文档[「多主机间同步账户」](administration/sync_accounts_between_multiple_hosts.md)解决这个问题。

### JuiceFS 除了挂载外还支持哪些方式访问数据？

除了挂载外，还支持以下几种方式：

- Kubernetes CSI 驱动：通过 Kubernetes CSI 驱动的方式将 JuiceFS 作为 Kubernetes 集群的存储层，详情请参考[「JuiceFS CSI 驱动」](deployment/how_to_use_on_kubernetes.md)。
- Hadoop Java SDK：方便在 Hadoop 体系中使用兼容 HDFS 接口的 Java 客户端访问 JuiceFS。详情请参考[「Hadoop 使用 JuiceFS」](deployment/hadoop_java_sdk.md)。
- S3 网关：通过 S3 协议访问 JuiceFS，详情请参考[「配置 JuiceFS S3 网关」](./guide/gateway.md)。
- Docker Volume 插件：在 Docker 中方便使用 JuiceFS 的方式，详情请参考[「Docker 使用 JuiceFS」](deployment/juicefs_on_docker.md)。
- WebDAV 网关：通过 WebDAV 协议访问 JuiceFS

### JuiceFS S3 网关支持多用户管理等高级功能吗？

JuiceFS 内置的 `gateway` 从 1.2 版本开始支持多用户管理等高级功能。

### JuiceFS 目前有 SDK 可以使用吗？

截止到 JuiceFS 1.0 发布，社区有两个 SDK，一个是 Juicedata 官方维护的 HDFS 接口高度兼容的 [Java SDK](deployment/hadoop_java_sdk.md)，另一个是由社区用户维护的 [Python SDK](https://github.com/megvii-research/juicefs-python)。


================================================
FILE: docs/zh_cn/getting-started/for_distributed.md
================================================
---
sidebar_position: 3
description: 本文将指导你使用基于云的对象存储和数据库，构建一个具有分布式和共享访问能力的 JuiceFS 文件系统。
---

# 分布式模式

上一篇文档[「JuiceFS 单机模式快速上手指南」](./standalone.md)通过采用「对象存储」和「SQLite」数据库的组合，实现了一个可以在任意主机上挂载的文件系统。得益于对象存储是可以被网络上任何有权限的计算机访问的特点，我们只需要把 SQLite 数据库文件复制到任何想要访问该存储的计算机，就可以实现在不同计算机上访问同一个 JuiceFS 文件系统。

很显然，想要依靠在计算机之间复制 SQLite 数据库的方式进行文件系统共享，虽然可行，但文件的实时性是得不到保证的。受限于 SQLite 这种单文件数据库无法被多个计算机同时读写访问的情况，为了能够让一个文件系统可以在分布式环境中被多个计算机同时挂载读写，我们需要采用支持通过网络访问的数据库，比如 Redis、PostgreSQL、MySQL 等。

本文以上一篇文档为基础，进一步将数据库从单用户的「SQLite」替换成多用户的「云数据库」，从而实现可以在网络上任何一台计算机上进行挂载读写的分布式文件系统。

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114215360666958&bvid=BV1kVoCYGEfo&cid=29039723061&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 基于网络的数据库

这里所谓的「基于网络的数据库」是指允许多个用户通过网络同时访问的数据库，从这个角度出发，可以简单的把数据库分成：

1. **单机数据库**：数据库是单个文件，通常只能单机访问，如 SQLite，Microsoft Access 等；
2. **基于网络的数据库**：数据库通常是复杂的多文件结构，提供基于网络的访问接口，支持多用户同时访问，如 Redis、PostgreSQL 等。

JuiceFS 目前支持的基于网络的数据库有：

- **键值数据库**：Redis、TiKV、etcd、FoundationDB
- **关系型数据库**：PostgreSQL、MySQL、MariaDB

不同的数据库性能和稳定性表现也各不相同，比如 Redis 是内存型键值数据库，性能极为出色，但可靠性相对较弱。PostgreSQL 是关系型数据库，相比之下性能没有内存型强悍，但它的可靠性要更强。

有关数据库选择方面的内容，我们会专门编写文档进行介绍。

## 云数据库

云计算平台通常都有种类丰富的云数据库提供，比如 Amazon RDS 提供各类关系型数据库的版本，Amazon ElastiCache 提供兼容 Redis 的内存型数据库产品。经过简单的初始化设置就可以创建出多副本、高可用的数据库集群。

当然，如果愿意，你可以自己在服务器上搭建数据库。

简单起见，这里以阿里云数据库 Redis 版为例介绍。对于基于网络的数据库来说，最基本的是以下 2 项信息：

1. **数据库地址**：数据库的访问地址，云平台可能会针对内外网提供不同的链接；
2. **用户名和密码**：用于访问数据库时的身份验证信息。

## 上手实践

### 1. 安装客户端

在所有需要挂载文件系统的计算机上安装 JuiceFS 客户端，详情参照[「安装」](installation.md)。

### 2. 准备对象存储

以下是以阿里云 OSS 为例的伪样本，你可以改用其他对象存储，详情参考 [JuiceFS 支持的存储](../reference/how_to_set_up_object_storage.md#supported-object-storage)。

- **Bucket Endpoint**：`https://myjfs.oss-cn-shanghai.aliyuncs.com`
- **Access Key ID**：`ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**：`ZYXwvutsrqpoNMLkJiHgfeDCBA`

### 3. 准备数据库

以下是以阿里云数据库 Redis 版为例的伪样本，你可以改用其他类型的数据库，详情参考 [JuiceFS 支持的数据库](../reference/how_to_set_up_metadata_engine.md)。

- **数据库地址**：`myjfs-sh-abc.redis.rds.aliyuncs.com:6379`
- **数据库用户名**：`tom`
- **数据库密码**：`mypassword`

在 JuiceFS 中使用 Redis 数据库的格式如下：

```
redis://<username>:<password>@<Database-IP-or-URL>:6379/1
```

:::tip 提示
Redis 6.0 之前的版本没有用户名，请省略 URL 中的 `<username>` 部分，例如 `redis://:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1`（请注意密码前面的冒号是分隔符，需要保留）。
:::

### 4. 创建文件系统

以下命令使用「对象存储」和「Redis」数据库的组合创建了一个支持跨网络、多机同时挂载、共享读写的文件系统。

```shell
juicefs format \
    --storage oss \
    --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \
    --access-key ABCDEFGHIJKLMNopqXYZ \
    --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \
    redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    myjfs
```

文件系统创建完成后，终端将返回类似下面的内容：

```shell
2021/12/16 16:37:14.264445 juicefs[22290] <INFO>: Meta address: redis://@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1
2021/12/16 16:37:14.277632 juicefs[22290] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/12/16 16:37:14.281432 juicefs[22290] <INFO>: Ping redis: 3.609453ms
2021/12/16 16:37:14.527879 juicefs[22290] <INFO>: Data uses oss://myjfs/myjfs/
2021/12/16 16:37:14.593450 juicefs[22290] <INFO>: Volume is formatted as {Name:myjfs UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://myjfs AccessKey:ABCDEFGHIJKLMNopqXYZ SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

:::info
文件系统创建完毕以后，包含对象存储密钥等信息会完整的记录到数据库中。JuiceFS 客户端只要拥有数据库地址、用户名和密码信息，就可以挂载读写该文件系统。也正因此，JuiceFS 客户端没有本地配置文件（作为对比，JuiceFS 云服务用 [`juicefs auth`](https://juicefs.com/docs/zh/cloud/reference/commands_reference/#auth) 命令进行认证、获取配置文件）。
:::

### 5. 挂载文件系统

由于这个文件系统的「数据」和「元数据」都存储在基于网络的云服务中，因此在任何安装了 JuiceFS 客户端的计算机上都可以同时挂载该文件系统进行共享读写。例如：

```shell
juicefs mount redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 ~/jfs
```

#### 数据强一致性保证

对于多客户端同时挂载读写同一个文件系统的情况，JuiceFS 提供「关闭再打开（close-to-open）」一致性保证，即当两个及以上客户端同时读写相同的文件时，客户端 A 的修改在客户端 B 不一定能立即看到。但是，一旦这个文件在客户端 A 写入完成并关闭，之后在任何一个客户端重新打开该文件都可以保证能访问到最新写入的数据，不论是否在同一个节点。

#### 调大缓存提升性能

由于「对象存储」是基于网络的存储服务，不可避免会产生访问延时。为了解决这个问题，JuiceFS 提供并默认启用了缓存机制，即划拨一部分本地存储作为数据与对象存储之间的一个缓冲层，读取文件时会异步地将数据缓存到本地存储，详情请查阅[「缓存」](../guide/cache.md)。

缓存机制让 JuiceFS 可以高效处理海量数据的读写任务，默认情况下，JuiceFS 会在 `$HOME/.juicefs/cache` 或 `/var/jfsCache` 目录设置 100GiB 的缓存。在速度更快的 SSD 上设置更大的缓存空间可以有效提升 JuiceFS 的读写性能。

你可以使用 `--cache-dir` 调整缓存目录的位置，使用 `--cache-size` 调整缓存空间的大小，例如：

```shell
juicefs mount
    --background \
    --cache-dir /mycache \
    --cache-size 512000 \
    redis://tom:mypassword@myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    ~/jfs
```

:::note 注意
JuiceFS 进程需要具有读写 `--cache-dir` 目录的权限。
:::

上述命令将缓存目录设置在了 `/mycache` 目录，并指定缓存空间为 500GiB。

#### 开机自动挂载

在 Linux 环境中，可以在挂载文件系统时通过 `--update-fstab` 选项设置自动挂载，这个选项会将挂载 JuiceFS 所需的选项添加到 `/etc/fstab` 中。例如：

:::note 注意
此特性需要使用 1.1.0 及以上版本的 JuiceFS
:::

```bash
$ sudo juicefs mount --update-fstab --max-uploads=50 --writeback --cache-size 204800 redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 <MOUNTPOINT>
$ grep <MOUNTPOINT> /etc/fstab
redis://tom:mypassword@myjfs-sh-abc.apse1.cache.amazonaws.com:6379/1 <MOUNTPOINT> juicefs _netdev,max-uploads=50,writeback,cache-size=204800 0 0
$ ls -l /sbin/mount.juicefs
lrwxrwxrwx 1 root root 29 Aug 11 16:43 /sbin/mount.juicefs -> /usr/local/bin/juicefs
```

更多请参考[「启动时自动挂载 JuiceFS」](../administration/mount_at_boot.md)。

### 6. 验证文件系统

当挂载好文件系统以后可以通过 `juicefs bench` 命令对文件系统进行基础的性能测试和功能验证，确保 JuiceFS 文件系统能够正常访问且性能符合预期。

:::info 说明
`juicefs bench` 命令只能完成基础的性能测试，如果需要对 JuiceFS 进行更完整的评估，请参考[「JuiceFS 性能评估指南」](../benchmark/performance_evaluation_guide.md)。
:::

```shell
juicefs bench ~/jfs
```

运行 `juicefs bench` 命令以后会根据指定的并发度（默认为 1）往 JuiceFS 文件系统中写入及读取 N 个大文件（默认为 1）及 N 个小文件（默认为 100），并统计读写的吞吐和单次操作的延迟，以及访问元数据引擎的延迟。

如果在验证文件系统的过程中遇到任何问题，请先参考[「故障诊断和分析」](../administration/fault_diagnosis_and_analysis.md)文档进行问题排查。

### 7. 卸载文件系统

你可以通过 `juicefs umount` 命令卸载 JuiceFS 文件系统（假设挂载点路径是 `~/jfs`）：

```shell
juicefs umount ~/jfs
```

如果执行命令后，文件系统卸载失败，提示 `Device or resource busy`：

```shell
2021-05-09 22:42:55.757097 I | fusermount: failed to unmount ~/jfs: Device or resource busy
exit status 1
```

发生这种情况，可能是因为某些程序正在读写文件系统中的文件。为了确保数据安全，你应该首先排查是哪些程序正在与文件系统中的文件进行交互（例如通过 `lsof` 命令），并尝试结束它们之间的交互动作，然后再重新执行卸载命令。

:::caution 注意
以下内容包含的命令可能会导致文件损坏、丢失，请务必谨慎操作！
:::

当然，在你能够确保数据安全的前提下，也可以在卸载命令中添加 `--force` 或 `-f` 参数，强制卸载文件系统：

```shell
juicefs umount --force ~/jfs
```


================================================
FILE: docs/zh_cn/getting-started/installation.md
================================================
---
title: 安装
sidebar_position: 1
description: 本文介绍 JuiceFS 在 Linux、macOS 和 Windows 上的安装方法，包括一键安装、编译安装和容器化安装。
---

JuiceFS 有良好的跨平台能力，支持在几乎所有主流架构的各类操作系统上运行，包括且不限于 Linux、macOS、Windows 等。

JuiceFS 客户端只有一个二进制文件，你可以下载预编译的版本直接解压使用，也可以用源代码手动编译。

## 一键安装 {#one-click-installation}

一键安装脚本适用于 Linux 和 macOS 系统，会根据你的硬件架构自动下载安装最新版 JuiceFS 客户端。

**方式一（推荐）：** 默认安装到 `/usr/local/bin`：

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

**方式二：** 如需安装到自定义位置，例如安装到 `/tmp` 目录下：

```shell
curl -sSL https://d.juicefs.com/install | sh -s /tmp
```

:::tip 提示
大多数用户应该选择**方式一**进行默认安装。只有在对安装目录有特殊要求时才使用**方式二**。
:::

## 安装预编译客户端 {#install-the-pre-compiled-client}

你可以在 [GitHub](https://github.com/juicedata/juicefs/releases) 找到最新版客户端下载地址，每个版本的下载列表中都提供了面向不同 CPU 架构和操作系统的预编译版本，请注意识别选择，例如：

| 文件名                               | 说明                                                                            |
|--------------------------------------|---------------------------------------------------------------------------------|
| `juicefs-x.y.z-darwin-amd64.tar.gz`  | 面向 Intel 芯片的 macOS 系统                                                    |
| `juicefs-x.y.z-darwin-arm64.tar.gz`  | 面向 M1 系列芯片的 macOS 系统                                                   |
| `juicefs-x.y.z-linux-amd64.tar.gz`   | 面向 x86 架构 Linux 发行版                                                      |
| `juicefs-x.y.z-linux-arm64.tar.gz`   | 面向 ARM 架构的 Linux 发行版                                                    |
| `juicefs-x.y.z-windows-amd64.tar.gz` | 面向 x86 架构的 Windows 系统                                                    |
| `juicefs-hadoop-x.y.z.jar`           | 面向 x86 和 ARM 架构的 Hadoop Java SDK（同时支持 Linux、macOS 及 Windows 系统） |

### Linux 发行版 {#linux}

以 x86 架构的 Linux 系统为例，下载文件名包含 `linux-amd64` 的压缩包，在终端依次执行以下命令。

1. 获取最新的版本号

   ```shell
   JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v')
   ```

2. 下载客户端到当前目录

   ```shell
   wget "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
   ```

3. 解压安装包

   ```shell
   tar -zxf "juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz"
   ```

4. 安装客户端

   ```shell
   sudo install juicefs /usr/local/bin
   ```

完成上述 4 个步骤，在终端执行 `juicefs` 命令，返回帮助信息，则说明客户端安装成功。

:::info 说明
如果终端提示 `command not found`，可能是因为 `/usr/local/bin` 不在你的系统 `PATH` 环境变量中，可以执行 `echo $PATH` 查看系统设置了哪些可执行路径，根据返回结果选择一个恰当的路径，调整并重新执行第 4 步的安装命令。
:::

#### Ubuntu PPA

JuiceFS 也提供 [PPA](https://launchpad.net/~juicefs) 仓库，可以方便地在 Ubuntu 系统上安装最新版的客户端。根据你的 CPU 架构选择对应的 PPA 仓库：

- **x86 架构**：`ppa:juicefs/ppa`
- **ARM 架构**：`ppa:juicefs/arm64`

以 x86 架构的 Ubuntu 22.04 系统为例，执行以下命令。

1. 添加 PPA 仓库：

   ```shell
   sudo add-apt-repository ppa:juicefs/ppa
   ```

2. 更新包列表：

   ```shell
   sudo apt-get update
   ```

3. 安装 JuiceFS 客户端：

   ```shell
   sudo apt-get install juicefs
   ```

#### Fedora Copr

JuiceFS 也提供 [Copr](https://copr.fedorainfracloud.org/coprs/juicedata/juicefs) 仓库，可以方便地在 Red Hat 及其衍生系统上安装最新版的客户端，目前支持的系统有：

- **Amazonlinux 2023**
- **CentOS 8, 9**
- **Fedora 37, 38, 39, rawhide**
- **RHEL 7, 8, 9**

以 Fedora 38 系统为例，执行以下命令安装客户端：

启用 Copr 仓库：

```shell
sudo dnf copr enable -y juicedata/juicefs
```

安装客户端：

```shell
sudo dnf install juicefs
```

#### Snapcraft

我们也在 [Canonical Snapcraft](https://snapcraft.io) 平台打包并发布了 [Snap 版本的 JuiceFS 客户端](https://github.com/juicedata/juicefs-snapcraft)，对于 Ubuntu 16.04 及以上版本和其他支持 Snap 的操作系统，可以直接使用以下命令安装：

```shell
sudo snap install juicefs
```

由于 Snap 是一个封闭的沙箱环境，它会影响客户端的 FUSE 挂载，执行以下命令可以解除限制。如果只需使用 WebDAV 和 Gateway 则不必执行以下命令：

```shell
sudo ln -s -f /snap/juicefs/current/juicefs /snap/bin/juicefs
```

当有新版本时，执行以下命令更新客户端：

```shell
sudo snap refresh juicefs
```

#### AUR (Arch User Repository) {#aur}

JuiceFS 也提供 [AUR](https://aur.archlinux.org/packages/juicefs) 仓库，可以方便地在 Arch Linux 及其衍生系统上安装最新版的客户端。

对于使用 Yay 包管理器的系统，执行以下命令安装客户端：

```shell
yay -S juicefs
```

:::info 说明
AUR 上存在多个 JuiceFS 客户端的打包，以下是 JuiceFS 官方维护的版本：

- [`aur/juicefs`](https://aur.archlinux.org/packages/juicefs)：是稳定编译版，安装时会拉取最新的稳定版源码并编译安装；
- [`aur/juicefs-bin`](https://aur.archlinux.org/packages/juicefs-bin)：是稳定预编译版，安装时会直接下载最新的稳定版预编译程序并安装；
- [`aur/juicefs-git`](https://aur.archlinux.org/packages/juicefs-git)：是开发版，安装时会拉取最新的开发版源码并编译安装；

:::

另外，你也可以使用 `makepkg` 手动编译安装，以 Arch Linux 系统为例：

安装依赖：

```shell
sudo pacman -S base-devel git go
```

克隆要打包的 AUR 仓库：

```shell
git clone https://aur.archlinux.org/juicefs.git
```

进入仓库目录：

```shell
cd juicefs
```

编译安装：

```shell
makepkg -si
```

### Windows 系统 {#windows}

由于 Windows 没有原生支持 FUSE 接口，首先需要下载安装 [WinFsp](https://winfsp.dev) 才能实现对 FUSE 的支持。

   :::tip 提示
   **[WinFsp](https://github.com/winfsp/winfsp)** 是一个开源的 Windows 文件系统代理，它提供了一个 FUSE 仿真层，使得 JuiceFS 客户端可以将文件系统挂载到 Windows 系统中使用。
   :::

#### 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114499784808051&bvid=BV1jtEczZEvq&cid=29939011077&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

在 Windows 系统安装 JuiceFS 有以下几种方法：

- [使用预编译的 Windows 客户端](#预编译的-windows-客户端)
- [使用 Scoop 安装](#scoop)
- [在 WSL 中使用 Linux 版客户端](#在-wsl-中使用-linux-版客户端)

#### 预编译的 Windows 客户端

JuiceFS 的 Windows 客户端为独立的二进制文件，下载并解压后即可直接运行。

以 Windows 10 为例，下载包含 `windows-amd64` 的压缩包，解压后获得 `juicefs.exe`，即为 JuiceFS 客户端。

为方便使用，可将 `juicefs.exe` 移动到 `C:\Windows\System32`，这样可在任意目录下通过命令行直接运行 `juicefs`。

如需更灵活地管理 JuiceFS 客户端，可以在 `C:\` 盘下新建 `juicefs` 文件夹，将 `juicefs.exe` 放入其中，并将 `C:\juicefs` 添加到系统环境变量 PATH。重启系统后，即可在「命令提示符」或「PowerShell」等终端中直接使用 `juicefs` 命令。

![Windows ENV path](../images/windows-path.png)

#### 使用 Scoop 安装 {#scoop}

如果你的 Windows 系统中安装了 [Scoop](https://scoop.sh)，可以使用以下命令安装最新版的 JuiceFS 客户端：

```shell
scoop install juicefs
```

#### 在 WSL 中使用 Linux 版客户端

[WSL](https://docs.microsoft.com/zh-cn/windows/wsl/about) 全称 Windows Subsystem for Linux，即 Windows 的 Linux 子系统，从 Windows 10 版本 2004 以上或 Windows 11 开始支持该功能。它可以让你在 Windows 系统中运行原生的 GNU/Linux 的大多数命令行工具、实用工具和应用程序且不会产生传统虚拟机或双启动设置开销。

详情查看「[在 WSL 中使用 JuiceFS](../tutorials/juicefs_on_wsl.md)」

### macOS 系统 {#macos}

由于 macOS 默认不支持 FUSE 接口，需要安装 [macFUSE](https://osxfuse.github.io) 才能实现 FUSE 挂载。如果 FUSE 挂载不是你的主要使用场景，则无需安装 macFUSE。通过使用 JuiceFS 的 [WebDAV](../deployment/webdav.md)、[Gateway](../guide/gateway.md)、[Python SDK](../deployment/python_sdk.md) 等访问方式也能方便地读写数据。

:::tip 提示
[macFUSE](https://github.com/osxfuse/osxfuse) 是一个开源的文件系统增强工具，它让 macOS 可以挂载第三方的文件系统，使得 JuiceFS 客户端可以将文件系统挂载到 macOS 系统中使用。
:::

#### Homebrew 安装

如果你的系统安装了 [Homebrew](https://brew.sh) 包管理器，可以执行以下命令安装 JuiceFS 客户端：

```shell
brew install juicefs
```

*请参考 [Homebrew Formulae](https://formulae.brew.sh/formula/juicefs#default) 页面了解命令详情。*

#### 预编译二进制程序

你也可以下载文件名包含 `darwin-amd64` 的二进制程序，解压后使用 `install` 命令将程序安装到系统的任意可执行路径，例如：

```shell
sudo install juicefs /usr/local/bin
```

### Docker 容器 {#docker}

对于要在 Docker 容器中使用 JuiceFS 的情况，这里提供一份构建 JuiceFS 客户端镜像的 `Dockerfile`，可以以此为基础单独构建 JuiceFS 客户端镜像或与其他应用打包在一起使用。

```dockerfile
FROM ubuntu:20.04

RUN apt update && apt install -y curl fuse && \
    apt-get autoremove && \
    apt-get clean && \
    rm -rf \
    /tmp/* \
    /var/lib/apt/lists/* \
    /var/tmp/*

RUN set -x && \
    mkdir /juicefs && \
    cd /juicefs && \
    JFS_LATEST_TAG=$(curl -s https://api.github.com/repos/juicedata/juicefs/releases/latest | grep 'tag_name' | cut -d '"' -f 4 | tr -d 'v') && \
    curl -s -L "https://github.com/juicedata/juicefs/releases/download/v${JFS_LATEST_TAG}/juicefs-${JFS_LATEST_TAG}-linux-amd64.tar.gz" \
    | tar -zx && \
    install juicefs /usr/bin && \
    cd .. && \
    rm -rf /juicefs

CMD [ "juicefs" ]
```

## 手动编译客户端 {#manually-compiling}

如果预编译的客户端中没有适用于你的版本（比如 FreeBSD），这时可以采用手动编译的方式编译适合你的 JuiceFS 客户端。

另外，手动编译客户端可以让你优先体验到 JuiceFS 开发中的各种新功能，但这需要你具备一定的软件编译相关的基础知识。

:::tip 提示
对于中国地区用户，为了加快获取 Go 模块的速度，建议通过执行 `go env -w GOPROXY=https://goproxy.cn,direct` 来将 `GOPROXY` 环境变量设置国内的镜像服务器。详情请参考：[Goproxy China](https://github.com/goproxy/goproxy.cn)。
:::

### 类 Unix 客户端

编译面向 Linux、macOS、BSD 等类 Unix 系统的客户端需要满足以下依赖：

- [Go](https://golang.org) 1.20+
- GCC 5.4+

1. 克隆源码

   ```shell
   git clone https://github.com/juicedata/juicefs.git
   ```

2. 进入源代码目录

   ```shell
   cd juicefs
   ```

3. 切换分支

   源代码默认使用 `main` 分支，你可以切换到任何正式发布的版本，比如切换到 `v1.0.0` 版本：

   ```shell
   git checkout v1.0.0
   ```

   :::caution 注意
   开发分支经常涉及较大的变化，请不要将「开发分支」编译的客户端用于生产环境。
   :::

4. 执行编译

   ```shell
   make
   ```

   编译好的 `juicefs` 二进制程序位于当前目录。

### 在 Windows 下编译

在 Windows 系统编译 JuiceFS 客户端需要安装以下依赖：

- [WinFsp](https://github.com/winfsp/winfsp)
- [Go](https://golang.org) 1.20+
- GCC 5.4+

其中，WinFsp 和 Go 直接下载安装即可。GCC 需要使用第三方提供的版本，可以使用 [MinGW-w64](https://www.mingw-w64.org) 或 [Cygwin](https://www.cygwin.com)，这里以 MinGW-w64 为例介绍。

在 [MinGW-w64 的下载页面](https://www.mingw-w64.org/downloads) 选择一个适用于 Windows 的预编译版本，比如 [mingw-builds-binaries](https://github.com/niXman/mingw-builds-binaries/releases)。下载完成后，将其解压到 `C` 盘根目录，然后在系统环境变量设置中找到 PATH 并添加 `C:\mingw64\bin` 目录，重启系统后在命令行或 PowerShell 中执行 `gcc -v` 命令，如果能看到版本信息则说明 MingGW-w64 安装成功，接下来就可以开始编译了。

1. 克隆并进入项目目录

   ```shell
   git clone https://github.com/juicedata/juicefs.git && cd juicefs
   ```

2. 复制 WinFsp 头文件

   ```shell
   mkdir "C:\WinFsp\inc\fuse"
   ```

   ```shell
   copy .\hack\winfsp_headers\* C:\WinFsp\inc\fuse\
   ```

   ```shell
   dir "C:\WinFsp\inc\fuse"
   ```

   ```shell
   set CGO_CFLAGS=-IC:/WinFsp/inc/fuse
   ```

   ```shell
   go env -w CGO_CFLAGS=-IC:/WinFsp/inc/fuse
   ```

3. 编译客户端

   ```shell
   go build -ldflags="-s -w" -o juicefs.exe .
   ```

编译好的 `juicefs.exe` 二进制程序位于当前目录。为了方便使用，可以将其移动到 `C:\Windows\System32` 目录下，这样就可以在任何地方直接使用 `juicefs.exe` 命令了。

### 在 Linux 中交叉编译 Windows 客户端

为 Windows 编译特定版本客户端的过程与[类 Unix 客户端](#类-unix-客户端)基本一致，可以直接在 Linux 系统中进行编译，但除了 `go` 和 `gcc` 必须安装以外，还需要安装 [MinGW-w64](https://www.mingw-w64.org/downloads)

安装 Linux 发行版包管理器提供的最新版本即可，例如 Ubuntu 20.04+ 可以直接安装：

```shell
sudo apt install mingw-w64
```

编译 Windows 客户端：

```shell
make juicefs.exe
```

编译好的客户端是一个名为 `juicefs.exe` 的二进制文件，位于当前目录。

### 在 macOS 中交叉编译 Linux 客户端

1. 克隆并进入项目目录

   ```shell
   git clone https://github.com/juicedata/juicefs.git && cd juicefs
   ```

2. 安装依赖

   ```shell
   brew install FiloSottile/musl-cross/musl-cross
   ```

3. 编译客户端

   ```shell
   make juicefs.linux
   ```

## 卸载客户端 {#uninstall}

JuiceFS 客户端只有一个二进制文件，只需找到程序所在位置删除即可。例如，参照本文档 Linux 系统安装的客户端，执行以下命令卸载客户端：

```shell
sudo rm /usr/local/bin/juicefs
```

你还可以通过 `which` 命令查看程序所在位置：

```shell
which juicefs
```

命令返回的路径即 JuiceFS 客户端在你系统上的安装位置。其他操作系统卸载方法依此类推。


================================================
FILE: docs/zh_cn/getting-started/standalone.md
================================================
---
sidebar_position: 2
pagination_next: getting-started/for_distributed
---

# 单机模式

JuiceFS 文件系统由[「对象存储」](../reference/how_to_set_up_object_storage.md)和[「数据库」](../reference/how_to_set_up_metadata_engine.md)共同驱动。除了对象存储，还支持使用本地磁盘、WebDAV 和 HDFS 等作为底层存储。因此，可以使用本地磁盘和 SQLite 数据库快速创建一个单机文件系统用以了解和体验 JuiceFS。

## 安装客户端

对于 Linux 发行版和 macOS 系统用户，可以使用一键安装脚本快速安装 JuiceFS 客户端：

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

其他操作系统及安装方式请查阅[「安装」](installation.md)。

不论你使用什么操作系统，当在终端输入并执行 `juicefs` 并返回了程序的帮助信息，就说明你成功安装了 JuiceFS 客户端。

## 创建文件系统 {#juicefs-format}

### 基本概念

创建文件系统使用客户端提供的 [`format`](../reference/command_reference.mdx#format) 命令，一般格式为：

```shell
juicefs format [command options] META-URL NAME
```

可见，格式化文件系统需要提供 3 种信息：

- **[command options]**：设定文件系统的存储介质，留空则**默认使用本地磁盘**作为存储介质，路径为 `"$HOME/.juicefs/local"`(darwin/macOS)，`"/var/jfs"`(Linux) 或 `"C:/jfs/local"`(Windows)；
- **META-URL**：用来设置元数据存储，即数据库相关的信息，通常是数据库的 URL 或文件路径；
- **NAME**：是文件系统的名称。

:::tip 提示
JuiceFS 支持丰富的存储介质和元数据存储引擎，查看 [JuiceFS 支持的存储介质](../reference/how_to_set_up_object_storage.md) 和 [JuiceFS 支持的元数据存储引擎](../reference/how_to_set_up_metadata_engine.md)。
:::

### 上手实践

以 Linux 系统为例，以下命令创建了一个名为 `myjfs` 的文件系统。

```shell
juicefs format sqlite3://myjfs.db myjfs
```

创建完成将返回类似下面的输出：

```shell {1,4}
2021/12/14 18:26:37.666618 juicefs[40362] <INFO>: Meta address: sqlite3://myjfs.db
[xorm] [info]  2021/12/14 18:26:37.667504 PING DATABASE sqlite3
2021/12/14 18:26:37.674147 juicefs[40362] <WARNING>: The latency to database is too high: 7.257333ms
2021/12/14 18:26:37.675713 juicefs[40362] <INFO>: Data use file:///Users/herald/.juicefs/local/myjfs/
2021/12/14 18:26:37.689683 juicefs[40362] <INFO>: Volume is formatted as {Name:myjfs UUID:d5bdf7ea-472c-4640-98a6-6f56aea13982 Storage:file Bucket:/Users/herald/.juicefs/local/ AccessKey: SecretKey: BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

从返回的信息中可以看到，该文件系统使用 SQLite 作为元数据存储引擎，数据库文件位于当前目录，文件名为 `myjfs.db`，保存了 `myjfs` 文件系统的所有信息。它构建了完善的表结构，将用作所有数据的元信息的存储。

![SQLite-info](../images/sqlite-info.png)

由于没有指定任何存储相关的选项，客户端默认使用本地磁盘作为存储介质，根据返回的信息， `myjfs` 的存储路径为 `file:///Users/herald/.juicefs/local/myjfs/`，即当前用户家目录下的 `.juicefs/local/myjfs/`。

## 挂载文件系统

### 基本概念

挂载文件系统使用客户端提供的 [`mount`](../reference/command_reference.mdx#mount) 命令，一般格式为：

```shell
juicefs mount [command options] META-URL MOUNTPOINT
```

与创建文件系统的命令类似，挂载文件系统需要提供以下信息：

- `[command options]`：用来指定文件系统相关的选项，例如：`-d` 可以实现后台挂载；
- `META-URL`：用来设置元数据存储。即数据库相关的信息，通常是数据库的 URL 或文件路径；
- `MOUNTPOINT`：指定文件系统的挂载点。

:::tip 提示
Windows 系统的挂载点（`MOUNTPOINT`）应该使用尚未占用的盘符，比如：`Z:`、`Y:`。
:::

### 上手实践

:::note 注意
由于 SQLite 是单文件数据库，挂载时要注意数据库文件的的路径，JuiceFS 同时支持相对路径和绝对路径。
:::

以下命令将 `myjfs` 文件系统挂载到 `~/jfs` 文件夹：

```shell
juicefs mount sqlite3://myjfs.db ~/jfs
```

![SQLite-mount-local](../images/sqlite-mount-local.png)

默认情况下，客户端会在前台挂载文件系统。就像你在上图中看到的那样，程序会一直运行在当前终端进程中，使用 <kbd>Ctrl</kbd> + <kbd>C</kbd> 组合键或关闭终端窗口，文件系统会被卸载。

为了让文件系统可以在后台保持挂载，你可以在挂载时指定 `-d` 或 `--background` 选项，即让客户端在守护进程中挂载文件系统：

```shell
juicefs mount sqlite3://myjfs.db ~/jfs -d
```

接下来，任何存入挂载点 `~/jfs` 的文件，都会按照 [JuiceFS 的文件存储格式](../introduction/architecture.md#how-juicefs-store-files)被拆分成特定的「数据块」并存入 `$HOME/.juicefs/local/myjfs` 目录中，相对应的「元数据」会全部存储在 `myjfs.db` 数据库中。

最后执行以下命令可以将挂载点 `~/jfs` 卸载：

```shell
juicefs umount ~/jfs
```

## 更进一步

前面介绍的内容通常只适用于快速在本地体验和了解，帮助你对 JuiceFS 的工作方式建立基本的认识。我们可以在前面内容的基础上更进一步，仍然使用 SQLite 存储元数据，把本地存储换成「对象存储」，做一个更有实用价值的方案。

### 对象存储

对象存储是一种基于 HTTP 协议的，提供简单访问 API 的网络存储服务。它的结构扁平，易于扩展，价格相对低廉，非常适合存储海量的非结构化数据。几乎所有主流的云计算平台都有提供对象存储服务，如亚马逊 S3、阿里云 OSS、Backblaze B2 等。

JuiceFS 支持几乎所有的对象存储服务，查看「[JuiceFS 支持的存储介质](../reference/how_to_set_up_object_storage.md)」。

一般来说，创建对象存储通常只需要 2 个环节：

1. 创建 **Bucket** 存储桶，拿到 Endpoint 地址；
2. 创建 **Access Key ID** 和 **Access Key Secret**，即对象存储 API 的访问密钥。

以阿里云 OSS 为例，创建好的资源大概像下面这样：

- **Bucket Endpoint**：`https://myjfs.oss-cn-shanghai.aliyuncs.com`
- **Access Key ID**：`ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**：`ZYXwvutsrqpoNMLkJiHgfeDCBA`

:::note 注意
创建对象存储的过程各个平台会略有差别，建议查看云平台的帮助手册操作。另外，有些平台可能会针对内外网提供不同的 Endpoint 地址，由于本文要从本地访问对象存储，因此请选择使用面向外网访问的地址。
:::

### 上手实践

接下来使用 SQLite 和阿里云 OSS 对象存储创建一个 JuiceFS 文件系统：

:::note 注意
如果 `myjfs.db` 文件已经存在，请先删除它再执行以下命令。
:::

```shell
# 使用你自己所使用的对象存储信息替换下方相关参数
juicefs format --storage oss \
    --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \
    --access-key ABCDEFGHIJKLMNopqXYZ \
    --secret-key ZYXwvutsrqpoNMLkJiHgfeDCBA \
    sqlite3://myjfs.db myjfs
```

在上述命令中，数据库和文件系统名称保持不变，增加了对象存储相关的信息：

- `--storage`：设置存储类型，比如 `oss`、`s3` 等；
- `--bucket`：设置对象存储的 Endpoint 地址；
- `--access-key`：设置对象存储 API 访问密钥 Access Key ID；
- `--secret-key`：设置对象存储 API 访问密钥 Access Key Secret。

创建完成即可进行挂载：

```shell
juicefs mount sqlite3://myjfs.db ~/jfs
```

挂载命令与使用本地存储时完全一样，这是因为创建文件系统时，对象存储相关的信息已经写入了 `myjfs.db` 数据库，因此客户端不需要额外提供对象存储认证信息，也没有本地配置文件（作为对比，JuiceFS 云服务用 [`juicefs auth`](https://juicefs.com/docs/zh/cloud/reference/commands_reference/#auth) 命令进行认证、获取配置文件）。

相比使用本地磁盘，SQLite 和对象存储的组合实用价值更高。从应用的角度看，这种形式等同于将容量几乎无限的对象存储接入到了本地计算机，让你可以像使用本地磁盘那样使用云存储。

进一步的，该文件系统的所有数据都存储在云端的对象存储，因此可以把 `myjfs.db` 数据库复制到其他安装了 JuiceFS 客户端的计算机上进行挂载和读写。也就是说，任何一台计算机只要能够读取到存储了元数据的数据库，那么它就能够挂载读写该文件系统。

很显然，SQLite 这种单文件数据库很难实现被多台计算机同时访问。如果把 SQLite 改为 Redis、PostgreSQL、MySQL 等能够通过网络被多台计算机同时读写访问的数据库，那么就可以实现 JuiceFS 文件系统的分布式挂载读写。


================================================
FILE: docs/zh_cn/guide/cache.md
================================================
---
title: 缓存
sidebar_position: 3
---

对于一个由对象存储和数据库组合驱动的文件系统，缓存是本地客户端与远端服务之间高效交互的重要纽带。读写的数据可以提前或者异步载入缓存，再由客户端在后台与远端服务交互执行异步上传或预取数据。相比直接与远端服务交互，采用缓存技术可以大大降低存储操作的延时并提高数据吞吐量。

JuiceFS 提供包括元数据缓存、数据读写缓存等多种缓存机制。

:::tip 我的场景真的需要缓存吗？
数据缓存可以有效地提高随机读的性能，对于像 Elasticsearch、ClickHouse 等对随机读性能要求更高的应用，建议将缓存路径设置在速度更快的存储介质上并分配更大的缓存空间。

然而缓存能提升性能的前提是，你的应用需要反复读取同一批文件。如果你确定你的应用对数据是「读取一次，然后再也不需要」的访问模式（比如大数据的数据清洗常常就是这样），可以关闭缓存功能，省去缓存不断建立，又反复淘汰的开销。
:::

## 数据一致性 {#consistency}

分布式系统，往往需要在缓存和一致性之间进行取舍。JuiceFS 由于其元数据分离架构，需要从元数据、文件数据（对象存储）、文件数据本地缓存三方面来思考一致性问题：

对于[元数据缓存](#metadata-cache)，JuiceFS 默认的挂载设置满足「关闭再打开（close-to-open）」一致性，也就是说一个客户端修改并关闭文件之后，其他客户端重新打开这个文件都会看到最新的修改。与此同时，默认的挂载参数设置了 1 秒的内核元数据缓存，满足了一般场景的需要。但如果你的应用需要更激进的缓存设置以提升性能，可以阅读下方章节，对元数据缓存进行针对性的调优。特别地，发起修改的客户端（挂载点）能享受到更强的一致性，阅读[一致性例外](#consistency-exceptions)详细了解。

对于对象存储，JuiceFS 将文件分成一个个数据块（默认 4MiB），赋予唯一 ID 并上传至对象存储服务。文件的任何修改操作都将生成新的数据块，原有块保持不变，所以不用担心数据缓存的一致性问题，因为一旦文件被修改过了，JuiceFS 会从对象存储读取新的数据块。而老的失效数据块，也会随着[回收站](../security/trash.md)或碎片合并机制被删除，避免对象存储泄露。

[本地数据缓存](#client-read-cache)缓存也是以对象存储的数据块做为最小单元。一旦文件数据被下载到缓存盘，一致性就和缓存盘可靠性相关，如果磁盘数据发生了篡改，客户端也会读取到错误的数据。对于这种担忧，可以配置合适的 [`--verify-cache-checksum`](../reference/command_reference.mdx#mount-data-cache-options) 策略，确保缓存盘数据完整性。

## 元数据缓存 {#metadata-cache}

作为用户态文件系统，JuiceFS 元数据缓存既通过 FUSE API，以内核元数据缓存的形式进行管理，同时也直接在客户端内存中维护。

### 内核元数据缓存 {#kernel-metadata-cache}

JuiceFS 客户端可以控制这些内核元数据缓存：文件属性（attribute，包含文件名、大小、权限、修改时间等信息）、文件项（entry 和 direntry，用来区分文件和目录类型的文件），在挂载时，可以使用下方参数，通过 FUSE 控制这些元数据的缓存时间：

```shell
# 文件属性缓存时间（秒），默认为 1，提升 getattr 性能
--attr-cache=1

# 文件类型的缓存时间（秒），默认为 1，提升文件 lookup 性能
--entry-cache=1

# 目录类型文件的缓存时间（秒），默认为 1，提升目录的 lookup 性能
--dir-entry-cache=1

# 失败查询 (lookup 返回 ENOENT) 的缓存时间（秒），默认为 0，提升不存在文件或目录的 lookup 性能
--negative-entry-cache=1
```

让以上元数据默认在内核中缓存 1 秒，能显著提高 `lookup` 和 `getattr` 的性能。

需要注意，`entry` 缓存是随着文件访问逐渐建立起来的，不是一个完整列表，因此不能被 `readdir` 调用或者 `ls` 命令使用，而只对 `lookup` 调用有加速效果。这里的 `dir-entry` 含义也不等同于[「目录项」](https://www.kernel.org/doc/html/latest/filesystems/ext4/directory.html)的概念，他并不用来描述「一个目录下包含哪些文件」，而是和 `entry` 一样，都是文件，只不过对文件是否目录类型做了区分。

在实际场景中，也很少需要对 `--entry-cache` 和 `--dir-entry-cache` 进行区分设置，如果确实要精细化调优，在目录极少变动、而文件频繁变动的场景，可以令 `--dir-entry-cache` 大于 `--entry-cache`。

### 客户端内存元数据缓存 {#client-memory-metadata-cache}

JuiceFS 客户端在 `open` 操作即打开一个文件时，其文件属性会被自动缓存在客户端内存中，这里的属性缓存，不仅包含内核元数据中的文件属性比如文件大小、修改时间信息，还包含 JuiceFS 特有的属性，如[文件和 chunk、slice 的对应关系](../introduction/architecture.md#how-juicefs-store-files)。

为保证「关闭再打开（close-to-open）」一致性，`open` 操作默认需要直接访问元数据引擎，不会利用缓存。也就是说，客户端 A 的修改在客户端 B 不一定能立即看到。但是，一旦这个文件在 A 写入完成并关闭，之后在任何一个客户端重新打开该文件都可以保证能访问到最新写入的数据，不论是否在同一个节点。文件的属性缓存也不一定要通过 `open` 操作建立，比如 `tail -f` 会不断查询文件属性，在这种情况下无需重新打开文件，也能获得最新文件变动。

如果要利用上客户端内存的元数据缓存，需要设置 [`--open-cache`](../reference/command_reference.mdx#mount-metadata-cache-options)，指定缓存的有效时长。在缓存有效期间执行的 `getattr` 和 `open` 操作会从内存缓存中立即返回 slice 信息。有了这些信息，就能省去每次打开文件都重新访问元数据服务的开销。

使用 `--open-cache` 选项设置了缓存时间以后，文件系统就不再满足 close-to-open 一致性了，不过与内核元数据类似，发起修改的客户端同样能享受到客户端内存元数据缓存主动失效，其他客户端就只能等待缓存自然过期。因此为了保证文件系统语义，`--open-cache` 默认关闭。如果文件很少发生修改，或者只读场景下（例如 AI 模型训练），则推荐根据情况设置 `--open-cache`，进一步提高读性能。

作为对比，JuiceFS 商业版提供更丰富的客户端内存的元数据缓存功能，并且支持主动失效，阅读[商业版文档](https://juicefs.com/docs/zh/cloud/guide/cache/#client-memory-metadata-cache)以了解。

### 一致性例外 {#consistency-exceptions}

当文件发生变动时，发起修改的挂载点能够享受到更强的一致性，具体而言：

* 发起修改的挂载点，自身的内核元数据缓存能够主动失效。但对于多个挂载点访问、修改同一文件的情况，只有发起修改的客户端能享受到内核元数据缓存主动失效，其他客户端就只能等待缓存自然过期。
* 存在多挂载点的并发操作时，如果某个客户端删除并重新创建了同名文件，其他客户端因为内核 entry cache 可能继续使用旧文件的 inode，找不到文件或者读取到旧文件内容（开启了回收站功能），需要等待 entry cache 过期。因为已经不是相同文件，也不属于传统的 close-to-open 一致性范畴。
* 调用 `write` 成功后，挂载点自身立刻就能看到文件长度的变化（比如用 `ls -al` 查看文件大小，可能会注意到文件不断变大）——但这并不意味着修改已经成功提交，在 `flush` 成功前，是不会将这些改动同步到对象存储的，其他挂载点也看不到文件的变动。调用 `fsync, fdatasync, close` 都能触发 `flush`，让修改得以持久化、对其他客户端可见。
* 作为上一点的极端情况，如果调用 `write` 写入，并在当前挂载点观察到文件长度不断增长，但最后的 `flush` 因为某种原因失败了，比方说到达了文件系统配额上限，文件长度会立刻发生回退，比如从 10M 变为 0。这是一个容易引人误会的情况——并不是 JuiceFS 清空了你的数据，而是写入自始至终就没有成功，只是由于发起修改的挂载点能够提前预览文件长度的变化，让人误以为写入已经成功提交。
* 发起修改的挂载点，能够监听对应的文件变动（比如使用 [`fswatch`](https://emcrisostomo.github.io/fswatch/) 或者 [`Watchdog`](https://python-watchdog.readthedocs.io/en/stable)）。但范畴也仅限于该挂载点发起修改的文件，也就是说 A 修改的文件，无法在 B 挂载点进行监听。
* 目前而言，由于 FUSE 尚不支持 inotify API，所以如果你希望监听 JuiceFS 特定目录下的文件变化，请使用轮询的方式（比如 [`PollingObserver`](https://python-watchdog.readthedocs.io/en/stable/_modules/watchdog/observers/polling.html#PollingObserver)）。

## 读写缓冲区 {#buffer-size}

读写缓冲区是分配给 JuiceFS 客户端进程的一块内存，通过 [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options) 控制着大小，默认 300（单位 MiB）。读和写产生的数据，都会途经这个缓冲区。所以缓冲区的作用非常重要，在大规模场景下遇到性能不足时，提升缓冲区大小也是常见的优化方式。

### 预读和预取 {#readahead-prefetch}

:::tip
为了准确描述 JuiceFS 客户端的工作机制，文档中会用「预读」和「预取」来特指客户端的两种不同提前下载数据、优化读性能的行为。
:::

顺序读文件时，JuiceFS 客户端会进行预读（readahead），也就是提前将文件后续的内容下载下来。事实上同样的行为也早已存在于[内核](https://www.halolinux.us/kernel-architecture/page-cache-readahead.html)：读取文件时，内核也会根据具体的读行为和预读窗口算法，来提前将文件读取到内核页缓存。考虑到 JuiceFS 是个网络文件系统，内核的预读窗口对他来说太小，无法有效提升顺序读的性能，因此在内核的预读之上，JuiceFS 客户端也会发起自己的预读，根据更激进的算法来“猜测”应用接下来要读取的数据范围，然后提前将对象存储对应的数据块下载下来。预读的窗口大小可以通过`max-readahead`参数来控制，在随机读场景中可以考虑将其设置为 0 来禁用预读。

![readahead](../images/buffer-readahead.svg)

由于 readahead 只能优化顺序读场景，因此在 JuiceFS 客户端还存在着另一种相似的机制，称作预取（prefetch）：随机读取文件某个块（Block）的一小段，客户端会异步将整个对象存储块下载下来。

![prefetch](../images/buffer-prefetch.svg)

预取的设计是基于「假如文件的某一小段被应用读取，那么文件附近的区域也很可能会被读取」的假设，对于不同的应用场景，这样的假设未必成立——如果应用对大文件进行偏移极大的、稀疏的随机读，那么不难想象，prefetch 会带来明显的读放大。因此如果你已经对应用场景的读取模式有深入了解，确认并不需要 prefetch，可以通过 [`--prefetch=0`](../reference/command_reference.mdx#mount-data-cache-options) 禁用该行为。

预读和预取分别优化了顺序读、随机读性能，也会带来一定程度的读放大，阅读[「读放大」](../administration/troubleshooting.md#read-amplification)了解更多信息。

### 写入 {#buffer-write}

调用 `write` 成功，并不代表数据被持久化，持久化是 `flush` 的工作。这一点不论对于本地文件系统，还是 JuiceFS 文件系统，都是一样的。在 JuiceFS 中，`write` 会将数据写入缓冲区，写入完毕以后，你甚至会注意到，当前挂载点已经看到文件长度有所变化，不要误会，这并不代表写入已经持久化（这点也在[一致性例外](#consistency-exceptions)话题上有更详细介绍）。总而言之，在 `flush` 来临之前，改动只存在于客户端缓冲区。应用可以显式调用 `flush`，但就算不这样做，当写入超过块大小（默认 4M），或者在缓冲区停留超过一定时间，都会触发自动 `flush`。

结合上方已经介绍过的预读，缓冲区的总体作用可以一张图表示：

![read write buffer](../images/buffer-read-write.svg)

缓冲区是读写共用的，显然「写」具有更高的优先级，这隐含着「写会影响读」的可能性。举例说明，如果对象存储的上传速度不足以支撑写入负载，会发生缓冲区拥堵：

![buffer congestion](../images/buffer-congestion.svg)

如上图所示，写入负载过大，在缓冲区中积攒了太多待写入的 Slice，侵占了缓冲区用于预读的空间，因此读文件会变慢。不仅如此，由于对象存储上传速度不足，写也可能会因为 `flush` 超时而最终失败。

### 观测和调优 {#buffer-observation}

上方小节介绍了缓冲区对读、写都有关键作用，因此在面对高并发读写场景的时候，对 `--buffer-size` 进行相应的扩容，能有效提升性能。但一味地扩大缓冲区大小，也可能产生其他的问题，比如 `--buffer-size` 过大，但对象存储上传速度不足，导致上方小节中介绍的缓冲区拥堵的情况。因此，缓冲区的大小需要结合其他性能参数一起科学地设置。

在调整缓冲区大小前，我们推荐使用 [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) 来观察当前的缓冲区用量大小，这个命令能直观反映出当前的读写性能问题。

如果希望增加顺序读速度，可以增加 `--max-readahead` 和 `--buffer-size`，来放大预读窗口，窗口内尚未下载到本地的数据块，会并发地异步下载。同时注意，单个文件的预读不会把整个缓冲区用完，限制为 1/4 到 1/2。因此如果在优化单个大文件的顺序读时发现 `juicefs stats` 中 `buf` 用量已经接近一半，说明该文件的预读额度已满，此时虽然缓冲区还有空闲，但也需要继续增加 `--buffer-size` 才能进一步提升单个大文件的预读性能。

如果你希望增加写入速度，通过调整 [`--max-uploads`](../reference/command_reference.mdx#mount-data-storage-options) 增大了上传并发度，但并没有观察到上行带宽用量有明显增加，那么此时可能就需要相应地调大 `--buffer-size`，让并发线程更容易申请到内存来工作。这个排查原理反之亦然：如果增大 `--buffer-size` 却没有观察到上行带宽占用提升，也可以考虑增大 `--max-uploads` 来提升上传并发度。

可想而知，`--buffer-size` 也控制着每次 `flush` 操作的上传数据量大小，因此如果客户端处在一个低带宽的网络环境下，可能反而需要降低 `--buffer-size` 来避免 `flush` 超时。关于低带宽场景排查请详见[「与对象存储通信不畅」](../administration/troubleshooting.md#io-error-object-storage)。

## 数据缓存 {#data-cache}

JuiceFS 对数据也提供多种缓存机制来提高性能，包括内核中的页缓存和客户端所在机器的本地缓存，以及客户端自身的内存读写缓冲区。读请求会依次尝试内核分页缓存、JuiceFS 进程的预读缓冲区、本地磁盘缓存，当缓存中没找到对应数据时才会从对象存储读取，并且会异步写入各级缓存保证下一次访问的性能。

![JuiceFS-cache](../images/juicefs-cache.png)

### 内核页缓存 {#kernel-data-cache}

对于已经读过的文件，内核会为其建立页缓存（Page Cache），下次再打开的时候，如果文件没有被更新，就可以直接从内核页缓存读取，获得最好的性能。

JuiceFS 客户端会跟踪所有最近被打开的文件，要重复打开相同文件时，它会根据该文件是否被修改决定是否可以使用内核页数据，如果文件被修改过，则对应的页缓存也将在再次打开时失效，这样保证了客户端能够读到最新的数据。

当重复读 JuiceFS 中的同一个文件时，速度会非常快，延时可低至微秒，吞吐量可以到每秒几 GiB。

### 内核回写模式 {#fuse-writeback-cache}

从 Linux 内核 3.15 开始，FUSE 支持[内核回写（writeback-cache）](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt)模式，内核会把高频随机小 IO（例如 10-100 字节）的写请求合并起来，显著提升随机写入的性能。但其副作用是会将顺序写变为随机写，严重降低顺序写的性能。开启前请考虑使用场景是否匹配。

在挂载命令通过 [`-o writeback_cache`](../reference/fuse_mount_options.md) 选项来开启内核回写模式。注意，内核回写与[「客户端写缓存」](#client-write-cache)并不一样，前者是内核中的实现，后者则发生在 JuiceFS 客户端，二者适用场景也不一样，详读对应章节以了解。

### 客户端读缓存 {#client-read-cache}

客户端会根据应用读数据的模式，自动做预读和缓存操作以提高顺序读的性能。数据会缓存到本地文件系统中，可以是基于硬盘、SSD 或者内存的任意本地文件系统。

JuiceFS 客户端会把从对象存储下载的数据，以及新上传的小于 1 个 block 大小的数据写入到缓存目录中，不做压缩和加密。如果希望保证应用程序首次访问数据的时候就能获得已缓存的性能，可以使用 [`juicefs warmup`](../reference/command_reference.mdx#warmup) 命令来对缓存数据进行预热。

在未开启 `--writeback` 时，如果缓存目录所在的文件系统无法正常工作时 JuiceFS 客户端能立刻返回错误，剔除缓存盘并降级成直接访问对象存储。但在开启 `--writeback` 的情况下，如果缓存目录所在的文件系统异常时体现为读操作卡死（如某些内核态的网络文件系统），那么 JuiceFS 也会随之一起卡住，这就要求你对缓存目录底层的文件系统行为进行调优，做到快速失败。

以下是缓存配置的关键参数（完整参数列表见 [`juicefs mount`](../reference/command_reference.mdx#mount)）：

* `--prefetch`

  并发预取 N 个块（默认 1）。所谓预取（prefetch），就是随机读取文件某个块（block）的一小段，客户端会异步将整个对象存储块下载下来。预取往往能改善随机读性能，但如果你的场景的文件访问模式无法利用到预取数据（比如 offset 跨度极大的大文件随机访问），预取会带来比较明显的读放大，可以考虑设为 0 以禁用预取特性。

  JuiceFS 还内置着另一种类似的预读机制：在顺序读时，会提前下载临近的对象存储块，这在 JuiceFS 内称为 readahead 机制，能有效提高顺序读性能。Readahead 的并发度受[「读写缓冲区」](#buffer-size)的大小影响，读写缓冲区越大并发度越高。

* `--cache-dir`

  缓存目录，默认为 `/var/jfsCache` 或 `$HOME/.juicefs/cache`。请阅读[「缓存位置」](#cache-dir)了解更多信息。

  如果急需释放磁盘空间，你可以手动清理缓存目录下的文件，缓存路径为 `<cache-dir>/<UUID>/raw/`。

* `--cache-size` 与 `--free-space-ratio`

  缓存空间大小（单位 MiB，默认 102400）与缓存盘的最少剩余空间占比（默认 0.1）。这两个参数任意一个达到阈值，均会自动触发缓存淘汰，使用的是类似于 LRU 的策略，即尽量清理较早且较少使用的缓存。

  实际缓存数据占用空间大小可能会略微超过设置值，这是因为对同样一批缓存数据，很难精确计算它们在不同的本地文件系统上所占用的存储空间，JuiceFS 累加所有被缓存对象大小时会按照 4KiB 的最小值来计算，因此与 `du` 得到的数值往往不一致。

* `--cache-partial-only`

  只缓存小文件和随机读的部分，适合对象存储的吞吐比缓存盘还高的情况。默认为 false。

  读一般有两种模式，连续读和随机读。对于连续读，一般需要较高的吞吐。对于随机读，一般需要较低的时延。当本地磁盘的吞吐反而比不上对象存储时，可以考虑启用 `--cache-partial-only`，这样一来，连续读虽然会将一整个对象块读取下来，但并不会被缓存。而随机读（例如读 Parquet 或者 ORC 文件的 footer）所读取的字节数比较小，不会读取整个对象块，此类读取就会被缓存。充分地利用了本地磁盘低时延和网络高吞吐的优势。

### 客户端写缓存 {#client-write-cache}

开启客户端写缓存能提升特定场景下的大量小文件写入性能，请详读本节了解。

客户端写缓存默认关闭，写入的数据会首先进入 JuiceFS 客户端的内存[读写缓冲区](#buffer-size)，当一个 Chunk 被写满，或者应用强制写入（调用 `close()` 或者 `fsync()`）时，才会触发数据上传对象存储。为了确保数据安全性，客户端会等数据上传完成，才提交到元数据服务。

由于默认的写入流程是「先上传，再提交」，可想而知，大量小文件写入时，这样的流程将影响写入性能。启用客户端写缓存以后，写入流程将改为「先提交，再异步上传」，写文件不会等待数据上传到对象存储，而是写入到本地缓存目录并提交到元数据服务后就立即返回，本地缓存目录中的文件数据会在后台异步上传至对象存储。

如果你的场景需要写入大量临时文件，不需要持久化和分布式访问，也可以用 [`--upload-delay`](../reference/command_reference.mdx#mount-data-cache-options) 参数来设置延缓数据上传到对象存储，如果在等待的时间内数据被应用删除，则无需再上传到对象存储，既提升了性能也节省了成本。相较于本地硬盘而言，JuiceFS 提供了后端保障，在缓存目录容量不足时依然会自动将数据上传，确保在应用侧不会因此而感知到错误。

挂载时加入 `--writeback` 参数，便能开启客户端写缓存，但在该模式下请注意：

* 本地缓存本身的可靠性与缓存盘的可靠性直接相关，如果在上传完成前本地数据遭受损害，意味着数据丢失。因此对数据安全性要求越高，越应谨慎使用。
* 待上传的文件默认存储在 `/var/jfsCache/<UUID>/rawstaging/`，只要该目录不为空，就表示还有待上传的文件。务必注意不要删除该目录下的文件，否则将造成数据丢失。
* 写缓存大小由 [`--free-space-ratio`](#client-read-cache) 控制。默认情况下，如果未开启写缓存，JuiceFS 客户端最多使用缓存目录 90% 的磁盘空间（计算规则是 `(1 - <free-space-ratio>) * 100`）。开启写缓存后会超额使用一定比例的磁盘空间，计算规则是 `(1 - (<free-space-ratio> / 2)) * 100`，即默认情况下最多会使用缓存目录 95% 的磁盘空间。
* 写缓存和读缓存共享缓存盘空间，因此会互相影响。例如写缓存占用过多磁盘空间，那么将导致读缓存的大小受到限制，反之亦然。
* 如果本地盘写性能太差，带宽甚至比不上对象存储，那么 `--writeback` 会带来更差的写性能。
* 如果缓存目录的文件系统出错，客户端则降级为同步写入对象存储，情况类似[客户端读缓存](#client-read-cache)。
* 如果节点到对象存储的上行带宽不足（网速太差），本地写缓存迟迟无法上传完毕，此时如果在其他节点访问这些文件，则会出现读错误。低带宽场景的排查请详见[「与对象存储通信不畅」](../administration/troubleshooting.md#io-error-object-storage)。

也正由于写缓存的使用注意事项较多，使用不当极易出问题，我们推荐仅在大量写入小文件时临时开启，比如：

* 解压包含大量小文件的压缩文件
* 软件编译
* 大数据任务的临时存储场景，比如 Spark shuffle

启用 `--writeback` 模式后，除了直接查看 `/var/jfsCache/<UUID>/rawstaging/` 目录，还可以通过以下命令确定文件上传进度：

```shell
# 假设挂载点为 /jfs
$ cd /jfs
$ cat .stats | grep "staging"
juicefs_staging_block_bytes 1621127168  # 待上传的数据块大小
juicefs_staging_block_delay_seconds 46116860185.95535
juicefs_staging_blocks 394  # 待上传的数据块数量
```

### 缓存位置 {#cache-dir}

取决于操作系统，JuiceFS 的默认缓存路径如下：

- **Linux**：`/var/jfsCache`
- **macOS**：`$HOME/.juicefs/cache`
- **Windows**：`%USERPROFILE%\.juicefs\cache`

对于 Linux 系统，要注意默认缓存路径要求管理员权限，普通用户需要有权使用 `sudo` 才能设置成功，例如：

```shell
sudo juicefs mount redis://127.0.0.1:6379/1 /mnt/myjfs
```

另外，可以在挂载文件系统时通过 `--cache-dir` 选项设置在当前系统可以访问的任何存储路径上。对于没有访问 `/var` 目录权限的普通用户，可以把缓存设置在用户的 `HOME` 目录中，例如：

```shell
juicefs mount --cache-dir ~/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

:::tip 提示
建议缓存目录尽量使用独立的高性能盘，不要用系统盘，也不要和其它应用共用。共用不仅会相互影响性能，还可能导致其它应用出错（例如磁盘剩余空间不足）。如果无法避免必须共用那一定要预估好其它应用所需的磁盘容量，限制缓存空间大小（`--cache-size`），避免 JuiceFS 的读缓存或者写缓存占用过多空间。
:::

#### 内存盘

如果对文件的读性能有更高要求，可以把缓存设置在内存盘上。对于 Linux 系统，通过 `df` 命令查看 `tmpfs` 类型的文件系统：

```shell
$ df -Th | grep tmpfs
文件系统         类型      容量   已用  可用   已用% 挂载点
tmpfs          tmpfs     362M  2.0M  360M    1% /run
tmpfs          tmpfs     3.8G     0  3.8G    0% /dev/shm
tmpfs          tmpfs     5.0M  4.0K  5.0M    1% /run/lock
```

其中 `/dev/shm` 是典型的内存盘，可以作为 JuiceFS 的缓存路径使用，它的容量一般是内存的一半，可以根据需要手动调整容量，例如，将缓存盘的容量调整为 32GB：

```shell
sudo mount -o size=32000M -o remount /dev/shm
```

然后使用该路径作为缓存，挂载文件系统：

```shell
juicefs mount --cache-dir /dev/shm/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

除此之外，还可以将 `--cache-dir` 选项设置为 `memory` 来直接使用进程内存作为缓存，与 `/dev/shm` 相比，好处是简单不依赖外部设备，但相应地也无法持久化，一般在测试评估的时候使用。

#### 共享目录

SMB、NFS 等共享目录也可以用作 JuiceFS 的缓存，对于局域网有多个设备挂载了相同 JuiceFS 文件系统的情况，将局域网中的共享目录作为缓存路径，可以有效缓解多个设备重复预热缓存的带宽压力。

以 SMB/CIFS 共享为例，使用 `cifs-utils` 包提供的工具挂载局域网中的共享目录：

```shell
sudo mount.cifs //192.168.1.18/public /mnt/jfscache
```

将共享目录作为 JuiceFS 缓存：

```shell
sudo juicefs mount --cache-dir /mnt/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

#### 多缓存目录

JuiceFS 支持同时设置多个缓存目录，从而解决缓存空间不足的问题，使用 `:`（Linux、macOS）或 `;`（Windows）字符分隔多个路径，例如：

```shell
sudo juicefs mount --cache-dir ~/jfscache:/mnt/jfscache:/dev/shm/jfscache redis://127.0.0.1:6379/1 /mnt/myjfs
```

当设置了多个缓存目录，或者使用多块设备作为缓存盘，`--cache-size` 选项表示所有缓存目录中的数据总大小。客户端会采用哈希策略向各个缓存路径中均匀地写入数据，无法对多块容量或性能不同的缓存盘进行特殊调优。

因此建议不同缓存目录／缓存盘的可用空间保持一致，否则可能造成不能充分利用某个缓存目录空间的情况。例如 `--cache-dir` 为 `/data1:/data2`，其中 `/data1` 的可用空间为 1GiB，`/data2` 的可用空间为 2GiB，`--cache-size` 为 3GiB，`--free-space-ratio` 为 0.1。因为缓存的写入策略是均匀写入，所以分配给每个缓存目录的最大空间是 `3GiB / 2 = 1.5GiB`，会造成 `/data2` 目录的缓存空间最大为 1.5GiB，而不是 `2GiB * 0.9 = 1.8GiB`。


================================================
FILE: docs/zh_cn/guide/clone.md
================================================
---
title: 克隆文件或目录
sidebar_position: 6
---

对指定数据进行克隆，创建克隆时不会实际拷贝对象存储数据，而是仅拷贝元数据，因此不论对多大的文件或目录进行克隆，都非常快。对于 JuiceFS，这个命令是 `cp` 更好的替代，甚至对于 Linux 客户端来说，如果所使用的内核支持 [`copy_file_range`](https://man7.org/linux/man-pages/man2/copy_file_range.2.html)，那么调用 `cp` 时，实际发生的也是同样的元数据拷贝，调用将会格外迅速。

![clone](../images/juicefs-clone.svg)

克隆结果是纯粹的元数据拷贝，实际引用的对象存储块和源文件相同，因此在各方面都和源文件一样，可以正常读写。有任何一方文件数据被实际修改时，对应的数据块变更会以写入时重定向（ROW，Redirect on Write）的方式，写入到新的数据块，并将指针指向新的数据块。而其他未经修改的文件区域，由于对象存储数据块仍然相同，所以引用关系依然保持不变。如果对快照文件进行随机写、覆盖写等操作，和普通的 JuiceFS 文件一样，快照文件也会产生文件碎片，你可以通过执行`juicefs compact`来对文件的碎片进行合并，来提升读取效率。

需要注意的是，**克隆产生的元数据，也同样占用文件系统存储空间，以及元数据引擎的存储空间**，因此对庞大的目录进行克隆操作时请格外谨慎。

```shell
juicefs clone SRC DST

# 克隆文件
juicefs clone /mnt/jfs/file1 /mnt/jfs/file2

# 克隆目录
juicefs clone /mnt/jfs/dir1 /mnt/jfs/dir2
```

## 一致性 {#consistency}

在事务一致性方面，克隆的行为如下：

- 在 `clone` 命令完成前，目标文件不可见。
- 对于文件：`clone` 命令确保原子性，即克隆后的文件始终处于正确和一致的状态。
- 对于目录：`clone` 命令对目录的原子性没有保证。在克隆过程中，如果源目录发生变化，则目标目录与源目录可能不一致。
- 同时往同一个位置创建克隆时，只会有一个成功，失败请求的会清理掉临时创建的目录树。

克隆操作是在挂载进程中进行，如果克隆命令意外退出，克隆操作可能完成或者被中断。失败或者被中断的克隆操作，`mount` 进程会尝试清理已创建好的子树，如果清理子树也失败（元数据不可用或者`mount`进程意外退出），则会导致元数据泄露和可能的对象存储泄露。此时如果源对象被删除了，则会导致其对象存储上的数据不会被释放（因为被未挂载的的子树所引用），直到使用 [`juicefs gc --delete`](../reference/command_reference.mdx#gc) 命令清理。


================================================
FILE: docs/zh_cn/guide/dir-stats.md
================================================
---
title: 目录用量统计
sidebar_position: 5
---

JuiceFS 在 v1.1.0 开始支持目录用量统计并在文件系统格式化时默认开启，旧版本 volume 迁移到新版本后默认关闭（需要[手动开启](#enable-directory-stats)）。目录用量统计可以加速 `quota`、`info` 和 `summary` 等子命令，但由于客户端会异步更新统计信息，启用后也会带来少量开销。

:::tip 提示
由于用量统计需要挂载客户端支持，请确保除所有可写入客户端已升级到 v1.1.0 以上版本再启用此特性。
:::

## 启用目录用量统计 {#enable-directory-stats}

运行 `juicefs config $URL --dir-stats` 来启用目录统计，启用以后，使用 `juicefs config $URL` 命令确认生效：

```shell
$ juicefs config redis://localhost
2023/05/31 15:56:39.721188 juicefs[30626] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/05/31 15:56:39.723284 juicefs[30626] <INFO>: Ping redis latency: 159.226µs [redis.go:3566]
{
  "Name": "myjfs",
  "UUID": "82db28de-bf5f-43bf-bba3-eb3535a86c48",
  "Storage": "file",
  "Bucket": "/root/.juicefs/local/",
  "BlockSize": 4096,
  "Compression": "none",
  "EncryptAlgo": "aes256gcm-rsa",
  "TrashDays": 1,
  "MetaVersion": 1,
  "DirStats": true
}
```

可以看到 `"DirStats": true` 代表目录用量统计已启用，我们可以尝试禁用它：

```shell
$ juicefs config redis://localhost --dir-stats=false
2023/05/31 15:59:39.046134 juicefs[30752] <INFO>: Meta address: redis://localhost [interface.go:494]
2023/05/31 15:59:39.048301 juicefs[30752] <INFO>: Ping redis latency: 171.308µs [redis.go:3566]
 dir-stats: true -> false
```

:::tip 提示
[目录配额](./quota.md#directory-quota)功能依赖目录用量统计，为目录设置配额后会自动开启目录用量统计，并且需要删除所有目录配额后才能禁用目录用量统计。
:::

## 查看目录统计 {#check-directory-stats}

运行 `juicefs info $PATH` 查看单层目录的统计用量：

```shell
$ juicefs info /mnt/jfs/pjdfstest/
/mnt/jfs/pjdfstest/ :
  inode: 2
  files: 10
   dirs: 4
 length: 43.74 KiB (44794 Bytes)
   size: 92.00 KiB (94208 Bytes)
   path: /pjdfstest
```

也可以使用 `juicefs info -r $PATH` 递归查看目录统计并汇总：

```shell
/mnt/jfs/pjdfstest/: 278                       921.0/s
/mnt/jfs/pjdfstest/: 1.6 MiB (1642496 Bytes)   5.2 MiB/s
/mnt/jfs/pjdfstest/ :
  inode: 2
  files: 278
   dirs: 37
 length: 592.42 KiB (606638 Bytes)
   size: 1.57 MiB (1642496 Bytes)
   path: /pjdfstest
```

另外你可以使用 `juicefs summary $PATH` 命令来查看各层级的目录用量：

```shell
$ ./juicefs summary /mnt/jfs/pjdfstest/
/mnt/jfs/pjdfstest/: 315                       1044.4/s
/mnt/jfs/pjdfstest/: 1.6 MiB (1642496 Bytes)   5.2 MiB/s
+------------------+---------+------+-------+
|       PATH       |   SIZE  | DIRS | FILES |
+------------------+---------+------+-------+
| /                | 1.6 MiB |   37 |   278 |
| tests/           | 1.1 MiB |   18 |   240 |
| tests/open/      | 112 KiB |    1 |    26 |
| tests/...        | 328 KiB |    7 |    71 |
| .git/            | 432 KiB |   17 |    26 |
| .git/objects/    | 252 KiB |    3 |     2 |
| ...              |  12 KiB |    0 |     3 |
+------------------+---------+------+-------+
```

:::note 说明
目录统计只计算每个目录的单层用量，如果要查看递归统计用量，需要使用 `juicefs info -r`，对于大目录，遍历汇总可能带来很大的开销。如需持续查看某些特定目录的总用量，可参考目录配额通过[设置空配额](./quota.md#limit-capacity-and-inodes-of-directory)的方式统计目录总用量。

与社区版不同，JuiceFS 企业版的目录大小已经进行了[递归统计](/docs/zh/cloud/guide/quota#file-directory-size)，可以直接用 `ls -lh` 看到递归统计的目录总大小。
:::

## 故障和修复 {#troubleshooting}

由于目录用量是异步统计，当客户端发生异常时可能会丢失部分统计值导致结果不准确。`juicefs info`、`juicefs summary` 和 `juicefs quota` 命令均配有 `--strict` 选项在严苛模式下运行以绕过目录统计（默认模式一般称为快速模式，fast mode）。

如果发现严格模式和快速模式结果不一致，考虑使用 `juicefs fsck` 命令进行诊断和修复：

```shell
$ juicefs info -r /jfs/d
/jfs/d: 1                             3.3/s
/jfs/d: 448.0 MiB (469766144 Bytes)   1.4 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 448.00 MiB (469762048 Bytes)
   size: 448.00 MiB (469766144 Bytes)
   path: /d

$ juicefs info -r --strict /jfs/d
/jfs/d: 1                            3.3/s
/jfs/d: 1.0 GiB (1073745920 Bytes)   3.3 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 1.00 GiB (1073741824 Bytes)
   size: 1.00 GiB (1073745920 Bytes)
   path: /d

# 检查目录 /d 的用量统计
$ juicefs fsck sqlite3://test.db --path /d --sync-dir-stat
2023/05/31 17:14:34.700239 juicefs[32667] <INFO>: Meta address: sqlite3://test.db [interface.go:494]
[xorm] [info]  2023/05/31 17:14:34.700291 PING DATABASE sqlite3
2023/05/31 17:14:34.701553 juicefs[32667] <WARNING>: usage stat of /d should be &{1073741824 1073741824 1}, but got &{469762048 469762048 1} [base.go:2010]
2023/05/31 17:14:34.701577 juicefs[32667] <WARNING>: Stat of path /d (inode 2) should be synced, please re-run with '--path /d --repair --sync-dir-stat' to fix it [base.go:2025]
2023/05/31 17:14:34.701615 juicefs[32667] <FATAL>: some errors occurred, please check the log of fsck [main.go:31]

# 修复目录 /d 的用量统计
$ juicefs fsck -v sqlite3://test.db --path /d --sync-dir-stat --repair
2023/05/31 17:14:43.445153 juicefs[32721] <DEBUG>: maxprocs: Leaving GOMAXPROCS=8: CPU quota undefined [maxprocs.go:47]
2023/05/31 17:14:43.445289 juicefs[32721] <INFO>: Meta address: sqlite3://test.db [interface.go:494]
[xorm] [info]  2023/05/31 17:14:43.445350 PING DATABASE sqlite3
2023/05/31 17:14:43.462374 juicefs[32721] <DEBUG>: Stat of path /d (inode 2) is successfully synced [base.go:2018]

# 验证
$ juicefs info -r /jfs/d
/jfs/d: 1                            3.3/s
/jfs/d: 1.0 GiB (1073745920 Bytes)   3.3 GiB/s
/jfs/d :
  inode: 2
  files: 1
   dirs: 1
 length: 1.00 GiB (1073741824 Bytes)
   size: 1.00 GiB (1073745920 Bytes)
   path: /d
```


================================================
FILE: docs/zh_cn/guide/gateway.md
================================================
---
title: S3 网关
description: JuiceFS S3 网关是 JuiceFS 的一个功能，它将 JuiceFS 文件系统以 S3 协议对外提供服务，使得应用可以通过 S3 SDK 访问 JuiceFS 上存储的文件。
sidebar_position: 5
---

JuiceFS S3 网关是 JuiceFS 支持的多种访问方式之一，它可以将 JuiceFS 文件系统以 S3 协议对外提供服务，使得应用可以通过 S3 SDK 访问 JuiceFS 上存储的文件。

## 架构与原理

在 JuiceFS 中，文件是以对象的形式[分块存储到底层的对象存储中](../introduction/architecture.md#how-juicefs-store-files)。JuiceFS 提供了 FUSE POSIX、WebDAV、S3 网关、CSI 驱动等多种访问方式，其中 S3 网关是较为常用的一种，其架构图如下：

![JuiceFS S3 Gateway architecture](../images/juicefs-s3-gateway-arch.png)

JuiceFS S3 网关功能是通过 [MinIO S3 网关](https://github.com/minio/minio/tree/ea1803417f80a743fc6c7bb261d864c38628cf8d/docs/gateway)实现的。我们利用 MinIO 的 [`object 接口`](https://github.com/minio/minio/blob/d46386246fb6db5f823df54d932b6f7274d46059/cmd/object-api-interface.go#L88)将 JuiceFS 文件系统作为 MinIO 服务器的后端存储，提供接近原生 MinIO 的使用体验，同时继承 MinIO 的许多高级功能。在这种架构中，JuiceFS 就相当于 MinIO 实例的一块本地磁盘，原理与 `minio server /data1` 命令类似。

JuiceFS S3 网关的常见的使用场景有：

- **为 JuiceFS 开放 S3 接口**：应用可以通过 S3 SDK 访问 JuiceFS 上存储的文件；
- **使用 S3 客户端**：使用 s3cmd、AWS CLI、MinIO 客户端来方便地访问和操作 JuiceFS 上存储的文件；
- **管理 JuiceFS 中的文件**：S3 网关提供了一个基于网页的文件管理器，可以在浏览器中管理 JuiceFS 中的文件；
- **集群复制**：在跨集群复制数据的场景下，作为集群的统一数据出口，避免跨区访问元数据以提升数据传输性能，详见[「使用 S3 网关进行跨区域数据同步」](../guide/sync.md#sync-across-region)

## 快速开始

启动 S3 网关需要一个已经创建完毕的 JuiceFS 文件系统，如果尚不存在，请参考[文档](../getting-started/standalone.md)来创建。下方假定元数据引擎 URL 为 `redis://localhost:6379/1`。

由于网关基于 MinIO 开发，因此需要先设置 `MINIO_ROOT_USER` 和 `MINIO_ROOT_PASSWORD` 两个环境变量，他们会成为访问 S3 API 时认证身份用的 Access Key 和 Secret Key，是拥有最高权限的管理员凭证。

```shell
export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=12345678

# Windows 用户请改用 set 命令设置环境变量
set MINIO_ROOT_USER=admin
```

注意，`MINIO_ROOT_USER` 的长度至少 3 个字符， `MINIO_ROOT_PASSWORD` 的长度至少 8 个字符，如果未能正确设置，将会遭遇类似 `MINIO_ROOT_USER should be specified as an environment variable with at least 3 characters` 的报错，注意排查。

启动 S3 网关：

```shell
# 第一个参数是元数据引擎的 URL，第二个是 S3 网关监听的地址和端口
juicefs gateway redis://localhost:6379/1 localhost:9000

# 从 v1.2 开始，S3 网关支持后台启动，追加 --background 或 -d 参数均可
# 后台运行场景下，使用 --log 指定日志输出文件路径
juicefs gateway redis://localhost:6379 localhost:9000 -d --log=/var/log/juicefs-s3-gateway.log
```

S3 Gateway 默认没有启用[多桶支持](#多桶支持)，可以添加 `--multi-buckets` 选项开启。还可以添加[其他选项](../reference/command_reference.mdx#gateway)优化 S3 网关，比如，可以将默认的本地缓存设置为 20 GiB。

```shell
juicefs gateway --cache-size 20480 redis://localhost:6379/1 localhost:9000
```

在这个例子中，我们假设 JuiceFS 文件系统使用的是本地的 Redis 数据库。当 S3 网关启用时，在**当前主机**上可以使用 `http://localhost:9000` 这个地址访问到 S3 网关的管理界面。

![S3-gateway-file-manager](../images/s3-gateway-file-manager.jpg)

如果你希望通过局域网或互联网上的其他主机访问 S3 网关，则需要调整监听地址，例如：

```shell
juicefs gateway redis://localhost:6379/1 0.0.0.0:9000
```

这样一来，S3 网关将会默认接受所有网络请求。不同的位置的 S3 客户端可以使用不同的地址访问 S3 网关，例如：

- S3 网关所在主机中的第三方客户端可以使用 `http://127.0.0.1:9000` 或 `http://localhost:9000` 进行访问；
- 与 S3 网关所在主机处于同一局域网的第三方客户端可以使用 `http://192.168.1.8:9000` 访问（假设启用 S3 网关的主机内网 IP 地址为 192.168.1.8）；
- 通过互联网访问 S3 网关可以使用 `http://110.220.110.220:9000` 访问（假设启用 S3 网关的主机公网 IP 地址为 110.220.110.220）。

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=1706122101&bvid=BV1fT421r72r&cid=27189643521&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 访问 S3 网关

各类支持 S3 API 的客户端、桌面程序、Web 程序等都可以访问 JuiceFS S3 网关。使用时请注意 S3 网关监听的地址和端口。

:::tip 提示
以下示例均为使用第三方客户端访问本地主机上运行的 S3 网关。在具体场景下，请根据实际情况调整访问 S3 网关的地址。
:::

### 使用 AWS CLI

从 [https://aws.amazon.com/cli](https://aws.amazon.com/cli) 下载并安装 AWS CLI，然后进行配置：

```bash
$ aws configure
AWS Access Key ID [None]: admin
AWS Secret Access Key [None]: 12345678
Default region name [None]:
Default output format [None]:
```

程序会通过交互式的方式引导你完成新配置的添加，其中 `Access Key ID` 与 `MINIO_ROOT_USER` 相同，`Secret Access Key` 与 `MINIO_ROOT_PASSWORD` 相同，区域名称和输出格式请留空。

之后，即可使用 `aws s3` 命令访问 JuiceFS 存储，例如：

```bash
# List buckets
$ aws --endpoint-url http://localhost:9000 s3 ls

# List objects in bucket
$ aws --endpoint-url http://localhost:9000 s3 ls s3://<bucket>
```

### 使用 MinIO 客户端

为避免兼容性问题，我们推荐采用的 mc 的版本为 RELEASE.2021-04-22T17-40-00Z，你可以在这个[地址](https://dl.min.io/client/mc/release)找到历史版本和不同架构的 mc，比如这是 amd64 架构 RELEASE.2021-04-22T17-40-00Z 版本的 mc 的[下载地址](https://dl.min.io/client/mc/release/linux-amd64/archive/mc.RELEASE.2021-04-22T17-40-00Z)

下载安装完成 mc 后添加一个新的 alias：

```bash
mc alias set juicefs http://localhost:9000 admin 12345678
```

然后，你可以通过 mc 客户端自由的在本地磁盘与 JuiceFS 存储以及其他云存储之间进行文件和文件夹的复制、移动、增删等管理操作。

```shell
$ mc ls juicefs/jfs
[2021-10-20 11:59:00 CST] 130KiB avatar-2191932_1920.png
[2021-10-20 11:59:00 CST] 4.9KiB box-1297327.svg
[2021-10-20 11:59:00 CST]  21KiB cloud-4273197.svg
[2021-10-20 11:59:05 CST]  17KiB hero.svg
[2021-10-20 11:59:06 CST] 1.7MiB hugo-rocha-qFpnvZ_j9HU-unsplash.jpg
[2021-10-20 11:59:06 CST]  16KiB man-1352025.svg
[2021-10-20 11:59:06 CST] 1.3MiB man-1459246.ai
[2021-10-20 11:59:08 CST]  19KiB sign-up-accent-left.07ab168.svg
[2021-10-20 11:59:10 CST]  11MiB work-4997565.svg
```

## 常用功能

### 多桶支持

默认情况下，`juicefs gateway` 只允许一个 bucket，bucket 名字为文件系统名字，如果需要多个桶，可以在启动时添加 `--multi-buckets`开启多桶支持，该参数将会把 JuiceFS 文件系统顶级目录下的每个子目录都导出为一个 bucket。创建 bucket 的行为在文件系统上的反映是顶级目录下创建了一个同名的子目录。

```shell
juicefs gateway redis://localhost:6379/1 localhost:9000 --multi-buckets
```

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=1056147201&bvid=BV1LH4y1A73s&cid=1620258239&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

### 保留 etag

默认 S3 网关不会保存和返回对象的 etag 信息，可以通过`--keep-etag` 开启：

```shell
juicefs gateway myjfs localhost:9000 --keep-etag
```

然后通过网关上传到 JuiceFS 的文件你就可以用 s3API 的 `head-object` 来获取 etag 了：

```shell
aws s3api --endpoint=http://localhost:9000 head-object --bucket myjfs --key test123/test.etag
{
    "AcceptRanges": "bytes",
    "LastModified": "Wed, 23 Apr 2025 00:17:16 GMT",
    "ContentLength": 7,
    "ETag": "\"d2fde576f44a6601b73201234b491904\"",
    "ContentType": "application/octet-stream",
    "Metadata": {}
}
```

这个 etag 是通过 MD5 算法生成的，并且通过 `setXattr` 设置了 key 为 `s3-tag` 的扩展属性到文件中，如果你使用 `--enable-xattr` 挂载 JuiceFS 的话也可以用 `getfattr` 来获取这个 etag：

```shell
getfattr -n s3-etag test.etag
# file: test.etag
s3-etag="d2fde576f44a6601b73201234b491904"
```

### 开启对象标签

默认不支持对象标签，可以通过`--object-tag` 开启

### 开启对象元数据 <VersionAdd>1.3</VersionAdd>

默认不支持对象元数据，可以通过 `--object-meta` 开启，[参考文档](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingMetadata.html)

### 启用虚拟主机风格请求

默认情况下，S3 网关支持格式为 `http://mydomain.com/bucket/object` 的路径类型请求。`MINIO_DOMAIN` 环境变量被用来启用虚拟主机类型请求。如果请求的 `Host` 头信息匹配 `(.+).mydomain.com`，则匹配的模式 `$1` 被用作 bucket，并且路径被用作 object.

示例：

```shell
export MINIO_DOMAIN=mydomain.com
```

### 调整 IAM 刷新时间

默认 IAM 缓存的刷新时间为 5 分钟，可以通过 `--refresh-iam-interval` 调整，该参数的值是一个带单位的时间字符串，例如 "300ms", "-1.5h" 或者 "2h45m"，有效的时间单位是 "ns"、"us" (或 "µs")、"ms"、"s"、"m"、"h"。

例如设置 1 分钟刷新：

```sh
juicefs gateway xxxx xxxx    --refresh-iam-interval 1m
```

### 多 Gateway 实例

JuiceFS 的分布式特性使得可以在多个节点上同时启动多个 S3 网关实例，这样可以提高 S3 网关的可用性和性能。在这种情况下，每个 S3 网关实例都会独立地处理请求，但是它们都会访问同一个 JuiceFS 文件系统。在这种情况下，需要注意以下几点：

1. 需要保证所有实例在启动时使用相同的用户，其 UID 和 GID 相同；
2. 节点之间 IAM 刷新时间可以不同，但是需要保证 IAM 刷新时间不要太短，以免对 JuiceFS 造成过大的压力；
3. 每个实例的监听的地址和端口可以自由设置，如果在同一台机器上启动多个实例，需要确保端口不冲突。

### 以守护进程的形式运行

S3 网关 可以通过以下配置以 Linux 守护进程的形式在后台运行。

```shell
cat > /lib/systemd/system/juicefs-gateway.service<<EOF
[Unit]
Description=Juicefs S3 Gateway
Requires=network.target
After=multi-user.target
StartLimitIntervalSec=0

[Service]
Type=simple
User=root
Environment="MINIO_ROOT_USER=admin"
Environment="MINIO_ROOT_PASSWORD=12345678"
ExecStart=/usr/local/bin/juicefs gateway redis://localhost:6379 localhost:9000
Restart=on-failure
RestartSec=60

[Install]
WantedBy=multi-user.target
EOF
```

设置进程开机自启动

```shell
systemctl daemon-reload
systemctl enable juicefs-gateway --now
systemctl status juicefs-gateway
```

检阅进程的日志

```bash
journalctl -xefu juicefs-gateway.service
```

### 在 Kubernetes 上部署 S3 网关 {#deploy-in-kubernetes}

安装需要 Helm 3.1.0 及以上版本，请参照 [Helm 文档](https://helm.sh/zh/docs/intro/install)进行安装。

```shell
helm repo add juicefs https://juicedata.github.io/charts/
helm repo update
```

Helm chart 同时支持 JuiceFS 社区版和企业版，通过填写 values 中不同的字段来区分具体使用的版本，默认的 [values](https://github.com/juicedata/charts/blob/main/charts/juicefs-s3-gateway/values.yaml) 使用了社区版 JuiceFS 客户端镜像：

```yaml title="values-mycluster.yaml"
secret:
  name: "<name>"
  metaurl: "<meta-url>"
  storage: "<storage-type>"
  accessKey: "<access-key>"
  secretKey: "<secret-key>"
  bucket: "<bucket>"
```

:::tip
别忘了把上方的 `values-mycluster.yaml` 纳入 Git 项目（或者其他的源码管理方式）管理起来，这样一来，就算 values 的配置不断变化，也能对其进行追溯和回滚。
:::

填写完毕保存，就可以使用下方命令部署了：

```shell
# 不论是初次安装，还是后续调整配置重新上线，都可以使用下方命令
helm upgrade --install -f values-mycluster.yaml s3-gateway juicefs/juicefs-s3-gateway
```

部署完毕以后，按照输出文本的提示，获取 Kubernetes Service 的地址，并测试是否可以正常访问。

```shell
$ kubectl -n kube-system get svc -l app.kubernetes.io/name=juicefs-s3-gateway
NAME                 TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)    AGE
juicefs-s3-gateway   ClusterIP   10.101.108.42   <none>        9000/TCP   142m
```

部署完成后，会启动一个名为 `juicefs-s3-gateway` 的 Deploy。用下方命令查看部署的 Pod：

```sh
$ kubectl -n kube-system get po -l app.kubernetes.io/name=juicefs-s3-gateway
NAME                                  READY   STATUS    RESTARTS   AGE
juicefs-s3-gateway-5c69d574cc-t92b6   1/1     Running   0          136m
```

## 高级功能

JuiceFS S3 网关的核心功能是对外提供 S3 接口，目前对 S3 协议的支持已经比较完善。在 v1.2 版本中，我们又添加了对身份和访问控制（IAM）和桶事件通知的支持。

这些高级功能要求 mc 客户端的版本为 `RELEASE.2021-04-22T17-40-00Z`，使用方法可以参考当时 MinIO [相关文档](https://github.com/minio/minio/tree/e0d3a8c1f4e52bb4a7d82f7f369b6796103740b3/docs)，也可以直接参考 mc 的命令行帮助信息。如果你不知道有哪些功能或者不知道某个功能如何使用，你可以直接在子命令后加 `-h` 查看帮助说明。

### 身份和访问控制

#### 普通用户

在 v1.2 版本之前，`juicefs gateway` 只有在启动时创建一个超级用户，这个超级用户只属于这个进程，即使多个 gateway 的背后是同一个文件系统，其用户也都是进程间隔离的（你可以为每个 gateway 进程设置不同的超级用户，他们相互独立，互不影响）。

在 v1.2 版本之后，`juicefs gateway` 启动时仍需要设置超级用户，该超级用户仍旧是进程隔离的，但是允许使用 `mc admin user add` 添加新的用户，新添加的用户将是同文件系统共享的。可以使用 `mc admin user` 进行管理，支持添加，关闭，启用，删除用户，也支持查看所有用户以及展示用户信息和查看用户的策略。

```Shell
$ mc admin user -h
NAME:
  mc admin user - manage users

USAGE:
  mc admin user COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add a new user
  disable  disable user
  enable   enable user
  remove   remove user
  list     list all users
  info     display info of a user
  policy   export user policies in JSON format
  svcacct  manage service accounts
```

例如，添加用户：

```Shell
# 添加新用户
$ mc admin user add myjfs user1 admin123

# 查看当前用户
$ mc admin user list myjfs
enabled    user1

# 查看当前用户
$ mc admin user list myjfs --json
{
 "status": "success",
 "accessKey": "user1",
 "userStatus": "enabled"
}
```

#### 服务账户

服务账户（service accounts）的作用是为现有用户创建一个相同权限的副本，让不同的应用可以使用独立的访问密钥。服务账户的权限继承自父用户，可以通过 `mc admin user svcacct` 命令管理。

```
$ mc admin user svcacct -h
NAME:
  mc admin user svcacct - manage service accounts

USAGE:
  mc admin user svcacct COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add a new service account
  ls       List services accounts
  rm       Remove a service account
  info     Get a service account info
  set      edit an existing service account
  enable   Enable a service account
  disable  Disable a services account
```

:::tip 提示
服务账户会从主账户继承权限并保持与主账户权限一致，而且服务账户不可以直接附加权限策略。
:::

比如，现在有一个名为 `user1` 的用户，通过以下命令为它创建一个名为 `svcacct1` 的服务账户：

```Shell
mc admin user svcacct add myjfs user1 --access-key svcacct1 --secret-key 123456abc
```

如果 `user1` 用户为只读权限，那么 `svcacct1` 也是只读权限。如果想让 `svcacct1` 拥有其他权限，则需要调整 `user1` 的权限。

#### AssumeRole 安全令牌服务

S3 网关安全令牌服务（STS）是一种服务，可让客户端请求 MinIO 资源的临时凭证。临时凭证的工作原理与默认管理员凭证几乎相同，但有一些不同之处：

- **临时凭据顾名思义是短期的。**它们可以配置为持续几分钟到几小时不等。证书过期后，S3 网关将不再识别它们，也不允许使用它们进行任何形式的 API 请求访问。

- **临时凭据不需要与应用程序一起存储，而是动态生成并在请求时提供给应用程序。**当（甚至在）临时凭据过期时，应用程序可以请求新的凭据。

`AssumeRole` 会返回一组临时安全凭证，您可以使用这些凭证访问网关资源。`AssumeRole` 需要现有网关用户的授权凭据，返回的临时安全凭证包括访问密钥、秘密密钥和安全令牌。应用程序可以使用这些临时安全凭证对网关 API 操作进行签名调用，应用于这些临时凭据的策略略继承自网关用户凭据。

默认情况下，`AssumeRole` 创建的临时安全凭证有效期为一个小时，可以通过可选参数 DurationSeconds 指定凭据的有效期，该值范围从 900（15 分钟）到 604800（7 天）。

##### API 请求参数

1. Version

   指示 STS API 版本信息，唯一支持的值是 '2011-06-15'。出于兼容性原因，此值借用自 AWS STS API 文档。

   | Params  | Value  |
   | ------- | ------ |
   | Type    | String |
   | Require | Yes    |

2. AUTHPARAMS

   指示 STS API 授权信息。如果您熟悉 AWS Signature V4 授权头部，此 STS API 支持如[此处](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html)所述的签名 V4 授权。

3. DurationSeconds

   持续时间，以秒为单位。该值可以在 900 秒（15 分钟）至 7 天之间变化。如果值高于此设置，则操作失败。默认情况下，该值设置为 3600 秒。

   | Params      | Value                           |
   | ----------- | ------------------------------- |
   | _Type_      | Integer                         |
   | Valid Range | 最小值为 900，最大值为 604800。 |
   | Required    | No                              |

4. Policy

   您希望将其用作内联会话策略的 JSON 格式的 IAM 策略。此参数是可选的。将策略传递给此操作会返回新的临时凭证。生成会话的权限是预设策略名称和此处设置的策略集合的交集。您不能使用该策略授予比被假定预设策略名称允许的更多权限。

   | Params      | Value                           |
   | ----------- | ------------------------------- |
   | Type        | String                          |
   | Valid Range | 最小长度为 1。最大长度为 2048。 |
   | Required    | No                              |

##### 响应元素

此 API 的 XML 响应类似于 [AWS STS AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#API_AssumeRole_ResponseElements)

##### 错误

此 API 的 XML 错误响应类似于 [AWS STS AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html#API_AssumeRole_Errors)

##### `POST`请求示例

```
http://minio:9000/?Action=AssumeRole&DurationSeconds=3600&Version=2011-06-15&Policy={"Version":"2012-10-17","Statement":[{"Sid":"Stmt1","Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}]}&AUTHPARAMS
```

##### 响应示例

```
<?xml version="1.0" encoding="UTF-8"?>
<AssumeRoleResponse xmlns="https://sts.amazonaws.com/doc/2011-06-15/">
  <AssumeRoleResult>
    <AssumedRoleUser>
      <Arn/>
      <AssumeRoleId/>
    </AssumedRoleUser>
    <Credentials>
      <AccessKeyId>Y4RJU1RNFGK48LGO9I2S</AccessKeyId>
      <SecretAccessKey>sYLRKS1Z7hSjluf6gEbb9066hnx315wHTiACPAjg</SecretAccessKey>
      <Expiration>2019-08-08T20:26:12Z</Expiration>
      <SessionToken>eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJZNFJKVTFSTkZHSzQ4TEdPOUkyUyIsImF1ZCI6IlBvRWdYUDZ1Vk80NUlzRU5SbmdEWGo1QXU1WWEiLCJhenAiOiJQb0VnWFA2dVZPNDVJc0VOUm5nRFhqNUF1NVlhIiwiZXhwIjoxNTQxODExMDcxLCJpYXQiOjE1NDE4MDc0NzEsImlzcyI6Imh0dHBzOi8vbG9jYWxob3N0Ojk0NDMvb2F1dGgyL3Rva2VuIiwianRpIjoiYTBiMjc2MjktZWUxYS00M2JmLTg3MzktZjMzNzRhNGNkYmMwIn0.ewHqKVFTaP-j_kgZrcOEKroNUjk10GEp8bqQjxBbYVovV0nHO985VnRESFbcT6XMDDKHZiWqN2vi_ETX_u3Q-w</SessionToken>
    </Credentials>
  </AssumeRoleResult>
  <ResponseMetadata>
    <RequestId>c6104cbe-af31-11e0-8154-cbc7ccf896c7</RequestId>
  </ResponseMetadata>
</AssumeRoleResponse>
```

##### AWS cli 使用 AssumeRole API

1. 启动 S3 网关并创建名为 foobar 的用户
2. 配置 AWS cli

    ```
    [foobar]
    region = us-east-1
    aws_access_key_id = foobar
    aws_secret_access_key = foo12345
    ```

3. 使用 AWS cli 请求 AssumeRole API

    :::note 注意
    在以下命令中，“--role-arn”和“--role-session-name”对 S3 网关没有意义，可以设置为满足命令行要求的任何值。
    :::

    ```sh
    $ aws --profile foobar --endpoint-url http://localhost:9000 sts assume-role --policy '{"Version":"2012-10-17","Statement":[{"Sid":"Stmt1","Effect":"Allow","Action":"s3:*","Resource":"arn:aws:s3:::*"}]}' --role-arn arn:xxx:xxx:xxx:xxxx --role-session-name anything
    {
        "AssumedRoleUser": {
            "Arn": ""
        },
        "Credentials": {
            "SecretAccessKey": "xbnWUoNKgFxi+uv3RI9UgqP3tULQMdI+Hj+4psd4",
            "SessionToken": "eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3NLZXkiOiJLOURUSU1VVlpYRVhKTDNBVFVPWSIsImV4cCI6MzYwMDAwMDAwMDAwMCwicG9saWN5IjoidGVzdCJ9.PetK5wWUcnCJkMYv6TEs7HqlA4x_vViykQ8b2T_6hapFGJTO34sfTwqBnHF6lAiWxRoZXco11B0R7y58WAsrQw",
            "Expiration": "2019-02-20T19:56:59-08:00",
            "AccessKeyId": "K9DTIMUVZXEXJL3ATUOY"
        }
    }
    ```

##### go 应用程序访问 AssumeRole API

请参考 MinIO 官方[示例程序](https://github.com/minio/minio/blob/master/docs/sts/assume-role.go)

:::note 注意
环境变量设置的超级用户无法使用 AssumeRole API，只有通过 `mc admin user add` 添加的用户才能使用 AssumeRole API。
:::

#### 权限管理

默认新创建的用户是没有任何权限的，需要使用 `mc admin policy` 为其赋权后才可使用。该命令支持权限的增删改查以及为用户添加删除更新权限。

```Shell
$ mc admin policy -h
NAME:
  mc admin policy - manage policies defined in the MinIO server

USAGE:
  mc admin policy COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add     add new policy
  remove  remove policy
  list    list all policies
  info    show info on a policy
  set     set IAM policy on a user or group
  unset   unset an IAM policy for a user or group
  update  Attach new IAM policy to a user or group
```

S3 网关内置了以下 4 种常用的策略：

- **readonly**：只读用户
- **readwrite**：可读写用户
- **writeonly**：只写用户
- **consoleAdmin**：可读可写可管理，可管理指可以调用管理 API，比如创建用户等等。

例如，设置某个用户为只读：

```Shell
# 设置 user1 为只读
$ mc admin policy set myjfs readonly user=user1

# 查看用户策略
$ mc admin user list myjfs
enabled    user1                 readonly
```

以上是简单的策略，如需设置自定义的策略，可以使用 `mc admin policy add`。

```Shell
$ mc admin policy add -h
NAME:
  mc admin policy add - add new policy

USAGE:
  mc admin policy add TARGET POLICYNAME POLICYFILE

POLICYNAME:
  Name of the canned policy on MinIO server.

POLICYFILE:
  Name of the policy file associated with the policy name.

EXAMPLES:
  1. Add a new canned policy 'writeonly'.
     $ mc admin policy add myjfs writeonly /tmp/writeonly.json
```

这里要添加的策略文件必须是一个 JSON 格式的文件，具有[IAM 兼容](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies.html)的语法，且不超过 2048 个字符。该语法可以实现更为精细化的访问控制，如果不熟悉，可以先用下面的命令查看内置的简单策略并在此基础上加以更改。

```Shell
$ mc admin policy info myjfs readonly
{
 "Version": "2012-10-17",
 "Statement": [
  {
   "Effect": "Allow",
   "Action": [
    "s3:GetBucketLocation",
    "s3:GetObject"
   ],
   "Resource": [
    "arn:aws:s3:::*"
   ]
  }
 ]
}
```

#### 用户组管理

JuiceFS S3 Gateway 支持创建用户组，类似于 Linux 用户组的概念，使用 `mc admin group` 管理。你可以把一个或者多个用户设置为一个组，然后为组统一赋权，该用法与用户管理类似，此处不再赘述。

```Shell
$ mc admin  group -h
NAME:
  mc admin group - manage groups

USAGE:
  mc admin group COMMAND [COMMAND FLAGS | -h] [ARGUMENTS...]

COMMANDS:
  add      add users to a new or existing group
  remove   remove group or members from a group
  info     display group info
  list     display list of groups
  enable   enable a group
  disable  disable a group
```

#### 匿名访问管理

以上是针对有用户记录的管理，如果希望特定的对象或桶可以被任何人访问，可以使用 `mc policy` 命令配置匿名访问策略。

```Shell
Name:
  mc policy - manage anonymous access to buckets and objects

USAGE:
  mc policy [FLAGS] set PERMISSION TARGET
  mc policy [FLAGS] set-json FILE TARGET
  mc policy [FLAGS] get TARGET
  mc policy [FLAGS] get-json TARGET
  mc policy [FLAGS] list TARGET

PERMISSION:
  Allowed policies are: [none, download, upload, public].

FILE:
  A valid S3 policy JSON filepath.

EXAMPLES:
  1. Set bucket to "download" on Amazon S3 cloud storage.
     $ mc policy set download s3/burningman2011

  2. Set bucket to "public" on Amazon S3 cloud storage.
     $ mc policy set public s3/shared

  3. Set bucket to "upload" on Amazon S3 cloud storage.
     $ mc policy set upload s3/incoming

  4. Set policy to "public" for bucket with prefix on Amazon S3 cloud storage.
     $ mc policy set public s3/public-commons/images

  5. Set a custom prefix based bucket policy on Amazon S3 cloud storage using a JSON file.
     $ mc policy set-json /path/to/policy.json s3/public-commons/images

  6. Get bucket permissions.
     $ mc policy get s3/shared

  7. Get bucket permissions in JSON format.
     $ mc policy get-json s3/shared

  8. List policies set to a specified bucket.
     $ mc policy list s3/shared

  9. List public object URLs recursively.
     $ mc policy --recursive links s3/shared/
```

S3 网关默认内置了 4 种匿名权限

- **none**：不允许匿名访问（一般用来清除已有的权限）
- **download**：允许任何人读取
- **upload**：允许任何人写入
- **public**：允许任何人读写

例如，设置一个允许匿名下载对象：

```
# 设置 testbucket1/afile 为匿名访问
mc policy set download useradmin/testbucket1/afile

# 查看具体权限
mc policy get-json useradmin/testbucket1/afile

$ mc policy --recursive links useradmin/testbucket1/
http://127.0.0.1:9001/testbucket1/afile

# 直接下载该对象
wget http://127.0.0.1:9001/testbucket1/afile

# 清除 afile 的 download 权限
mc policy set none  useradmin/testbucket1/afile
```

#### 配置生效时间

JuiceFS S3 网关的所有管理 API 的更新操作都会立即生效并且持久化到 JuiceFS 文件系统中，而且接受该 API 请求的客户端也会立即生效。但是，当 S3 网关多机运行时，情况会有所不同，因为 S3 网关在处理请求鉴权时会直接采用内存缓存信息作为校验基准，避免每次请求都读取配置文件内容作为校验基准将带来不可接受的性能问题。

目前 JuiceFS S3 网关的缓存刷新策略是每 5 分钟强制更新内存缓存（部分操作也会触发缓存更新操作），这样保证多机情况下配置生效最长不会超过 5 分钟，可以通过 `--refresh-iam-interval` 参数来调整该时间。如果希望某个 S3 网关立即生效，可以尝试手动将其重启。

### 生成预签名 URL

JuiceFS S3 网关支持使用 `mc share` 命令来管理 MinIO 存储桶上对象的预签名 URL，用于下载和上传对象。

`mc share` 使用详情请参考 [这里](https://minio.org.cn/docs/minio/linux/reference/minio-mc/mc-share.html#)

### 桶事件通知

桶事件通知功能可以用来监视存储桶中对象上发生的事件，从而触发一些行为。

目前支持的对象事件类型有：

- `s3:ObjectCreated:Put`
- `s3:ObjectCreated:CompleteMultipartUpload`
- `s3:ObjectAccessed:Head`
- `s3:ObjectCreated:Post`
- `s3:ObjectRemoved:Delete`
- `s3:ObjectCreated:Copy`
- `s3:ObjectAccessed:Get`

支持的全局事件有：

- `s3:BucketCreated`
- `s3:BucketRemoved`

可以使用 mc 客户端工具通过 event 子命令设置和监听事件通知。MinIO 发送的用于发布事件的通知消息是 JSON 格式的，JSON 结构参考[这里](https://docs.aws.amazon.com/AmazonS3/latest/dev/notification-content-structure.html)。

JuiceFS S3 网关为了减少依赖，裁剪了部分支持的事件目标类型。目前存储桶事件可以支持发布到以下目标：

- Redis
- MySQL
- PostgreSQL
- WebHooks

```Shell
$ mc admin config get myjfs | grep notify
notify_webhook        publish bucket notifications to webhook endpoints
notify_mysql          publish bucket notifications to MySQL databases
notify_postgres       publish bucket notifications to Postgres databases
notify_redis          publish bucket notifications to Redis datastores
```

:::note
这里假设 JuiceFS 文件系统名为 `images`，启用 S3 Gateway 服务，在 mc 中定义它的别名为 `myjfs`。对于 S3 Gateway 而言，JuiceFS 文件系统名 `images` 就是一个存储桶名。
:::

#### 使用 Redis 发布事件

Redis 事件目标支持两种格式：`namespace` 和 `access`。

如果用的是 `namespacee` 格式，S3 网关将存储桶里的对象同步成 Redis hash 中的条目。对于每一个条目，对应一个存储桶里的对象，其 key 都被设为"存储桶名称/对象名称"，value 都是一个有关这个网关对象的 JSON 格式的事件数据。如果对象更新或者删除，hash 中对象的条目也会相应的更新或者删除。

如果使用的是 access , 网关使用[RPUSH](https://redis.io/commands/rpush)将事件添加到 list 中。这个 list 中每一个元素都是一个 JSON 格式的 list，这个 list 中又有两个元素，第一个元素是时间戳的字符串，第二个元素是一个含有在这个存储桶上进行操作的事件数据的 JSON 对象。在这种格式下，list 中的元素不会更新或者删除。

下面的步骤展示如何在 namespace 和 access 格式下使用通知目标。

1. 配置 Redis 到 S3 网关

   使用 mc admin config set 命令配置 Redis 为 事件通知的目标

    ```Shell
    # 命令行参数
    # mc admin config set myjfs notify_redis[:name] address="xxx" format="namespace|access" key="xxxx" password="xxxx" queue_dir="" queue_limit="0"
    # 具体举例
    $ mc admin config set myjfs notify_redis:1 address="127.0.0.1:6379/1" format="namespace" key="bucketevents" password="yoursecret" queue_dir="" queue_limit="0"
    ```

   你可以通过 `mc admin config get myjfs notify_redis` 来查看有哪些配置项，不同类型的目标其配置项也不同，针对 Redis 类型，其有以下配置项：

    ```Shell
    $ mc admin config get myjfs notify_redis
    notify_redis enable=off format=namespace address= key= password= queue_dir= queue_limit=0
    ```

   每个配置项的含义

    ```Shell
    notify_redis[:name]               支持设置多个 redis，只需要其 name 不同即可
    address*     (address)            Redis 服务器的地址。例如：localhost:6379
    key*         (string)             存储/更新事件的 Redis key, key 会自动创建
    format*      (namespace*|access)  是 namespace 还是 access，默认是 'namespace'
    password     (string)             Redis 服务器的密码
    queue_dir    (path)               未发送消息的暂存目录 例如 '/home/events'
    queue_limit  (number)             未发送消息的最大限制，默认是'100000'
    comment      (sentence)           可选的注释说明
    ```

   S3 网关支持持久事件存储。持久存储将在 Redis broker 离线时备份事件，并在 broker 恢复在线时重播事件。事件存储的目录可以通过 queue_dir 字段设置，存储的最大限制可以通过 queue_limit 设置。例如，queue_dir 可以设置为/home/events, 并且 queue_limit 可以设置为 1000. 默认情况下 queue_limit 是 100000。在更新配置前，可以通过 mc admin config get 命令获取当前配置。

    ```Shell
    $ mc admin config get myjfs notify_redis
    notify_redis:1 address="127.0.0.1:6379/1" format="namespace" key="bucketevents" password="yoursecret" queue_dir="" queue_limit="0"

    # 重启后生效
    $ mc admin config set myjfs notify_redis:1 queue_limit="1000"
    Successfully applied new settings.
    Please restart your server 'mc admin service restart myjfs'.
    # 注意这里无法使用 mc admin service restart myjfs 重启，JuiceFS S3 网关暂不支持该功能，当使用 mc 配置后出现该提醒时需要手动重启 JuiceFS Gateway
    ```

   使用 mc admin config set 命令更新配置后，重启 JuiceFS S3 网关让配置生效。如果一切顺利，JuiceFS S3 网关会在启动时输出一行信息，类似 `SQS ARNs: arn:minio:sqs::1:redis`

   根据你的需要，你可以添加任意多个 Redis 目标，只要提供 Redis 实例的标识符（如上例“notify_redis:1”中的“1”）和每个实例配置参数的信息即可。

2. 启用 bucket 通知

   我们现在可以在一个叫 images 的存储桶上开启事件通知。当一个 JPEG 文件被创建或者覆盖，一个新的 key 会被创建，或者一个已经存在的 key 就会被更新到之前配置好的 Redis hash 里。如果一个已经存在的对象被删除，这个对应的 key 也会从 hash 中删除。因此，这个 Redis hash 里的行，就映射着 images 存储桶里的.jpg 对象。

   要配置这种存储桶通知，我们需要用到前面步骤 S3 网关输出的 ARN 信息。更多有关 ARN 的资料，请参考[这里](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html)。

   使用 mc 为文件系统启用事件通知：

    ```Shell
    mc event add myjfs/images arn:minio:sqs::1:redis --suffix .jpg
    mc event list myjfs/images
    arn:minio:sqs::1:redis   s3:ObjectCreated:*,s3:ObjectRemoved:*,s3:ObjectAccessed:*   Filter: suffix=".jpg"
    ```

3. 验证 Redis

   启动 `redis-cli` 这个 Redis 客户端程序来检查 Redis 中的内容。运行 monitor Redis 命令将会输出在 Redis 上执行的每个命令的。

    ```Shell
    redis-cli -a yoursecret
    127.0.0.1:6379> monitor
    OK
    ```

   上传一个名为 myphoto.jpg 的文件到 images 存储桶。

    ```Shell
    mc cp myphoto.jpg myjfs/images
    ```

   在上一个终端中，你将看到 S3 网关在 Redis 上执行的操作：

    ```Shell
    127.0.0.1:6379> monitor
    OK
    1712562516.867831 [1 192.168.65.1:59280] "hset" "bucketevents" "images/myphoto.jpg" "{\"Records\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"minio:s3\",\"awsRegion\":\"\",\"eventTime\":\"2024-04-08T07:48:36.865Z\",\"eventName\":\"s3:ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"admin\"},\"requestParameters\":{\"principalId\":\"admin\",\"region\":\"\",\"sourceIPAddress\":\"127.0.0.1\"},\"responseElements\":{\"content-length\":\"0\",\"x-amz-request-id\":\"17C43E891887BA48\",\"x-minio-origin-endpoint\":\"http://127.0.0.1:9001\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"Config\",\"bucket\":{\"name\":\"images\",\"ownerIdentity\":{\"principalId\":\"admin\"},\"arn\":\"arn:aws:s3:::images\"},\"object\":{\"key\":\"myphoto.jpg\",\"size\":4,\"eTag\":\"40b134ab8a3dee5dd9760a7805fd495c\",\"userMetadata\":{\"content-type\":\"image/jpeg\"},\"sequencer\":\"17C43E89196AE2A0\"}},\"source\":{\"host\":\"127.0.0.1\",\"port\":\"\",\"userAgent\":\"MinIO (darwin; arm64) minio-go/v7.0.11 mc/RELEASE.2021-04-22T17-40-00Z\"}}]}"
    ```

   在这我们可以看到 S3 网关在 minio_events 这个 key 上执行了 HSET 命令。

   如果用的是 access 格式，那么 minio_events 就是一个 list，S3 网关就会调用 RPUSH 添加到 list 中，在 monitor 命令中将看到：

    ```Shell
    127.0.0.1:6379> monitor
    OK
    1712562751.922469 [1 192.168.65.1:61102] "rpush" "aceesseventskey" "[{\"Event\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"minio:s3\",\"awsRegion\":\"\",\"eventTime\":\"2024-04-08T07:52:31.921Z\",\"eventName\":\"s3:ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"admin\"},\"requestParameters\":{\"principalId\":\"admin\",\"region\":\"\",\"sourceIPAddress\":\"127.0.0.1\"},\"responseElements\":{\"content-length\":\"0\",\"x-amz-request-id\":\"17C43EBFD35A53B8\",\"x-minio-origin-endpoint\":\"http://127.0.0.1:9001\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"Config\",\"bucket\":{\"name\":\"images\",\"ownerIdentity\":{\"principalId\":\"admin\"},\"arn\":\"arn:aws:s3:::images\"},\"object\":{\"key\":\"myphoto.jpg\",\"size\":4,\"eTag\":\"40b134ab8a3dee5dd9760a7805fd495c\",\"userMetadata\":{\"content-type\":\"image/jpeg\"},\"sequencer\":\"17C43EBFD3DACA70\"}},\"source\":{\"host\":\"127.0.0.1\",\"port\":\"\",\"userAgent\":\"MinIO (darwin; arm64) minio-go/v7.0.11 mc/RELEASE.2021-04-22T17-40-00Z\"}}],\"EventTime\":\"2024-04-08T07:52:31.921Z\"}]"
    ```

#### 使用 MySQL 发布事件

MySQL 通知目标支持两种格式：`namespace` 和 `access`。

如果使用的是 `namespace` 格式，S3 网关将存储桶里的对象同步成数据库表中的行。每一行有两列：key_name 和 value。key_name 是这个对象的存储桶名字加上对象名，value 都是一个有关这个 S3 网关对象的 JSON 格式的事件数据。如果对象更新或者删除，表中相应的行也会相应的更新或者删除。

如果使用的是 `access`，S3 网关将将事件添加到表里，行有两列：event_time 和 event_data。event_time 是事件在 S3 网关 server 里发生的时间，event_data 是有关这个 S3 网关对象的 JSON 格式的事件数据。在这种格式下，不会有行会被删除或者修改。

下面的步骤展示的是如何在 `namespace` 格式下使用通知目标，与 `access` 类似，不再赘述。

1. 确保 MySQL 版本至少满足最低要求

   JuiceFS S3 网关要求 MySQL 版本 5.7.8 及以上，因为使用了 MySQL5.7.8 版本才引入的[JSON](https://dev.mysql.com/doc/refman/5.7/en/json.html) 数据类型。

2. 配置 MySQL 到 S3 网关

   使用 `mc admin config set` 命令配置 MySQL 为事件通知的目标

    ```Shell
    mc admin config set myjfs notify_mysql:myinstance table="minio_images" dsn_string="root:123456@tcp(172.17.0.1:3306)/miniodb"
    ```

   你可以通过 `mc admin config get myjfs notify_mysql` 来查看有哪些配置项，不同类型的目标其配置项也不同，针对 MySQL 类型，其有以下配置项：

    ```shell
    $ mc admin config get myjfs notify_mysql
    format=namespace dsn_string= table= queue_dir= queue_limit=0 max_open_connections=2
    ```

   每个配置项的含义

    ```Shell
    KEY:
    notify_mysql[:name]  发布存储桶通知到 MySQL 数据库。当需要多个 MySQL server endpoint 时，可以为每个配置添加用户指定的“name”（例如"notify_mysql:myinstance"）.

    ARGS:
    dsn_string*  (string)             MySQL 数据源名称连接字符串，例如 "<user>:<password>@tcp(<host>:<port>)/<database>"
    table*       (string)             存储/更新事件的数据库表名，表会自动被创建
    format*      (namespace*|access)  'namespace' 或者 'access', 默认是 'namespace'
    queue_dir    (path)               未发送消息的暂存目录 例如 '/home/events'
    queue_limit  (number)             未发送消息的最大限制，默认是 '100000'
    comment      (sentence)           可选的注释说明
    ```

   dsn_string 是必须的，并且格式为 `<user>:<password>@tcp(<host>:<port>)/<database>`

   MinIO 支持持久事件存储。持久存储将在 MySQL 连接离线时备份事件，并在 broker 恢复在线时重播事件。事件存储的目录可以通过 queue_dir 字段设置，存储的最大限制可以通过 queue_limit 设置。例如，queue_dir 可以设置为 /home/events, 并且 queue_limit 可以设置为 1000。默认情况下 queue_limit 是 100000。

   更新配置前，可以使用 `mc admin config get` 命令获取当前配置：

    ```Shell
    $ mc admin config get myjfs/ notify_mysql
    notify_mysql:myinstance enable=off format=namespace host= port= username= password= database= dsn_string= table= queue_dir= queue_limit=0
    ```

   使用带有 dsn_string 参数的 `mc admin config set` 的命令更新 MySQL 的通知配置：

    ```Shell
    mc admin config set myjfs notify_mysql:myinstance table="minio_images" dsn_string="root:xxxx@tcp(127.0.0.1:3306)/miniodb"
    ```

   请注意，根据你的需要，你可以添加任意多个 MySQL server endpoint，只要提供 MySQL 实例的标识符（如上例中的"myinstance"）和每个实例配置参数的信息即可。

   使用`mc admin config set`命令更新配置后，重启 S3 网关让配置生效。如果一切顺利，S3 网关 Server 会在启动时输出一行信息，类似 `SQS ARNs: arn:minio:sqs::myinstance:mysql`

3. 启用 bucket 通知

   我们现在可以在一个叫 images 的存储桶上开启事件通知，一旦上有文件上传到存储桶中，MySQL 中会 insert 一条新的记录或者一条已经存在的记录会被 update，如果一个存在对象被删除，一条对应的记录也会从 MySQL 表中删除。因此，MySQL 表中的行，对应的就是存储桶里的一个对象。

   要配置这种存储桶通知，我们需要用到前面步骤 MinIO 输出的 ARN 信息。更多有关 ARN 的资料，请参考[这里](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html)。

   假设 S3 网关服务别名叫 myjfs，可执行下列脚本：

    ```Shell
    # 使用 MySQL ARN 在“images”存储桶上添加通知配置。--suffix 参数用于过滤事件。
    mc event add myjfs/images arn:minio:sqs::myinstance:mysql --suffix .jpg
    # 在“images”存储桶上打印出通知配置。
    mc event list myjfs/images
    arn:minio:sqs::myinstance:mysql s3:ObjectCreated:*,s3:ObjectRemoved:*,s3:ObjectAccessed:* Filter: suffix=”.jpg”
    ```

4. 验证 MySQL

   打开一个新的 terminal 终端并上传一张 JPEG 图片到 images 存储桶。

    ```Shell
    mc cp myphoto.jpg myjfs/images
    ```

   打开一个 MySQL 终端列出表 minio_images 中所有的记录，将会发现一条刚插入的记录。

#### 使用 PostgreSQL 发布事件

整体方法与使用 MySQL 发布 MinIO 事件相同，这里不再赘述。

需要注意的是，该功能要求 PostgreSQL 9.5 版本及以上。S3 网关用了 PostgreSQL 9.5 引入的[INSERT ON CONFLICT](https://www.postgresql.org/docs/9.5/static/sql-insert.html#SQL-ON-CONFLICT) (aka UPSERT) 特性，以及 9.4 引入的[JSONB](https://www.postgresql.org/docs/9.4/static/datatype-json.html) 数据类型。

#### 使用 Webhook 发布事件

[Webhooks](https://en.wikipedia.org/wiki/Webhook) 采用推的方式获取数据，而不是一直去拉取。

1. 配置 webhook 到 S3 网关

   S3 网关支持持久事件存储。持久存储将在 webhook 离线时备份事件，并在 broker 恢复在线时重播事件。事件存储的目录可以通过 `queue_dir` 字段设置，存储的最大限制可以通过 `queue_limit` 设置。例如，`/home/events`，并且 `queue_limit` 可以设置为 1000。默认情况下 `queue_limit` 是 100000。

    ```Shell
    KEY:
    notify_webhook[:name]  发布存储桶通知到 webhook endpoints

    ARGS:
    endpoint*    (url)       webhook server endpoint，例如 http://localhost:8080/minio/events
    auth_token   (string)    opaque token 或者 JWT authorization token
    queue_dir    (path)      未发送消息的暂存目录 例如 '/home/events'
    queue_limit  (number)    未发送消息的最大限制，默认是 '100000'
    client_cert  (string)    Webhook 的 mTLS 身份验证的客户端证书
    client_key   (string)    Webhook 的 mTLS 身份验证的客户端证书密钥
    comment      (sentence)  可选的注释说明
    ```

   用 `mc admin config set` 命令更新配置，这里的 endpoint 是监听 webhook 通知的服务地址。保存配置文件并重启 MinIO 服务让配配置生效。注意，在重启 MinIO 时，这个 endpoint 必须是启动并且可访问到。

    ```Shell
    mc admin config set myjfs notify_webhook:1 queue_limit="0"  endpoint="http://localhost:3000" queue_dir=""
    ```

2. 启用 bucket 通知

    在这里，ARN 的值是 `arn:minio:sqs::1:webhook`。更多有关 ARN 的资料，请参考[这里](http://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html)。

    ```Shell
    mc mb myjfs/images-thumbnail
    mc event add myjfs/images arn:minio:sqs::1:webhook --event put --suffix .jpg
    ```

    如果 mc 报告无法创建 Bucket，请检查 S3 Gateway 是否启用了[多桶支持](#多桶支持)。

3. 采用 Thumbnailer 进行验证

   [Thumbnailer](https://github.com/minio/thumbnailer) 项目是一个使用 MinIO 的 listenBucketNotification API 的缩略图生成器示例，我们使用 [Thumbnailer](https://github.com/minio/thumbnailer) 来监听 S3 网关通知。如果有文件上传于是 S3 网关服务，Thumnailer 监听到该通知，生成一个缩略图并上传到 S3 网关服务。安装 Thumbnailer:

    ```Shell
    git clone https://github.com/minio/thumbnailer/
    npm install
    ```

   然后打开 Thumbnailer 的 `config/webhook.json` 配置文件，添加有关 MinIO server 的配置，使用下面的方式启动 Thumbnailer:

    ```Shell
    NODE_ENV=webhook node thumbnail-webhook.js
    ```

   Thumbnailer 运行在 `http://localhost:3000/`

   下一步，配置 MinIO server，让其发送消息到这个 URL（第一步提到的），并使用 mc 来设置存储桶通知（第二步提到的）。然后上传一张图片到 S3 网关 server：

    ```Shell
    mc cp ~/images.jpg myjfs/images
    .../images.jpg:  8.31 KB / 8.31 KB ┃▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓┃ 100.00% 59.42 KB/s 0s
    ```

   稍等片刻，然后使用 mc ls 检查存储桶的内容，你将看到有个缩略图出现了。

    ```Shell
    mc ls myjfs/images-thumbnail
    [2017-02-08 11:39:40 IST]   992B images-thumbnail.jpg
    ```


================================================
FILE: docs/zh_cn/guide/quota.md
================================================
---
title: 存储配额
sidebar_position: 4
---

JuiceFS 同时支持文件系统总配额和子目录配额，均可用于限制可用容量和可用 inode 数量。文件系统配额和目录配额均是硬限制，当文件系统总配额用尽时，后续写入会返回 `ENOSPC`（No space left）错误；而当目录配额用尽时，后续写入会返回 `EDQUOT`（Disk quota exceeded）错误。

:::tip 提示
存储限额设置会保存在元数据引擎中以供所有挂载点读取，每个挂载点的客户端也会缓存自己的已用容量和 inodes 数，周期性地向元数据引擎同步。与此同时，客户端也会周期性地从元数据引擎读取最新的用量值，从而实现用量信息在每个挂载点之间同步，但这种信息同步机制并不能保证用量数据被精确统计，可能会存在十秒级延迟。
:::

## 文件系统配额 {#file-system-quota}

JuiceFS v1.0 支持文件系统级别的存储配额。以 Linux 环境为例，使用系统自带的 `df` 命令可以看到，一个 JuiceFS 类型的文件系统默认的容量标识为 `1.0P` ：

```shell
$ df -Th | grep juicefs
JuiceFS:ujfs   fuse.juicefs  1.0P  682M  1.0P    1% /mnt
```

:::note 说明
JuiceFS 通过 FUSE 实现对 POSIX 接口的支持，因为底层通常是容量能够无限扩展的对象存储，所以标识容量只是一个估值（也代表无限制）并非实际容量，它会随着实际用量动态变化。
:::

通过客户端自带的 `config` 命令可以查看一个文件系统的详细信息：

```shell
$ juicefs config $METAURL
{
  "Name": "ujfs",
  "UUID": "1aa6d290-279b-432f-b9b5-9d7fd597dec2",
  "Storage": "minio",
  "Bucket": "127.0.0.1:9000/jfs1",
  "AccessKey": "herald",
  "SecretKey": "removed",
  "BlockSize": 4096,
  "Compression": "none",
  "Shards": 0,
  "Partitions": 0,
  "Capacity": 0,
  "Inodes": 0,
  "TrashDays": 0
}
```

### 限制总容量 {#limit-total-capacity}

可以在创建文件系统时通过 `--capacity` 设置容量限额，单位 GiB，例如创建一个可用容量为 100 GiB 文件系统的：

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --capacity 100 \
    $METAURL myjfs
```

也可以通过 `config` 命令，为一个已创建的文件系统设置容量限额：

```shell
$ juicefs config $METAURL --capacity 100
2022/01/27 12:31:39.506322 juicefs[16259] <INFO>: Meta address: postgres://herald@127.0.0.1:5432/jfs1
2022/01/27 12:31:39.521232 juicefs[16259] <WARNING>: The latency to database is too high: 14.771783ms
  capacity: 0 GiB -> 100 GiB
```

设置了存储限额的文件系统，标识容量会变成限制容量：

```shell
$ df -Th | grep juicefs
JuiceFS:ujfs   fuse.juicefs  100G  682M  100G    1% /mnt
```

### 限制 inode 总量 {#limit-total-number-of-inodes}

在 Linux 系统中，每个文件（文件夹也是文件的一种）不论大小都有一个 inode，因此限制 inode 数量等同于限制文件数量。

可以在创建文件系统时通过 `--inodes` 设置限额，例如：

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --inodes 100 \
    $METAURL myjfs
```

以上命令创建的文件系统仅允许存储 100 个文件，但不限制单个文件的大小，比如单个文件 1TB 甚至更大也没有问题，只要文件总数不超过 100 个即可。

也可以通过 `config` 命令，为一个已创建的文件系统设置容量限额：

```shell
$ juicefs config $METAURL --inodes 100
2022/01/27 12:35:37.311465 juicefs[16407] <INFO>: Meta address: postgres://herald@127.0.0.1:5432/jfs1
2022/01/27 12:35:37.322991 juicefs[16407] <WARNING>: The latency to database is too high: 11.413961ms
    inodes: 0 -> 100
```

### 组合使用 {#limit-total-capacity-and-inodes}

你可以结合 `--capacity` 和 `--inodes` 更灵活的设置文件系统的容量限额，比如，创建一个文件系统，限制总容量为 100TiB 且仅允许存储 100000 文件：

```shell
juicefs format --storage minio \
    --bucket 127.0.0.1:9000/jfs1 \
    ... \
    --capacity 102400 \
    --inodes 100000 \
    $METAURL myjfs
```

同样地，对于已创建的文件系统，可分别进行设置：

```shell
juicefs config $METAURL --capacity 102400
```

```shell
juicefs config $METAURL --inodes 100000
```

:::tip 提示
客户端会定期从元数据引擎读取最新的文件系统存储限额设置来更新本地的设置。刷新间隔由 `--heartbeat` 参数控制（默认值：12 秒）。其他挂载点可能需要等待最多一个 heartbeat 间隔时间才能完成限额设置的更新。
:::

## 目录配额 {#directory-quota}

JuiceFS v1.1 开始支持目录级别的存储配额，可以使用 `juicefs quota` 子命令进行目录配额管理和查询。

:::tip 提示
由于用量统计需要挂载客户端支持，请确保除所有可写入客户端已升级到 v1.1.0 以上版本再使用此特性。
:::

### 限制目录容量 {#limit-directory-capacity}

可以使用 `juicefs quota set $METAURL --path $DIR --capacity $N` 设置目录容量限额，单位 GiB。例如给目录`/test`设置 1GiB 的容量配额：

```shell
$ juicefs quota set $METAURL --path /test --capacity 1
+-------+---------+---------+------+-----------+-------+-------+
|  Path |   Size  |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+---------+---------+------+-----------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% | unlimited |   314 |       |
+-------+---------+---------+------+-----------+-------+-------+
```

设置成功后你可以看到有一个表格描述当前设置配额的目录、配额大小、当前用量等信息。

:::tip 提示
`quota` 子命令的使用无需本地挂载点，期望输入的目录路径为相对 JuiceFS 根目录的路径而非本地挂载路径。给大目录设置配额可能需要等待较长时间，因为需要计算目录当前用量。
:::

如果需要查询某个目录的配额和当前用量，可以使用 `juicefs quota get $METAURL --path $DIR` 命令：

```shell
$ juicefs quota get $METAURL --path /test
+-------+---------+---------+------+-----------+-------+-------+
|  Path |   Size  |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+---------+---------+------+-----------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% | unlimited |   314 |       |
+-------+---------+---------+------+-----------+-------+-------+
```

也可以使用 `juicefs quota ls $METAURL` 命令列出所有的目录配额。

### 限制目录的 inode 总量 {#limit-total-number-of-directory-inodes}

可以使用 `juicefs quota set $METAURL --path $DIR --inodes $N` 设置目录 inode 限额，单位为个。例如给目录`/test`设置 400 个 inode 的配额：

```shell
$ juicefs quota set $METAURL --path /test --inodes 400
+-------+---------+---------+------+--------+-------+-------+
|  Path |   Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+---------+---------+------+--------+-------+-------+
| /test | 1.0 GiB | 1.6 MiB |   0% |    400 |   314 |   78% |
+-------+---------+---------+------+--------+-------+-------+
```

### 组合使用 {#limit-capacity-and-inodes-of-directory}

可以结合 `--capacity` 和 `--inodes` 更灵活地设置目录的容量限额。比如，给`/test`目录设置 10GiB 和 1000 个 inode 的配额：

```shell
$ juicefs quota set $METAURL --path /test --capacity 10 --inodes 1000
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```

另外，你也可以不限制目录的容量和 inode 数（设为 `0` 表示不限制），只通过 `quota` 命令统计目录的当前用量：

```shell
$ juicefs quota set $METAURL --path /test --capacity 0 --inodes 0
+-------+-----------+---------+------+-----------+-------+-------+
|  Path |    Size   |   Used  | Use% |   Inodes  | IUsed | IUse% |
+-------+-----------+---------+------+-----------+-------+-------+
| /test | unlimited | 1.6 MiB |      | unlimited |   314 |       |
+-------+-----------+---------+------+-----------+-------+-------+
```

### 配额嵌套 {#nested-quota}

JuiceFS 允许自由地设置各级目录配额，实际使用的时候会递归地向上查询，确保当前目录用量满足每一级目录的配额设置。也就是说，就算父目录设置了一个较小的配额，也不影响子目录可以设置更大配额。

### 子目录挂载 {#subdirectory-mount}

JuiceFS 支持使用 [`--subdir`](../reference/command_reference.mdx#mount-metadata-options) 挂载任意子目录。如果挂载的子目录设置了目录配额，则可以使用系统自带的 `df` 命令查看目录配额和当前使用量。比如文件系统配额为 1PiB 和 10M 个 inode，而 `/test` 目录的配额为 1GiB 和 400 个 inode。使用根目录挂载时 `df` 命令的输出为：

```shell
$ df -h
Filesystem      Size  Used Avail Use% Mounted on
...
JuiceFS:myjfs   1.0P  1.6M  1.0P   1% /mnt/jfs

$ df -i -h
Filesystem     Inodes IUsed IFree IUse% Mounted on
...
JuiceFS:myjfs     11M   315   10M    1% /mnt/jfs
```

而使用 `/test` 子目录挂载时，`df` 命令的输出为：

```shell
$ df -h
Filesystem      Size  Used Avail Use% Mounted on
...
JuiceFS:myjfs   1.0G  1.6M 1023M   1% /mnt/jfs

$ df -i -h
Filesystem     Inodes IUsed IFree IUse% Mounted on
...
JuiceFS:myjfs     400   314    86   79% /mnt/jfs
```

:::note 说明
当挂载的子目录没有设置配额，JuiceFS 会逐级往上查询知道找到最近的目录配额再返回给 `df`。如果有多级父目录均设置目录配额，JuiceFS 会在计算后返回最小的可用容量和 inode 数量。
:::

### 用量检查与修复 {#usage-check-and-fix}

由于目录用量的更新是滞后且异步的，在异常情况下可能会发生丢失（比如客户端意外退出）。我们可以使用 `juicefs quota check $METAURL --path $DIR` 命令进行检查或修复：

```shell
$ juicefs quota check $METAURL --path /test
2023/05/23 15:40:12.704576 juicefs[1638846] <INFO>: quota of /test is consistent [base.go:839]
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```

目录用量正确时会输出当前的目录配额用量；失败时候则会输出错误日志：

```shell
$ juicefs quota check $METAURL --path /test
2023/05/23 15:48:17.494604 juicefs[1639997] <WARNING>: /test: quota(314, 4.0 KiB) != summary(314, 1.6 MiB) [base.go:843]
2023/05/23 15:48:17.494644 juicefs[1639997] <FATAL>: quota of /test is inconsistent, please repair it with --repair flag [main.go:31]
```

这时你可以使用 `--repair` 选项来修复目录用量：

```shell
$ juicefs quota check $METAURL --path /test --repair
2023/05/23 15:50:08.737086 juicefs[1640281] <WARNING>: /test: quota(314, 4.0 KiB) != summary(314, 1.6 MiB) [base.go:843]
2023/05/23 15:50:08.737123 juicefs[1640281] <INFO>: repairing... [base.go:852]
+-------+--------+---------+------+--------+-------+-------+
|  Path |  Size  |   Used  | Use% | Inodes | IUsed | IUse% |
+-------+--------+---------+------+--------+-------+-------+
| /test | 10 GiB | 1.6 MiB |   0% |  1,000 |   314 |   31% |
+-------+--------+---------+------+--------+-------+-------+
```


================================================
FILE: docs/zh_cn/guide/sync.md
================================================
---
title: 数据同步
sidebar_position: 7
description: 了解如何使用 JuiceFS 中的数据同步工具。
---

[`juicefs sync`](../reference/command_reference.mdx#sync) 是强大的数据同步工具，可以在所有支持的存储之间并发同步或迁移数据，包括对象存储、JuiceFS、本地文件系统，你可以在这三者之间以任意方向和搭配进行数据同步。除此之外，还支持同步通过 SSH 访问远程目录、HDFS、WebDAV 等，同时提供增量同步、模式匹配（类似 rsync）、分布式同步等高级功能。

:::tip 混用社区版和企业版客户端
`juicefs sync` 功能的代码在社区版和企业版之间共享代码，因此即便交叉混用不同版本的 JuiceFS 客户端，`sync` 命令也能正常工作——除了一个特例，就是使用 [`jfs://`](#sync-without-mount-point) 协议头的情况。社区版和企业版客户端有着不同的元数据引擎实现，因此如果用到了 `jfs://` 协议头，则不能混用不同版本的客户端。
:::

`juicefs sync` 用法以及常见示范如下：

```shell
juicefs sync [command options] SRC DST

# 从 OSS 同步到 S3
juicefs sync oss://mybucket.oss-cn-shanghai.aliyuncs.com s3://mybucket.s3.us-east-2.amazonaws.com

# 拷贝所有以 .gz 结尾的文件
juicefs sync --match-full-path --include='**.gz' --exclude='*' s3://xxx s3://xxx

# 拷贝不以 .gz 结尾的所有文件
juicefs sync --match-full-path --exclude='**.gz' s3://xxx/ s3://xxx

# 拷贝所有文件，但忽略名为 tmpdir 的子目录
juicefs sync --match-full-path --exclude='**/tmpdir/**' s3://xxx/ s3://xxx
```

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114856149652272&bvid=BV1JruJzbEDB&cid=31047811517&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 模式匹配 {#pattern-matching}

你可以通过 `--exclude` 和 `--include` 来包含或排除要同步的文件路径。如果不提供任何规则，默认会同步所有扫描到的文件（默认就是 `--include='*'`）。但如果需要使用 `--include` 实现只包含特定命名模式的文件，则**必须同时使用 `--exclude` 来排除其他文件**，具体请参考上方的示范命令。

:::tip
当提供多个匹配模式时，取决于你具体使用的「过滤模式」，对于判断是否要同步某个文件可能会变得很困难。此时建议加上 `--dry --debug` 选项提前查看要同步的具体文件是否符合预期，如果不符合预期则需要调整匹配模式。
:::

### 匹配规则 {#matching-rules}

匹配规则指的是给定一个路径与一个模式，然后确定该路径能否匹配上该模式。模式可以包含一些特殊字符（类似 shell 通配符）：

+ 单个 `*` 匹配任意字符，但在遇到 `/` 时终止匹配；
+ `**` 匹配任意字符，包括 `/`；
+ `?` 匹配任意非 `/` 的单个字符；
+ `[...]` 匹配一组字符，例如 `[a-z]` 匹配任意小写字母；
+ `[^...]` 不匹配一组字符，例如 `[^abc]` 匹配除 `a`、`b`、`c` 外的任意字符。

此外，还有一些匹配规则需要注意：

- 如果匹配模式中不包含特殊字符，将会完整匹配路径中的文件名。比如 `foo` 可以匹配 `foo` 和 `xx/foo`，但不匹配 `foo1`（无法前缀匹配）、`2foo`（无法后缀匹配）和 `foo/xx`（`foo` 不是目录）；
- 如果匹配模式以 `/` 结尾，将只匹配目录，而不匹配普通文件；
- 如果匹配模式以 `/` 开头，则表示匹配完整路径（路径不需要以 `/` 开头），因此 `/foo` 匹配的是传输中根目录的 `foo` 文件。

以下是一些匹配模式的例子：

+ `--exclude '*.o'` 将排除所有文件名能匹配 `*.o` 的文件；
+ `--exclude '/foo/*/bar'` 将排除根目录中名为 `foo` 的目录向下「两层」的目录中名为 `bar` 的文件；
+ `--exclude '/foo/**/bar'` 将排除根目录中名为 `foo` 的目录向下「任意层级」的目录中名为 `bar` 的文件。

`sync` 命令支持「完整路径过滤」和「逐层过滤」两种模式，这两种模式都支持使用 `--include` 和 `--exclude` 来过滤文件，但是解析的行为并不一样：默认情况下，`sync` 命令使用逐层过滤模式，这种模式的过滤行为无论是理解还是使用都较为复杂，但是基本兼容 rsync 的 `--include/--exclude` 选项，所以只推荐已经习惯了 rsync 过滤行为的用户使用。对于大多数 JuiceFS 用户，推荐通过 `--match-full-path` 选项来使用完整路径过滤模式，他的工作流程更容易理解。

### 完整路径过滤模式（推荐） <VersionAdd>1.2.0</VersionAdd> {#full-path-filtering-mode}

从 v1.2.0 开始，sync 命令支持 `--match-full-path` 选项。完整路径过滤模式是指对于待匹配的对象，直接将其「全路径」与多个模式依次进行匹配，一旦某个匹配模式匹配成功将会直接返回结果（「同步」或者「排除」），忽略后续的匹配模式。

下面是完整路径过滤模式的工作流程图：

![完整路径过滤模式流程图](../images/sync-full-path-filtering-mode-flow-chart.svg)

例如有一个路径为 `a1/b1/c1.txt` 的文件，以及 3 个匹配模式 `--include 'a*.txt' --inlude 'c1.txt' --exclude 'c*.txt'`。在完整路径过滤模式下，会直接将 `a1/b1/c1.txt` 这个字符串与匹配模式依次进行匹配。具体步骤为：

1. 尝试将 `a1/b1/c1.txt` 与 `--include 'a*.txt'` 匹配，结果是不匹配。因为 `*` 不能匹配 `/` 字符，参见[「匹配规则」](#matching-rules)；
2. 尝试将 `a1/b1/c1.txt` 与 `--inlude 'c1.txt'` 匹配，此时根据匹配规则将会匹配成功。后续的 `--exclude 'c*.txt'` 虽然根据匹配规则也能匹配上，但是根据完整路径过滤模式的逻辑，一旦匹配上某个模式，后续的模式将不再尝试匹配。所以最终的匹配结果是「同步」。

以下是更多示例：

+ `--exclude '/foo**'` 将排除所有根目录名为 `foo` 的文件或目录；
+ `--exclude '**foo/**'` 将排除所有以 `foo` 结尾的目录；
+ `--include '*/' --include '*.c' --exclude '*'` 将只包含所有目录和后缀名为 `.c` 的文件，除此之外的所有文件和目录都会被排除；
+ `--include 'foo/bar.c' --exclude '*'` 将只包含 `foo` 目录和 `foo/bar.c` 文件。

### 逐层过滤模式 {#layer-by-layer-filtering-mode}

逐层过滤模式的核心是先将完整路径按照目录层级拆分，并逐层组合成多个字符串序列。比如完整路径为 `a1/b1/c1.txt`，组成的序列就是 `a1`、`a1/b1`、`a1/b1/c1.txt`。然后将这个序列中的每个元素都当成完整路径过滤模式中的路径，依次执行[「完整路径过滤」](#full-path-filtering-mode)。

如果某个元素匹配上了某个模式，则会有两种处理逻辑：

- 如果该模式是 exclude 模式，则直接返回「排除」行为，作为最终的匹配结果；
- 如果该模式是 include 模式，则跳过本层级的后续待匹配的模式，直接进入下一层级。

如果某层的所有模式都未匹配，则进入下一层级。**如果所有层级匹配完毕后都没有返回「排除」，则返回默认的行为——即「同步」。**

下面是逐层过滤模式的工作流程图：

![逐层过滤模式流程图](../images/sync-layer-by-layer-filtering-mode-flow-chart.svg)

例如有一个路径为 `a1/b1/c1.txt` 的文件，以及 3 个匹配模式 `--include 'a*.txt' --inlude 'c1.txt' --exclude 'c*.txt'`。在逐层过滤模式中，组成的序列就是 `a1`、`a1/b1`、`a1/b1/c1.txt`。具体匹配步骤为：

1. 第一层级的路径为 `a1`，根据匹配模式，结果是全部未匹配。进入下一层级；
2. 第二层级的路径为 `a1/b1`，根据匹配模式，结果是全部未匹配。进入下一层级；
3. 第三层级的路径为 `a1/b1/c1.txt`，根据匹配模式，将会匹配上 `--inlude 'c1.txt'` 模式。该模式的行为是「同步」，进入下一层级；
4. 由于没有下一层级了，所以最终返回的行为是「同步」。

上面的例子是到最后一层才匹配成功，除此之外可能还有两种情况：

- 在最后一层之前匹配成功，且匹配模式是 exclude 模式，则直接返回「排除」行为作为最终结果，跳过后续的所有层级；
- 所有层级都已经匹配完毕，但都未匹配上，此时也将会返回「同步」行为。

如果你已经熟悉上一小节的“完整路径过滤模式”，那么逐层过滤其实就是按路径层级由高到低依次执行完整路径过滤，每层过滤只有两种结果：要么直接得到「排除」作为最终结果，要么进入下一层级。得到「同步」结果的唯一方式就是执行完所有过滤层级。

以下是更多示例：

+ `--exclude /foo` 将排除所有根目录名为 `foo` 的文件或目录；
+ `--exclude foo/` 将排除所有名为 `foo` 的目录；
+ 对于 `dir_name/.../.../...` 这种多级目录来说，将按照目录层级匹配 `dir_name` 下的所有路径。如果某个文件的父目录被「排除」了，那即使加上了这个文件的 include 规则，也不会同步这个文件。如果想要同步这个文件就必须保证它的「所有父目录」都不要被排除。例如，下面的例子中 `/some/path/this-file-will-not-be-synced` 文件将不会被同步，因为它的父目录 `some` 已经被规则 `--exclude '*'` 所排除：

  ```shell
  --include '/some/path/this-file-will-not-be-synced' \
  --exclude '*'
  ```

  一种解决方式是包含目录层级中的所有目录，也就是使用 `--include '*/'` 规则（需放在 `--exclude '*'` 规则的前面）；另一种解决方式是为所有父目录增加 include 规则，例如：

  ```shell
  --include '/some/' \
  --include '/some/path/' \
  --include '/some/path/this-file-will-be-synced' \
  --exclude '*'
  ```

## 存储协议 {#storage-protocols}

凡是 JuiceFS 支持的[存储系统](../reference/how_to_set_up_object_storage.md)，都可以使用 sync 命令来同步数据。特别一提，如果其中一端是 JuiceFS 文件系统，那么建议优先使用[无挂载点同步](#sync-without-mount-point)方式。

### 无挂载点同步 <VersionAdd>1.1</VersionAdd> {#sync-without-mount-point}

在两个存储系统之间同步数据，如果其中一方是 JuiceFS，推荐直接使用 `jfs://` 协议头，而不是先挂载 JuiceFS，再访问本地目录。这样便能跳过挂载点，直接读取或写入数据，在大规模场景下，绕过 FUSE 挂载点将能节约资源开销以及提升数据同步性能。

```shell
myfs=redis://10.10.0.8:6379/1 juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/movies/ jfs://myfs/movies/
```

### 对象存储与 JuiceFS 之间同步 {#synchronize-between-object-storage-and-juicefs}

将对象存储的 `movies` 目录同步到 JuiceFS 文件系统：

```shell
# 挂载 JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# 执行同步
juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/movies/ /mnt/jfs/movies/
```

将 JuiceFS 文件系统的 `images` 目录同步到对象存储：

```shell
# 挂载 JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# 执行同步
juicefs sync /mnt/jfs/images/ s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/images/
```

### 对象存储与对象存储之间同步 {#synchronize-between-object-storages}

将对象存储的全部数据同步到另一个对象存储桶：

```shell
juicefs sync s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

### 本地及服务器之间同步 {#synchronize-between-local-and-remote-servers}

对于本地计算机上的目录之间拷贝文件，直接指定数据源与目标端的路径即可，比如将 `/media/` 目录同步到 `/backup/` 目录：

```shell
juicefs sync /media/ /backup/
```

如果需要在服务器之间同步，可以通过 SFTP/SSH 协议访问目标服务器，例如，将本地的 `/media/` 目录同步到另一台服务器的 `/backup/` 目录：

```shell
juicefs sync /media/ username@192.168.1.100:/backup/
# 指定密码（可选）
juicefs sync /media/ "username:password"@192.168.1.100:/backup/
```

当使用 SFTP/SSH 协议时，如果没有指定密码，执行 sync 任务时会提示输入密码。如果希望显式指定用户名和密码，则需要用半角引号把用户名和密码括起来，用户名和密码之间用半角冒号分隔。

## 同步行为

### 增量同步与全量同步 {#incremental-and-full-synchronization}

`juicefs sync` 默认以增量同步方式工作，对于已存在的文件，仅在文件大小不一样时，才再次同步进行覆盖。在此基础上，还可以指定 [`--update`](../reference/command_reference.mdx#sync)，在源文件 `mtime` 更新时进行覆盖。如果你的场景对正确性有着极致要求，可以指定 [`--check-new`](../reference/command_reference.mdx#sync) 或 [`--check-all`](../reference/command_reference.mdx#sync)，来对两边的文件进行字节流比对，确保数据一致。

如需全量同步，即不论目标路径上是否存在相同的文件都重新同步，可以使用 `--force-update` 或 `-f`。例如，将对象存储的 `movies` 目录全量同步到 JuiceFS 文件系统：

```shell
# 挂载 JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# 执行全量同步
juicefs sync --force-update s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/movies/ /mnt/jfs/movies/
```

### 目录结构与文件权限 {#directory-structure-and-file-permissions}

默认情况下，sync 命令只同步文件对象以及包含文件对象的目录，空目录不会被同步。如需同步空目录，可以使用 `--dirs` 选项。

另外，在 local、SFTP、HDFS 等文件系统之间同步时，如需保持文件权限，可以使用 `--perms` 选项。

### 拷贝符号链接 {#copy-symbolic-links}

JuiceFS `sync` 在**本地目录之间**同步时，支持通过设置 `--links` 选项开启遇到符号链时同步其自身而不是其指向的对象的功能。同步后的符号链接指向的路径为源符号链接中存储的原始路径，无论该路径在同步前后是否可达都不会被转换。

另外需要注意的几个细节

1. 符号链接自身的 `mtime` 不会被拷贝；
1. `--check-new` 和 `--perms` 选项的行为在遇到符号链接时会被忽略。

### 数据同步与碎片合并 {#sync-and-compaction}

对于顺序写场景，一定要尽力保证每个文件的写入都有最少 4M（默认块大小）的缓冲区可用，如果写并发太高，或者缓冲区设置太小，都会导致原本高效的“大块写”退化为“碎片化缓慢写”。叠加上 JuiceFS 的碎片合并，可能会带来严重的写放大问题。

碎片合并情况可以通过 `juicefs_compact_size_histogram_bytes` 这个指标来观测。如果在 `sync` 期间碎片合并流量很高，说明需要进行相关调优。推荐实践和调优思路如下：

* 如果对象存储的写带宽不足，慎用高并发（`--threads`），最好从默认值甚至更低的并发开始测起，谨慎增加到满意的速度；
* 如果目的地是 JuiceFS 文件系统，确保该文件系统的 JuiceFS 客户端有着充足的[读写缓冲区](./cache.md#buffer-size)，按照每个文件的写入都必须起码预留 4M 的写入空间，那么 `--buffer-size` 起码要大于等于 `--threads` 参数的 4 倍，如果希望进一步提高写入并发，那么建议使用 8 或 12 倍的并发量来设置缓冲区。特别注意，根据写入目的地使用的协议头不同，设置缓冲区的方法也不同：
  * 目的地是 `jfs://` 协议头的文件系统，客户端进程就是 `juicefs sync` 命令本身，此时 `--buffer-size` 参数需要追加到 `juicefs sync` 命令里；
  * 目的地是本地的 FUSE 挂载点，那么客户端进程是宿主机上运行的 `juicefs mount` 命令，此时 `--buffer-size` 参数追加到该挂载点的 `juicefs mount` 命令里。
* 如果需要施加限速，那么加上了 `--bwlimit` 参数后，需要降低 `--threads`，避免过高的并发争抢带宽，产生类似的碎片化问题。每个对象存储的延迟和吞吐不尽相同，再次无法给出细致的调优计算流程，建议从更低的并发开始重新测试。

### 删除特定文件

模式匹配还可以实现删除存储系统中特定文件。诀窍是在本地创建一个空目录，将其作为 `SRC`。

示范如下，谨慎起见，所有示范均添加了 `--dry --debug` 选项来空运行，不会实际删除任何文件，而是打印执行计划。验证成功后，去掉这两个选项便能实际执行。

```shell
mkdir empty-dir
# 删除 mybucket 中所有对象，但保留后缀名为 .gz 的文件
juicefs sync ./empty-dir/ s3://mybucket.s3.us-east-2.amazonaws.com/ --match-full-path --delete-dst --exclude='**.gz' --include='*' --dry --debug
# 删除 mybucket 中所有后缀名为 .gz 的文件
juicefs sync ./empty-dir/ s3://mybucket.s3.us-east-2.amazonaws.com/ --match-full-path --delete-dst --include='**.gz' --exclude='*' --dry --debug
```

## 加速同步 {#accelerate-sync}

`juicefs sync` 默认启用 10 个线程执行同步任务，可以根据需要设置 `--threads` 选项调大或减少线程数。但也要注意，受限于有限的单机资源，一味增加 `--threads` 未必能持续提升同步速度，反而可能会导致 OOM。因此如果同步速度不足，还需要考虑：

* `SRC` 和 `DST` 的存储系统是否已经达到了带宽上限，如果其中一个存储已经到达带宽限制，同步的瓶颈就在这里，增加并发度也不会继续提升同步速度；
* 单机资源是否吃紧，比如 CPU、网卡拥堵。如果同步受限于单机资源，那么可以考虑：
  * 如果运行环境有硬件条件更好的节点（CPU、网络出口带宽等），可以换用该节点来运行 `juicefs sync`，通过 SSH 访问源数据，例如 `juicefs sync root@src:/data /jfs/data`；
  * 使用[分布式同步](#distributed-sync)，在下方相关小节介绍。
* 如果同步的数据以小文件为主，并且 `SRC` 的存储系统的 `list` API 性能极佳，那么 `juicefs sync` 默认的单线程 `list` 可能会成为瓶颈。此时考虑启用[并发 `list`](#concurrent-list) 操作，在下一小节介绍。

### 并发 `list` {#concurrent-list}

在 `juicefs sync` 命令的输出中，关注 `Pending objects` 的数量，如果该值持续为 0，说明消费速度大于生产，可以增大 `--list-threads` 来启用并发 `list`，以及用 `--list-depth` 来控制并发 `list` 的目录深度。

比方说，如果你面对的是 JuiceFS 所使用的对象存储服务，那么目录结构为 `/<vol-name>/chunks/xxx/xxx/...`，对于这样的目录结构，使用 `--list-depth=2` 来实现对于 `/<vol-name>/chunks` 的并发列表操作，是比较合适的选择。

### 分布式同步 {#distributed-sync}

在两个对象存储之间同步数据，就是从一端拉取数据再推送到另一端，同步的效率取决于客户端与云之间的带宽：

![JuiceFS-sync-single](../images/juicefs-sync-single.png)

在同步大量数据时，单机带宽往往会被占满出现瓶颈，针对这种情况，考虑使用多机并发同步：

![JuiceFS-sync-worker](../images/juicefs-sync-worker.png)

Manager 作为主控执行 `sync` 命令，通过 `--worker` 参数定义多个 Worker 节点（Manager 自身也参与同步），JuiceFS 会根据 Worker 的总数量，动态拆分同步任务并分发给各个节点并发执行，单位时间内能处理的数据量更大，总带宽也成倍增加。

在配置多机并发同步任务时，需要提前配置好 Manager 节点到 Worker 节点的 SSH 免密登录，如果 Worker 节点的 SSH 端口不是默认的 22，请在 Manager 节点的 `~/.ssh/config` 设置其端口号。Manager 会将 JuiceFS 客户端程序分发到 Worker 节点，为避免兼容性问题，Manager 和 Worker 应使用相同类型和架构的操作系统。

举例说明，用分布式同步的方式进行对象存储间的数据同步：

```shell
juicefs sync --worker bob@192.168.1.21,tom@192.168.1.22 s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

运行这个命令后，当前节点与两个 Worker 节点 `bob@192.168.1.21` 和 `tom@192.168.1.22` 将共同分担数据同步任务。

上方的示范中是对象存储 → 对象存储的数据同步，如果需要基于 FUSE 挂载点做数据同步，那么可以在所有节点挂载 JuiceFS，然后用类似下方的命令来进行分布式同步：

```shell
# 源文件系统需要更好的读性能，因此增大 buffer-size
parallel-ssh -h hosts.txt -i juicefs mount -d redis://10.10.0.8:6379/1 /jfs-src --buffer-size=1024 --cache-size=0

# 目标文件系统需要更好的写性能
parallel-ssh -h hosts.txt -i juicefs mount -d redis://10.10.0.8:6379/1 /jfs-dst --buffer-size=1024 --cache-size=0 --max-uploads=50

# 挂载完毕后，用下方命令拷贝数据
juicefs sync --worker host1,host2 /jfs-src /jfs-dst
```

## 观测和监控 {#observation}

简单来说，用 `sync` 命令拷贝大文件时，进度条可能会迟迟不更新，如果担心命令未能正常工作，可以用其他手段对传输情况进行观测。

`sync` 假定了使用场景是拷贝大量文件，因此进度的计算也是针对多个文件设计的：每一个文件完成了拷贝后，进度会更新一次。因此如果面对的都是大文件，单个文件的拷贝速度太慢，进度条就会变化缓慢，或者呈现卡死的状态。如果目的地端协议不支持 multipart upload（比如 `file`、`sftp`、`jfs`、`gluster` 协议头），单个文件会单线程进行复制，无法对大文件进行并发上传。可想而知，大文件 + 不支持 multipart upload，将会更容易出现进度条卡死的情况。

如果你观察到进度不再变化，参考下列手段进行观测和排查：

* 为 `juicefs sync` 添加 [`--verbose` 或 `--debug`](../reference/command_reference.mdx#global-options) 参数，打印 debug 日志；
* 如果数据同步的两方有任何一个是 JuiceFS 宿主机挂载点：
  * 用 [`juicefs stats`](../administration/fault_diagnosis_and_analysis.md#stats) 快速查看文件系统是否正在写入（或读出）；
  * 阅读[客户端日志](../administration/fault_diagnosis_and_analysis.md#client-log)（默认 `/var/log/juicefs.log`），观察是否有[慢请求或者超时错误日志](../administration/troubleshooting.md#io-error-object-storage)。
* 如果数据同步的目的地是宿主机本地盘，可以直接观察目录下是否存在名称中带 `.jfs.xxx.tmp.xxx` 后缀的临时文件，`sync` 过程中会将传输结果写入临时文件，待传输完成后进行重命名，才完成最终的写入。观察临时文件大小是否变化，就能确定当前的写入状况；
* 如果传输目的地均为对象存储，可以通过类似 `nethogs` 的命令，查看出入网流量，来判断传输进展；
* 以上手段均未能获得有效排查信息，则需要对 `sync` 进程采集 goroutine，结合源码分析排查：

   ```shell
   # 将 <PID> 替换为卡死的 sync 命令的 PID，记下 pprof 监听端口
   lsof -p <PID> | grep TCP | grep LISTEN
   # pprof 端口一般是 6061，如果已经被占用，则需要递增，需要根据实际情况修改
   curl -s localhost:6061/debug/pprof/goroutine?debug=1
   ```

如果需要监控 `sync` 命令的进度，可以使用 [`--metrics`](../reference/command_reference.mdx#sync-metrics-related-options) 参数指定监控指标地址，默认为 `127.0.0.1:9567`。用 Prometheus 抓取这些指标，就能进行监控。

## 场景应用 {#application-scenarios}

### 数据异地容灾备份 {#geo-disaster-recovery-backup}

异地容灾备份针对的是文件本身，因此应将 JuiceFS 中存储的文件同步到其他的对象存储，例如，将 JuiceFS 文件系统中的文件同步到对象存储：

```shell
# 挂载 JuiceFS
juicefs mount -d redis://10.10.0.8:6379/1 /mnt/jfs
# 执行同步
juicefs sync /mnt/jfs/ s3://ABCDEFG:HIJKLMN@aaa.s3.us-west-1.amazonaws.com/
```

### 建立 JuiceFS 数据副本 {#build-a-juicefs-data-copy}

与面向文件本身的容灾备份不同，建立 JuiceFS 数据副本的目的是为 JuiceFS 的数据存储建立一个内容和结构完全相同的镜像，当使用中的对象存储发生了故障，可以通过修改配置切换到数据副本继续工作。需要注意这里仅复制了 JuiceFS 文件系统的数据，并没有复制元数据，元数据引擎的数据备份依然需要。

这需要直接操作 JuiceFS 底层的对象存储，将它与目标对象存储之间进行同步。例如，要把对象存储作为 JuiceFS 文件系统的数据副本：

```shell
juicefs sync cos://ABCDEFG:HIJKLMN@ccc-125000.cos.ap-beijing.myqcloud.com oss://ABCDEFG:HIJKLMN@bbb.oss-cn-hangzhou.aliyuncs.com
```

### 使用 S3 网关进行跨区域数据同步 {#sync-across-region}

通过 POSIX 方式访问 JuiceFS 时，会有频繁的元数据访问，跨区域访问元数据的延迟比较高会影响访问性能。如果需要跨区域传输大量小文件，这时元数据服务延迟高对性能影响更严重。

![sync via public metadata service](../images/sync-public-metadata.svg)

在这种情况下，可以通过跨区访问部署在源区域的 S3 网关来提升性能，它可以大幅减少跨区域访问的请求数。

![sync via gateway](../images/sync-via-gateway.svg)

阅读[「S3 网关」](./gateway.md)学习如何使用和部署 S3 网关。


================================================
FILE: docs/zh_cn/introduction/README.md
================================================
---
title: JuiceFS 简介
sidebar_position: 1
slug: .
pagination_next: introduction/architecture
---

**JuiceFS** 是一款面向云原生设计的高性能分布式文件系统，在 Apache 2.0 开源协议下发布。提供完备的 [POSIX](https://en.wikipedia.org/wiki/POSIX) 兼容性，可将几乎所有对象存储接入本地作为海量本地磁盘使用，亦可同时在跨平台、跨地区的不同主机上挂载读写。

JuiceFS 采用「数据」与「元数据」分离存储的架构，从而实现文件系统的分布式设计。文件数据本身会被切分保存在[对象存储](../reference/how_to_set_up_object_storage.md#supported-object-storage)（例如 Amazon S3），而元数据则可以保存在 Redis、MySQL、TiKV、SQLite 等多种[数据库](../reference/how_to_set_up_metadata_engine.md)中，你可以根据场景与性能要求进行选择。

JuiceFS 提供了丰富的 API，适用于各种形式数据的管理、分析、归档、备份，可以在不修改代码的前提下无缝对接大数据、机器学习、人工智能等应用平台，为其提供海量、弹性、低价的高性能存储。运维人员不用再为可用性、灾难恢复、监控、扩容等工作烦恼，专注于业务开发，提升研发效率。同时运维细节的简化，对 DevOps 极其友好。

<div className="video-container">
  <iframe src="//player.bilibili.com/player.html?aid=931107196&bvid=BV1HK4y197va&cid=350876578&page=1&autoplay=0" width="100%" height="360" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="true"> </iframe>
</div>

## 核心特性 {#features}

1. **POSIX 兼容**：像本地文件系统一样使用，无缝对接已有应用，无业务侵入性；
2. **HDFS 兼容**：完整兼容 [HDFS API](../deployment/hadoop_java_sdk.md)，提供更强的元数据性能；
3. **S3 兼容**：提供 [S3 网关](../guide/gateway.md) 实现 S3 协议兼容的访问接口；
4. **云原生**：通过 [Kubernetes CSI 驱动](../deployment/how_to_use_on_kubernetes.md) 轻松地在 Kubernetes 中使用 JuiceFS；
5. **分布式设计**：同一文件系统可在上千台服务器同时挂载，高性能并发读写，共享数据；
6. **强一致性**：确认的文件修改会在所有服务器上立即可见，保证强一致性；
7. **强悍性能**：毫秒级延迟，近乎无限的吞吐量（取决于对象存储规模），查看[性能测试结果](../benchmark/benchmark.md)；
8. **数据安全**：支持传输中加密（encryption in transit）和静态加密（encryption at rest），[查看详情](../security/encryption.md)；
9. **文件锁**：支持 BSD 锁（flock）和 POSIX 锁（fcntl）；
10. **数据压缩**：支持 [LZ4](https://lz4.github.io/lz4) 和 [Zstandard](https://facebook.github.io/zstd) 压缩算法，节省存储空间。

## 应用场景 {#scenarios}

JuiceFS 为海量数据存储设计，可以作为很多分布式文件系统和网络文件系统的替代，特别是以下场景：

- **大数据分析**：HDFS 兼容；与主流计算引擎（Spark、Presto、Hive 等）无缝衔接；无限扩展的存储空间；运维成本几乎为 0；性能远好于直接对接对象存储。
- **机器学习**：POSIX 兼容，可以支持所有机器学习、深度学习框架；方便的文件共享还能提升团队管理、使用数据效率。
- **Kubernetes**：JuiceFS 支持 Kubernetes CSI；为容器提供解耦的文件存储，令应用服务可以无状态化；方便地在容器间共享数据。
- **共享工作区**：可以在任意主机挂载；没有客户端并发读写限制；POSIX 兼容已有的数据流和脚本操作。
- **数据备份**：在无限平滑扩展的存储空间备份各种数据，结合共享挂载功能，可以将多主机数据汇总至一处，做统一备份。

## 数据隐私 {#data-privacy}

JuiceFS 是开源软件，你可以在 [GitHub](https://github.com/juicedata/juicefs) 找到完整的源代码。在使用 JuiceFS 存储数据时，数据会按照一定的规则被拆分成数据块并保存在你自己定义的对象存储或其它存储介质中，数据所对应的元数据则存储在你自己定义的数据库中。

## 更多相关信息 {#more-info}

* **案例**：想了解更多相似场景的实践案例，请访问[用户案例](https://juicefs.com/zh-cn/blog/user-stories)。
* **视频**：我们在 [Bilibili 频道](https://space.bilibili.com/1206844881)提供了丰富的视频教程。
* **加入社群**：欢迎加入我们的[微信用户组](https://juicefs.com/zh-cn/wechat-user-group)（中文）或者 [Slack](https://go.juicefs.com/slack)（英文），与 JuiceFS 用户共同探讨。
* **Office Hours**：每月第 2 周的星期三 16:00-17:00（UTC+8）在线上举行，Juicedata 工程师将为你实时答疑解惑。请加入微信用户组获取最新活动信息。
* **AI 助手**：如果你遇到了任何问题，欢迎使用「Ask AI」功能（右下角）求助 AI 助手。AI 助手的知识库来源于文档以及 GitHub 中的相关内容。


================================================
FILE: docs/zh_cn/introduction/architecture.md
================================================
---
title: 技术架构
sidebar_position: 2
slug: /architecture
description: 本文介绍 JuiceFS 的技术架构以及由此带来的技术优势，同时介绍 JuiceFS 的文件存储原理。
---

JuiceFS 文件系统由三个部分组成：

![JuiceFS-arch](../images/juicefs-arch.svg)

**JuiceFS 客户端（Client）**：所有文件读写，以及碎片合并、回收站文件过期删除等后台任务，均在客户端中发生。客户端需要同时与对象存储和元数据引擎打交道。客户端支持多种接入方式：

- 通过 **FUSE**，JuiceFS 文件系统能够以 POSIX 兼容的方式挂载到服务器，将海量云端存储直接当做本地存储来使用。点击[此处](https://juicefs.com/docs/zh/community/getting-started/installation)查看使用详情。
- 通过 **Python SDK**，在无法通过 FUSE 挂载，或需要在 Python 进程中直接访问文件系统的场景，可以使用 Python SDK 直接读写文件系统。此外，Python SDK 原生实现了 fsspec 便于接入 Ray 等框架。点击[此处](https://juicefs.com/docs/zh/community/deployment/python_sdk)查看使用详情。
- 通过 **Windows 客户端**，获得接近本地的文件系统体验。点击[此处](https://juicefs.com/docs/zh/community/tutorials/windows)查看使用详情。
- 通过 **Hadoop Java SDK**，JuiceFS 文件系统能够直接替代 HDFS，为 Hadoop 提供低成本的海量存储。点击[此处](https://juicefs.com/docs/zh/community/hadoop_java_sdk)查看使用细节。
- 通过 **Kubernetes CSI 驱动**，JuiceFS 文件系统能够直接为 Kubernetes 提供海量存储。点击[此处](https://juicefs.com/docs/zh/csi/introduction)查看 JuiceFS CSI 文档。
- 通过 **S3 网关**，使用 S3 作为存储层的应用可直接接入，同时可使用 AWS CLI、s3cmd、MinIO client 等工具访问 JuiceFS 文件系统。点击[此处](https://juicefs.com/docs/zh/community/guide/gateway)查看使用详情。
- 通过 **WebDAV 服务**，以 HTTP 协议，以类似 RESTful API 的方式接入 JuiceFS 并直接操作其中的文件。

**数据存储（Data Storage）**：文件将会被切分上传至对象存储服务。JuiceFS 支持几乎所有的公有云对象存储，同时也支持 OpenStack Swift、Ceph、MinIO 等私有化的对象存储。

**元数据引擎（Metadata Engine）**：用于存储文件元数据（metadata），包含以下内容：

- 常规文件系统的元数据：文件名、文件大小、权限信息、创建修改时间、目录结构、文件属性、符号链接、文件锁等。
- 文件数据的索引：文件的数据分配和引用计数、客户端会话等。

JuiceFS 采用多引擎设计，目前已支持 Redis、TiKV、MySQL/MariaDB、PostgreSQL、SQLite 等作为元数据服务引擎，也将陆续实现更多元数据存储引擎。欢迎[提交 Issue](https://github.com/juicedata/juicefs/issues) 反馈你的需求。

## JuiceFS 如何存储文件 {#how-juicefs-store-files}

与传统文件系统只能使用本地磁盘存储数据和对应的元数据的模式不同，JuiceFS 会将数据格式化以后存储在对象存储，同时会将文件的元数据存储在元数据引擎。在这个过程中，Chunk、Slice、Block 是三个重要的概念：

对于 JuiceFS，每一个文件都由 1 或多个「Chunk」组成，每个 Chunk 最大 64M。不论文件有多大，所有的读写都会根据其偏移量（也就是产生读写操作的文件位置）来定位到对应的 Chunk。正是这种分而治之的设计，让 JuiceFS 面对大文件也有优秀的性能。只要文件总长度没有变化，不论经历多少修改写入，文件的 Chunk 切分都是固定的。

![file-and-chunks](../images/file-and-chunks.svg)

Chunk 的存在是为了优化查找定位，实际的文件写入则在「Slice」上进行。在 JuiceFS 中，一个 Slice 代表一次连续写入，隶属于某个 Chunk，并且不能跨越 Chunk 边界，因此 Slice 长度也不会超 64M。

举例说明，如果一个文件是由一次连贯的顺序写生成，那么每个 Chunk 中只将会仅包含一个 Slice。上方的示意图就属于这种情况：顺序写入一个 160M 文件，最终会产生 3 个 Chunk，而每个 Chunk 仅包含一个 Slice。

文件写入会产生 Slice，而调用 `flush` 则会将这些 Slice 持久化。`flush` 可以被用户显式调用，就算不调用，JuiceFS 客户端也会自动在恰当的时机进行 `flush`，防止[缓冲区](../guide/cache.md#buffer-size)被写满。持久化到对象存储时，为了能够尽快写入，会对 Slice 进行进一步拆分成一个个「Block」（默认最大 4M），多线程并发写入以提升写性能。上边介绍的 Chunk、Slice，其实都是逻辑数据结构，Block 则是最终的物理存储形式，是对象存储和磁盘缓存的最小存储单元。

![slice-to-block](../images/slice-to-block.svg)

因此，文件写入 JuiceFS 后，你不会在对象存储中找到原始文件，存储桶中只有一个 `chunks` 目录和一堆数字编号的目录和文件，让人不禁疑惑「我的文件到底去了哪儿」？但事实上，这些数字编号的对象存储文件正是经过 JuiceFS 拆分存储的 Block，而这些 Block 与 Chunk、Slice 的对应关系，以及其他元数据信息（比如文件名、大小等属性）则存储在元数据引擎中，这样的分离设计，让 JuiceFS 文件系统得以高性能运作。

![how-JuiceFS-stores-files](../images/how-juicefs-stores-files.svg)

回到逻辑数据结构的话题，如果文件并不是由连贯的顺序写生成，而是多次追加写，每次追加均调用 `flush` 触发写入上传，就会产生多个 Slice。如果每次追加写入的数据量不足 4M，那么最终存入对象存储的数据块，也会是一个个小于 4M 的 Block。

![small-append](../images/small-append.svg)

取决于写入模式，Slice 的排列模式可以是多种多样的：如果文件在相同区域被反复修改，Slice 之间会发生重叠。如果在互不重合的区域进行写入，Slice 中间会有间隔。但不论 Slice 的排列有多复杂，当读文件发生时，对于每一处文件位置，都会读到该位置最新写入的 Slice，用下图可以更加直观地理解：Slice 虽然会相互堆叠，但读文件一定是“从上往下看”，因此一定会看到该文件的最新状态。

![complicate-pattern](../images/complicate-pattern.svg)

正是由于 Slice 会相互覆盖，JuiceFS 在 Chunk 与 Slice 的引用关系中，[标记了各个 Slice 的有效数据偏移范围](../development/internals.md#sliceref)，用这种方式告诉文件系统，每一个 Slice 中的哪些部分是有效的数据。

但也不难想象，读取文件需要查找「当前读取范围内最新写入的 Slice」，在上图所示的大量堆叠 Slice 的情况下，这样的反复查找将会显著影响读性能，我们称之为文件「碎片化」。碎片化不仅影响读性能，还会在各个层面（对象存储、元数据）增加空间占用。因此每当写入发生时，客户端都会判断文件的碎片化情况，并异步地运行碎片合并，将同一个 Chunk 内的所有 Slice 合并为一。

![compaction](../images/compaction.svg)

最后，JuiceFS 的存储设计，还有着以下值得一提的技术特点：

* 对于任意大小的文件，JuiceFS 都不进行合并存储，这也是为了性能考虑，避免读放大。
* 提供强一致性保证，但也可以根据场景需要与缓存功能一起调优，比如通过设置出更激进的元数据缓存，牺牲一部分一致性，换取更好的性能。详见[「元数据缓存」](../guide/cache.md#metadata-cache)。
* 支持并默认开启[「回收站」](../security/trash.md)功能，删除文件后保留一段时间才彻底清理，最大程度避免误删文件导致事故。


================================================
FILE: docs/zh_cn/introduction/comparison/_category_.yml
================================================
position: 4
label: "Comparing with Others"
# collapsible: true 
# collapsed: true 

================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_3fs.md
================================================
---
slug: /comparison/juicefs_vs_3fs
description: 本文对比了 DeepSeek 3FS 和 JuiceFS 在 AI 存储场景中的架构、特性和创新技术。
---

# JuiceFS 对比 3FS

3FS (Fire-Flyer File System) 是一款分布式文件系统，针对 AI 训练和推理工作负载设计，由 DeepSeek 开源。该系统使用 NVMe SSD 和 RDMA 网络提供共享存储层，面向大规模 AI 应用的 I/O 需求。

JuiceFS 是一个云原生分布式文件系统，其数据存储在对象存储中。社区版可与多种元数据服务集成，适用场景广泛，于 2021 年在 GitHub 开源。企业版专为高性能场景设计，广泛应用于大规模 AI 任务，涵盖生成式 AI、自动驾驶、量化金融和生物科技等。

本文从架构设计、文件分布、RPC 框架和功能特性等方面对 3FS 和 JuiceFS 进行全面对比。

## 架构对比

### 3FS

3FS 采用针对 AI 工作负载设计的架构，包含以下关键组件：

- **集群管理服务（Cluster Manager）**：处理成员变更，并将集群配置分发给其他服务和客户端。为了提高系统可靠性和避免单点故障，会部署多个集群管理服务，其中一个被选为主节点。
- **元数据服务（Metadata Service）**：无状态服务，处理文件元数据操作，依靠支持事务的键值数据库 FoundationDB 来存储元数据。
- **存储服务（Storage Service）**：使用本地 NVMe SSD 管理数据存储，采用 CRAQ（Chain Replication with Apportioned Queries）算法确保数据一致性。
- **客户端（Clients）**：提供 FUSE Client 以实现 POSIX 兼容性，以及 Native Client API 用于高性能零拷贝操作。

所有组件通过 RDMA 进行高性能网络通信。集群配置通常存储在可靠的分布式服务中，例如 ZooKeeper 或 etcd。

![3FS architecture](https://static1.juicefs.com/images/3FS_JiaGou.original.png)

### JuiceFS

JuiceFS 采用模块化的云原生架构，包含三个核心组件：

- **元数据引擎**：用于存储文件元数据，包括常规文件系统的元数据和文件数据的索引。社区版支持 Redis、TiKV、MySQL、PostgreSQL、FoundationDB 等多种数据库。企业版使用自研高性能元数据服务。
- **数据存储**：一般是对象存储服务，可以是公有云的对象存储也可以是私有部署的对象存储服务。支持与各种存储后端集成。
- **JuiceFS 客户端**：提供 POSIX（FUSE）、Hadoop SDK、CSI Driver、S3 网关等不同的接入方式。

![JuiceFS Community Edition architecture](../../images/juicefs-arch.svg)

### 架构差异

#### 存储模块

3FS 使用本地 NVMe SSD 进行数据存储，为了保证数据存储的一致性，采用 CRAQ（Chain Replication with Apportioned Queries）算法。几个副本被组成一个 Chain，写请求从 Chain 的 Head 开始，一直到达 Chain 的 Tail 时返回写成功应答。读请求可以发送到 Chain 的所有副本，如果读到脏节点的数据，该节点会联系 Tail 节点检查状态。

![CRAQ consistency algorithm](https://static1.juicefs.com/images/CRAQ_YiZhiXingSuanFa.original.png)

数据的写入是按顺序逐节点传递，因此会带来比较高的延时，但这种设计优先考虑读性能，这对于读密集型的 AI 工作负载至关重要。

相比之下，JuiceFS 利用对象存储作为数据存储解决方案，从而可享有对象存储带来的若干优势，如数据可靠性、一致性等。存储模块提供了一组用于对象操作的标准接口（GET/PUT/HEAD/LIST），可以与各种存储后端无缝集成。社区版 JuiceFS 提供本地缓存来应对 AI 场景下的带宽需求，企业版使用分布式缓存满足更大的聚合读带宽的需求。

#### 元数据模块

在 3FS 中，文件的属性以 KV 的形式存储在元数据服务中。该服务是一个无状态的高可用服务，依靠 FoundationDB 做支撑。FoundationDB 所有键值使用 Key 做全局排序，然后均匀拆分到不同的节点上。为了优化 list 目录的效率，3FS 使用字符 "DENT" 前缀加父目录 inode 号和名字作为 dentry 的 Key。

JuiceFS 社区版的元数据模块提供一组操作元数据的接口，可以接入不同的元数据服务，比如 Redis、TiKV 等 KV 数据库，MySQL、PostgreSQL 等关系型数据库，也可以使用 FoundationDB。JuiceFS 企业版使用自研高性能元数据服务，可根据负载情况来平衡数据和热点操作，以避免大规模训练中元数据服务热点集中在某些节点的问题。

#### 客户端

3FS 的客户端除了提供 FUSE 操作外，还提供了一组 API 用于绕过 FUSE 直接操作数据，也就是 Native Client。这组 API 的作用是避免使用 FUSE 模块带来的数据拷贝，从而减少 I/O 延迟和对内存带宽的占用，通过共享内存和信号量实现零拷贝通信。

![3FS native client API](https://static1.juicefs.com/images/3FS_NATIVE_Client_API.original.png)

3FS 通过 `hf3fs_iov` 保存共享内存的大小、地址和其他一些属性，使用 `IoRing` 在两个进程间通信。系统创建虚拟文件并使用信号量来促进用户进程和 FUSE 进程之间的通信。

JuiceFS 的 FUSE 客户端实现更加全面，提供以下功能：

- 在每次成功上传对象后会立即更新文件长度
- 支持 BSD 锁（flock）和 POSIX 锁（fcntl）
- 支持高级接口如 `file_copy_range`、`readdirplus` 和 `fallocate`

除了 FUSE 客户端，JuiceFS 社区版还提供 Java SDK、Python SDK、S3 网关、CSI Driver 等用于用户空间执行的功能，企业版在此基础上提供了更多企业级特性。

## 文件分布对比

### 3FS 文件分布

3FS 将每个文件分成固定长度的 chunk，每个 chunk 位于一个链上（CRAQ 算法）。因为 3FS 中的 chunk 是固定的，客户端只需要获取一次 inode 的 chain 信息，就可以根据文件 inode 和 I/O 请求的 offset、length 计算出这个请求位于哪些 chunk 上，从而避免了每个 I/O 都从数据库查询的需求。可以通过 `offset/chunk_size` 得到 chunk 的索引，而 chunk 所在的 chain 的索引就是 `chunk_id%stripe`。

为了应对数据不平衡问题，每个文件的第一个 chain 按照轮询（round robin）的方式选择。创建文件时，系统会将选择的 chain 做随机排序，然后存储到元数据中。

![3FS file distribution](https://static1.juicefs.com/images/3FS_WenJianFenBu.original.png)

### JuiceFS 文件分布

JuiceFS 按照 Chunk、Slice、Block 的规则进行数据块管理。每个 Chunk 的大小固定为 64M，主要用于优化数据的查找和定位。实际的文件写入操作则在 Slice 上执行，Slice 代表块内连续的写入过程。Block（默认大小为 4M）则是物理存储的基本单位，用于在对象存储和磁盘缓存中实现数据的最终存储。

![JuiceFS file distribution](../../images/file-and-chunks.svg)

JuiceFS 中的 Slice 是在其他文件系统中不常见的一个结构。主要功能是记录文件的写入操作，并在对象存储中进行持久化。由于对象存储不支持原地文件修改，JuiceFS 通过引入 Slice 结构允许更新文件内容，而无需重写整个文件。JuiceFS 的所有 Slice 均为一次性写入，这减少了对底层对象存储一致性的依赖，并大大简化了缓存系统的复杂度。

## 3FS RPC 框架

3FS 使用 RDMA 作为底层网络通信协议，目前 JuiceFS 尚未支持。3FS 通过实现一个 RPC 框架，来完成对底层 IB 网络的操作。除了网络操作外，RPC 框架还提供序列化、小包合并等能力，使用模版实现了一个反射库，用于序列化 RPC 使用的 request、response 等数据结构。

![3FS FUSE client RPC process](https://static1.juicefs.com/images/3FS_FUSE_Client_DiaoYong_MetadataFuWuDe_RPC_Guo.original.png)

3FS 的缓存有两部份组成，一个 TLS（Thread-Local Storage）队列和一个全局队列。从 TLS 队列获取缓存时不需要加锁；当 TLS 缓存为空时就得加锁，从全局队列中获取缓存。多个 RPC 请求可能被合并为一个 InfiniBand 请求以提高效率。

## 功能特性对比

| 功能特性 | 3FS | JuiceFS 社区版 | JuiceFS 企业版 |
|----------|-----|---------------|---------------|
| 元数据 | 无状态元数据服务+FoundationDB | 独立数据库服务 | 自研高性能分布式元数据引擎（可横向扩展） |
| 数据存储 | 自主管理 | 使用对象存储 | 使用对象存储 |
| 冗余保护 | 多副本 | 对象存储提供 | 对象存储提供 |
| 数据缓存 | 无缓存 | 本地缓存 | 自研高性能多副本分布式缓存 |
| 数据加密 | 不支持 | 支持 | 支持 |
| 数据压缩 | 不支持 | 支持 | 支持 |
| 配额管理 | 不支持 | 支持 | 支持 |
| 网络协议 | RDMA | TCP | TCP |
| 快照 | 不支持 | 支持克隆 | 支持克隆 |
| POSIX ACL | 不支持 | 支持 | 支持 |
| POSIX 兼容性 | 少量子集 | 完全兼容 | 完全兼容 |
| CSI 驱动 | 没有官方支持 | 支持 | 支持 |
| 客户端 | FUSE + Native Client | POSIX（FUSE）、Java SDK、Python SDK、S3 网关 | POSIX（FUSE）、Java SDK、S3 网关、Python SDK |
| 多云镜像 | 不支持 | 不支持 | 支持 |
| 跨云和跨区数据复制 | 不支持 | 不支持 | 支持 |
| 主要维护者 | DeepSeek | Juicedata | Juicedata |
| 开发语言 | C++, Rust (本地存储引擎) | Go | Go |
| 开源协议 | MIT | Apache License 2.0 | 商业软件 |

## 总结

大规模 AI 训练中最主要的需求是高读带宽，为此 3FS 采用了性能优先的设计策略：

- **本地存储**：将数据存储在本地 NVMe SSD 上，用户需要自行管理底层数据存储基础设施
- **零拷贝优化**：实现了客户端到网卡的零拷贝，利用共享内存和信号量减少 I/O 延迟和内存带宽占用
- **RDMA 网络**：引入了 RDMA 技术，提供更好的网络性能
- **优化的 I/O**：通过带 TLS 的 I/O buffer pool 和合并网络请求，增强了小 I/O 和文件元数据操作的能力

这种方法提升了性能，但成本较高，维护也更繁重。

JuiceFS 使用对象存储作为底层数据存储，用户因此可大幅降低存储成本并简化维护工作。为了满足 AI 场景的对读性能的需求：

- **企业版功能**：分布式缓存、分布式元数据服务和 Python SDK
- **即将推出的优化**：v5.2 企业版中，在 TCP 网络中实现了零拷贝，进一步提升数据传输效率
- **云原生优势**：提供完整的 POSIX 兼容性和成熟活跃的开源生态，支持 Kubernetes CSI
- **企业级能力**：Quota、安全管理和数据灾备等多项企业级管理功能


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_alluxio.md
================================================
---
slug: /comparison/juicefs_vs_alluxio
---

# JuiceFS 对比 Alluxio

用户在存算分离的数据平台和 AI 训练加速场景中经常会比较 Alluxio 和 JuiceFS 两个产品。除了使用场景有相似之处，更多的原因是这两个产品都与对象存储相结合，都提供了访问数据时的缓存加速能力，看上去像是同一个场景中的「替代产品」，但两个产品又有很多的不同。本章详细介绍二者的功能区别，两个系统都是开源项目，但也都各自提供功能更为强大的企业版，本文的对比也会考虑到不同版本的区别，帮助你的团队进行技术选型。

Alluxio 的定位是在现有的存储系统之上提供缓存加速层，在实际项目中存储系统大多是对象存储系统。JuiceFS 的定位是为云环境设计的分布式文件系统，可以通过缓存机制加速数据访问。

从架构设计角度讲，Alluxio 与对象存储是两套系统，Alluxio 是业务应用于对象存储之间的中间件，维护多个节点中的存储空间形成一个缓存系统，存储应用访问过的热数据。

JuiceFS 用对象存储做数据持久层，对象存储可以看做是 JuiceFS 的一个内部组件，打比方说对象存储好像一块容量无限大的硬盘，JuiceFS 对这块「硬盘」进行格式化，JuiceFS 的元数据服务就是分区表，结合在一起形成完整的「文件系统」概念。

Alluxio 和 JuiceFS 虽然都能提供文件系统服务，但架构以及使用场景存在很大差异：Alluxio 的主要作用是为各个数据存储系统提供统一接入平台（你的数据仍存储在外部系统），为应用提供高速缓存层。而 JuiceFS 则是一个分布式高性能文件系统，你可以将其作为大数据存储平台，也可以用他来替换当前的存储系统，为你的业务增效降本。

考虑到二者的定位有很大不同，下方表格只能呈现各自作为文件系统角色时的功能特性，并不是一个「公平的对比」。JuiceFS 并不提供多数据源聚合功能，因此也无法与 Alluxio 进行比较。如果你对两个产品都感兴趣，请继续阅读表格下方的章节。

| 特性 | Alluxio | JuiceFS |
|:---:|:---:|:---:|
| 多级缓存 | 支持 | 支持 |
| Hadoop 兼容 | 支持 | 支持 |
| S3 兼容 | 支持 | 支持 |
| Kubernetes CSI 驱动 | 支持 | 支持 |
| WebDAV 协议 | 不支持 | 支持 |
| Hadoop 数据本地性 | 支持 | 支持 |
| 完全兼容 POSIX | 不支持 | 支持 |
| 一致性 | 不一定 | 强一致性|
| 数据压缩 | 不支持 | 支持 |
| 数据加密 | 不支持 | 支持 |
| 服务端运维 | 复杂 | 推荐直接使用云服务商托管服务，实现零运维 |
| 开发语言 | Java | Go |
| 开源协议 | Apache License 2.0 | Apache License 2.0 |
| 开源时间 | 2014 | 2021.1 |

## 架构与核心特性 {#architecture-and-key-features}

### 存储与缓存 {#storage-and-cache}

Alluxio 自身不是一个存储系统，而是一个强大的聚合层，来为不同的存储系统（比如 HDFS、NFS）提供统一接入和缓存服务。这也是为什么我们无法将存储和缓存拆开来讨论与对比，因为 Alluxio 自己的存储层，作用实际上就是提供缓存服务（更多关于架构信息请阅读其[官方文档](https://docs.alluxio.io/os/user/stable/cn/core-services/Caching.html)）。

在 Alluxio 的架构中，背后的存储系统称作「UFS」（Under File Storage），可想而知，这些存储系统都是外部系统，不受 Alluxio 管辖，他们各自的存储格式与 Alluxio 无关。

UFS 层让 Alluxio 能够聚合不同的文件系统，但 Alluxio 的重要作用是为这些存储系统提供缓存服务，因此 Alluxio 也有自己的数据存储，称作 Alluxio storage，会被部署成 Alluxio workers，用来提供缓存服务。

在 Alluxio 存储层，默认使用 64MB 作为缓存块大小，并且在缓存盘之上优先使用内存，为热数据提供更加高速的缓存服务。新版实验功能中也引入了[可调节缓存块大小的设计](https://docs.alluxio.io/os/user/stable/en/core-services/Caching.html#experimental-paging-worker-storage)，来调节缓存粒度，优化性能。

JuiceFS 是一个分布式文件系统，实现了自己的存储格式，文件会被视作一个个最大 64MB 的逻辑数据块（Chunk），再拆成 4MB 的 Block 上传至对象存储，作为最基本的物理存储单位。Block 也是本地缓存的粒度，相比 Alluxio 的 64MB 缓存块，JuiceFS 的粒度更小，更适合随机读取（例如 Parquet 和 ORC）工作负载，缓存管理也更有效率。JuiceFS 的存储设计，在[架构文档](../architecture.md#how-juicefs-store-files)中有更详细的介绍。

Alluxio 和 JuiceFS 都支持多级缓存，设计上各有特色，但都能够支持用硬盘、SSD、内存来灵活配置大容量或者高性能缓存，详见：

* [Alluxio 缓存](https://docs.alluxio.io/os/user/stable/cn/core-services/Caching.html)
* [JuiceFS 缓存](../../guide/cache.md)
* JuiceFS 企业版在社区版的基础上，支持更为强大的[分布式缓存](/docs/zh/cloud/guide/distributed-cache)

### 一致性 {#consistency}

JuiceFS 是一个强一致性的分布式文件系统，它的原子性依赖底层元数据引擎的事务支持（比如 [Redis 事务](https://redis.io/topics/transactions)），因此大部分元数据操作都具有原子性，例如重命名文件、删除文件、重命名目录。

Alluxio 自身并不是一个存储系统，但你依然可以通过 Alluxio 进行写入，不过原子性肯定就无法支持了，因为 Alluxio 依赖 UFS 来实现元数据操作，比如重命名文件操作会变成复制和删除操作。

继续讨论一致性之前，必须先简单了解 Alluxio 的写入是如何实现的。上一小节已经介绍过，Alluxio 存储层和 UFS 是分离的——你可以写存储层，也可以写 UFS，具体文件写入要如何在两个层之间协调，通过以下几种[写入策略](https://docs.alluxio.io/os/user/stable/cn/overview/Architecture.html#%E6%95%B0%E6%8D%AE%E6%B5%81%E5%86%99%E6%93%8D%E4%BD%9C)来控制：

* `MUST_CACHE`：写入 Alluxio worker 内存，性能最好，但 worker 异常会导致数据丢失。适合用来写入临时数据。
* `THROUGH`：直接写入 UFS，性能取决于底层存储。适合用来写入需要持久化，但最近不需要用到的数据。
* `CACHE_THROUGH`：同时写入 Alluxio worker 内存和底层 UFS
* `ASYNC_THROUGH`：先写入 Alluxio worker 内存，再异步提交给 UFS。

可想而知，任何在 Alluxio 中进行的数据写入，都面临着写入性能和一致性之间的取舍。为了达到最理想的性能，用户需要仔细研究写入场景，并为其分配合适的写入策略。显而易见的是，使用 `MUST_CACHE` 或 `ASYNC_THROUGH` 策略一定没有一致性保证，如果写入操作过程中发生故障，其状态是不可预测的。

以上是两个系统在写入一致性方面的对比，至于读数据，Alluxio 会按需从 UFS 加载元数据，并且它在启动时没有关于 UFS 的信息。默认情况下，Alluxio 期望对 UFS 的所有修改都通过 Alluxio 进行。如果直接对 UFS 进行更改，则需要手动或定期在 Alluxio 和 UFS 之间同步元数据，这也容易成为成为不一致的来源。

JuiceFS 则不存在这方面的问题，这是因为 JuiceFS 以元数据服务作为唯一的真实来源（single source of truth），对象存储在这个架构下，只作为数据存储使用，不管理任何元数据。

### 数据压缩 {#data-compression}

JuiceFS 支持使用 [LZ4](https://lz4.github.io/lz4) 或 [Zstandard](https://facebook.github.io/zstd) 来压缩数据。

Alluxio 本质上并不是一个存储系统，虽然你也可以通过 Alluxio 进行数据写入，但[并不支持压缩](https://alluxio.atlassian.net/browse/ALLUXIO-31)。

### 数据加密 {#data-encryption}

Alluxio 仅在[企业版](https://docs.alluxio.io/ee/user/stable/en/security/Security.html#encryption)支持数据加密。

JuiceFS 支持[传输中加密以及静态加密](../../security/encryption.md)。

## 客户端协议对比 {#client-protocol-comparison}

### POSIX

JuiceFS[完全兼容 POSIX](../../reference/posix_compatibility.md)，完整通过用于检验 POSIX 兼容性的 [pjdfstest](https://github.com/pjd/pjdfstest)，并以 99% 以上的成功率通过用于检验 Linux 软件可靠性的 [Linux Test Project](https://github.com/linux-test-project/ltp)，无缝对接已有应用。

除了 pjdfstest 的兼容性测试外，JuiceFS 支持 mmap、fallocate 文件打洞、xattr、BSD 锁（flock）和 POSIX 记录锁（fcntl）。

Alluxio 没有通过 POSIX 兼容性测试。[京东](https://www.slideshare.net/Alluxio/using-alluxio-posix-fuse-api-in-jdcom)的 pjdfstest 测试表明 Alluxio 不支持符号链接、truncate、fallocate、append、xattr、mkfifo、mknod 和 utimes。

### HDFS

二者均兼容 HDFS，包括 Hadoop 2.x 和 Hadoop 3.x，以及 Hadoop 生态系统中的各种组件。详见：

* [JuiceFS Hadoop SDK](../../deployment/hadoop_java_sdk.md)
* [Alluxio 集成 HDFS 作为底层存储](https://docs.alluxio.io/os/user/stable/cn/ufs/HDFS.html)

### S3

JuiceFS 实现了 [S3 网关](../../guide/gateway.md)，因此如果有需要，可以通过 S3 API 直接访问文件系统，也能使用 s3cmd、AWS CLI、MinIO Client（mc）等工具直接管理文件系统。

Alluxio 也支持大部分 S3 API，详见[文档](https://docs.alluxio.io/os/user/stable/cn/api/S3-API.html)。

### Kubernetes CSI Driver

二者均提供 Kubernetes CSI 驱动：

* [JuiceFS CSI Driver](/docs/zh/csi/introduction) 由 Juicedata 团队持续维护
* [Alluxio CSI Driver](https://github.com/Alluxio/alluxio/tree/master-2.x/integration/docker/csi) 由 Alluxio 团队持续维护，相对来说迭代速度较慢。

### WebDAV

JuiceFS 实现了 [WebDAV 服务](../../deployment/webdav.md)，用户可以通过 WebDAV 协议管理文件系统中的数据。

Alluxio 不支持 WebDAV 协议。

## 云上部署和运维 {#deployment-and-operation}

这一小节只讨论两个产品的社区版，两个产品的企业版都能获取技术支持服务，因此不作讨论。

Alluxio 的架构可以分为 3 个组件：master、worker 和客户端。一个典型的集群由一个主节点（master）、多个备用主节点（standby master）、一个作业主节点（job master）、多个备用作业主节点（standby job master）、多个 worker 和 job worker 组成。需要自己部署及运维这些节点，详见[文档](https://docs.alluxio.io/os/user/stable/cn/overview/Getting-Started.html#%E9%83%A8%E7%BD%B2-alluxio)。

JuiceFS 使用 Redis 或者其它流行的数据库作为[元数据引擎](../../reference/how_to_set_up_metadata_engine.md)，大部分公有云服务商都提供这些数据库的全托管服务，你可以直接将其作为 JuiceFS 元数据引擎，没有任何运维负担。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_cephfs.md
================================================
---
slug: /comparison/juicefs_vs_cephfs
description: Ceph 是一套提供对象存储、块存储和文件存储的统一系统，本文从架构和特性两个维度来对比 JuiceFS 与 Ceph 的异同。
---

# JuiceFS 对比 CephFS

## 共同点

两者都是高可靠，高性能的弹性分布式文件系统，且均有良好的 POSIX 兼容性，在各种文件系统使用场景都可一试。

## 不同点

### 系统架构

两者都采用了数据和元数据分离的架构，但在组件实现上有很大区别。

#### CephFS

是一套完整且独立的系统，倾向于私有云部署；所有数据和元数据都会持久化在 Ceph 自己的存储池（RADOS Pool）中。

- 元数据
  - 服务进程（MDS）：无状态且理论可水平扩展。目前已有成熟的主备机制，但多主部署依然有性能和稳定性隐患；生产环境通常采用一主多备或者多主静态隔离
  - 持久化：独立的 RADOS 存储池，通常采用 SSD 或更高性能的硬件存储
- 数据：一个或多个 RADOS 存储池，支持通过 Layout 指定不同的配置，如分块大小（默认 4 MiB），冗余方式（多副本，EC）等
- 客户端：支持内核客户端（`kcephfs`），用户态客户端（`ceph-fuse`）以及基于 libcephfs 实现的 C++、Python 等 SDK；近来社区也提供了 Windows 客户端（`ceph-dokan`）。同时生态中也有与 Samba 对接的 VFS object 和与 NFS-Ganesha 对接的 FSAL 模块可供考虑。

#### JuiceFS

JuiceFS 主要实现一个 libjfs 库和 FUSE 客户端程序、Java SDK 等，支持对接多种元数据引擎和对象存储，适合在公有云、私有云或混合云环境下部署。

- 元数据：支持多种已有的[数据库实现](../../reference/how_to_set_up_metadata_engine.md)，包括：
  - Redis 及各种兼容 Redis 协议的变种（需要支持事务）；
  - SQL 系列：MySQL，PostgreSQL，SQLite 等；
  - 分布式 K/V 存储：TiKV，FoundationDB，etcd；
  - 自研引擎：用于公有云上的 JuiceFS 全托管服务；
- 数据：支持超过 30 种公有云上的[对象存储](../../reference/how_to_set_up_object_storage.md)，也可以和 MinIO，Ceph RADOS，Ceph RGW 等对接；
- 客户端：支持 Unix 用户态挂载，Windows 挂载，完整兼容 HDFS 语义的 Java SDK，[Python SDK](https://github.com/megvii-research/juicefs-python) 以及内置的 S3 网关。

### 功能特性

|                         | CephFS            | JuiceFS            |
| ----------------------- | ----------------- | ------------------ |
| 文件分块<sup> [1]</sup> | ✓                 | ✓                  |
| 元数据事务              | ✓                 | ✓                  |
| 强一致性                | ✓                 | ✓                  |
| Kubernetes CSI Driver   | ✓                 | ✓                  |
| Hadoop 兼容             | ✓                 | ✓                  |
| 数据压缩<sup> [2]</sup> | ✓                 | ✓                  |
| 数据加密<sup> [3]</sup> | ✓                 | ✓                  |
| 快照                    | ✓                 | ✕                  |
| 客户端数据缓存          | ✕                 | ✓                  |
| Hadoop 数据本地性       | ✕                 | ✓                  |
| S3 兼容                 | ✕                 | ✓                  |
| 配额                    | 目录级配额        | 目录级配额         |
| 开发语言                | C++               | Go                 |
| 开源协议                | LGPLv2.1 & LGPLv3 | Apache License 2.0 |

#### 注 1：文件分块

虽然两者都做了大文件的分块，但在实现原理上有本质区别。CephFS 会将文件按 [`object_size`](https://docs.ceph.com/en/latest/cephfs/file-layouts/#reading-layouts-with-getfattr)（默认为 4MiB）拆分，每个分块对应一个 RADOS object。而 JuiceFS 则将文件先按 64MiB Chunk 拆分，每个 Chunk 在写入时根据实际情况进一步拆分成一个或多个逻辑 Slice，每个 Slice 在写入对象存储时再拆分成默认 4MiB 的 Block，Block 与对象存储中 object 一一对应。在处理覆盖写时，CephFS 需要直接修改对应的 objects，流程较为复杂；尤其是冗余策略为 EC 或者开启数据压缩时，往往需要先读取部分 object 内容，在内存中修改后再写入，这个流程会带来很大的性能开销。而 JuiceFS 在覆盖写时将更新数据作为新 objects 写入并修改元数据即可，性能大幅提升；此外，过程中出现的冗余数据会异步完成垃圾回收。

#### 注 2：数据压缩

严格来讲，CephFS 本身并未提供数据压缩功能，其实际依赖的是 RADOS 层 BlueStore 的压缩。而 JuiceFS 则可以在 Block 上传到对象存储之前就进行一次数据压缩，以减少对象存储中的容量使用。换言之，如果用 JuiceFS 对接 RADOS，是能做到在 Block 进 RADOS 前后各进行一次压缩。另外，就像在**文件分块**中提到的，出于对覆盖写的性能保障，CephFS 一般不会开启 BlueStore 的压缩功能。

#### 注 3：数据加密

Ceph **Messenger v2** 支持网络传输层的数据加密，存储层则与压缩类似，依赖于 OSD 创建时提供的加密功能。JuiceFS 是在上传对象前和下载后执行加解密，在对象存储侧完全透明。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_glusterfs.md
================================================
---
title: JuiceFS 对比 GlusterFS
slug: /comparison/juicefs_vs_glusterfs
description: 本文对比 JuiceFS 和 GlusterFS 的架构、元数据管理、数据管理、访问协议及扩展功能。
---

[GlusterFS](https://github.com/gluster/glusterfs) 是一款开源的软件定义分布式存储解决方案，能够在单个集群中支持高达 PiB 级别的数据存储。

JuiceFS 是一款专为云端设计的开源、高性能分布式文件系统，以较低的成本提供了大规模、弹性和高性能的存储能力。

本文先通过一份表格简要对比 JuiceFS 和 GlusterFS 的主要特点，然后进行详细探讨。你可以通过下表速查二者的关键特性对比，然后在本文中选取感兴趣的话题详细阅读。

## JuiceFS 和 GlusterFS 对比一览 {#a-quick-summary-of-glusterfs-vs-juicefs}

下表快速概述了 GlusterFS 和 JuiceFS 之间的差异：

| 对比项 | GlusterFS | JuiceFS |
| :--- | :--- | :--- |
| 元数据 | 纯分布式 | 独立数据库服务 |
| 数据存储 | 自主管理 | 依赖对象存储服务 |
| 大文件拆分 | 不拆分 | 拆分 |
| 冗余保护 | 副本、纠删码 | 依赖对象存储服务 |
| 数据压缩 | 部分支持 | 支持 |
| 数据加密 | 部分支持 | 支持 |
| POSIX 兼容性 | 完整 | 完整 |
| NFS 协议 | 不直接支持 | 不直接支持 |
| CIFS 协议 | 不直接支持 | 不直接支持 |
| S3 协议 | 支持（久未更新） | 支持 |
| HDFS 兼容性 | 支持（久未更新） | 支持 |
| CSI 驱动 | 支持 | 支持 |
| POSIX ACLs | 支持 | 支持 |
| 跨域复制 | 支持 | 依赖外部服务 |
| 目录配额 | 支持 | 支持 |
| 快照 | 支持 | 不支持（但支持克隆） |
| 回收站 | 支持 | 支持 |
| 主要维护者 | Red Hat, Inc | Juicedata, Inc |
| 开发语言 | C | Go |
| 开源协议 | GPLv2 and LGPLv3+ | Apache License 2.0 |

## 系统架构对比 {#system-architecture-comparison}

### GlusterFS 的架构 {#glusterfs-architectire}

GlusterFS 采用的是全分布式的架构，没有中心化节点。GlusterFS 集群主要由服务端和客户端两大部分组成。其中服务端负责管理和存储数据，通常被称为可信存储池（Trusted Storage Pool）。这个存储池由一系列对等的 Server 节点组成，一般会运行两类进程：

* glusterd：每个节点一个，负责配置管理和分发等。
* glusterfsd：每个 [Brick](https://docs.gluster.org/en/latest/glossary/#Brick) 一个，负责处理数据请求和对接底层文件系统。

每个 Brick 上的所有文件可以看成是 GlusterFS 的一个子集，就文件内容而言，通过 Brick 直接访问和通过 GlusterFS 客户端访问看到的结果通常是一致的。因此，在 GlusterFS 异常情况下，用户通过整合多个 Bricks 内容就能一定程度上恢复出原有数据。另外在部署时，为了确保某台机器故障时，整个文件系统的访问不受影响，通常会对数据做冗余保护。在 GlusterFS 中，多个 Bricks 会组成一个冗余组，互相之间通过副本或纠删码的方式实现数据保护。当某个节点故障时，只能在冗余组内做恢复，恢复的时间会比较长。在 GlusterFS 集群扩容时，需要以冗余组为单位整体扩容。

客户端是挂载了 GlusterFS 的节点，负责对应用程序展示统一的命名空间。其架构图如下（出自 [Gluster 架构](https://docs.gluster.org/en/latest/Quick-Start-Guide/Architecture)）：

![Gluster 架构](../../images/glusterfs-architecture.jpg)

### JuiceFS 的架构 {#juicefs-architecture}

JuiceFS 采用「数据」与「元数据」分离存储的架构，文件数据本身会被切分保存在对象存储（如 Amazon S3）当中，而元数据则是会被保存在用户自行选择的数据库里（如 Redis、MySQL）。通过共享同一个份数据库与对象存储，JuiceFS 实现了一个强一致性保证的分布式文件系统，同时还具有「POSIX 完全兼容」、「高性能」等诸多特性。更详细的介绍参见[文档](../architecture.md)。

![JuiceFS 架构](../../images/juicefs-arch-new.png)

## 元数据管理对比 {#metadata-management-comparison}

### GlusterFS {#glusterfs}

GlusterFS 元数据是纯分布式的，没有集中的元数据服务。客户端通过对文件名哈希确定其所属的 Brick；当请求需要跨多个 Bricks 访问（如 mv，ls 等）时，由客户端负责协调。这种设计架构上比较简单，但当系统规模扩大时，往往会带来性能瓶颈。比如，ls 一个大目录时可能会需要访问多个 Bricks 来获得完整的结果，其中任何一个的卡顿都会导致整个请求变慢。另外，跨 Bricks 修改操作在途中遇到故障时，元数据一致性也比较难保证。在严重故障时，还可能出现脑裂，需要[手动恢复](https://docs.gluster.org/en/latest/Troubleshooting/resolving-splitbrain)数据到统一版本。

### JuiceFS {#juicefs}

JuiceFS 的元数据存储在一个独立的数据库（称为元数据引擎）中，客户端会将文件元数据操作转换成此数据库的一个事务，借助数据库的事务能力来保证操作的原子性。这种设计使得 JuiceFS 的实现变得简单，但对元数据引擎提出了较高的要求。目前 JuiceFS 支持三大类 10 种事务型数据库，具体可参见[元数据引擎文档](../../reference/how_to_set_up_metadata_engine.md)。

## 数据管理对比 {#data-management-comparison}

GlusterFS 通过整合多个服务端节点的 Bricks（一般构建在本地文件系统之上，如 XFS）来存储数据。因此，它本身提供了一定的数据管理功能，如分布管理、冗余保护、故障切换、静默错误检测等。

JuiceFS 则不直接使用硬盘，而是通过对接各种对象存储来管理数据，大部分特性都依赖于对象存储自身的实现。

### 大文件拆分 {#large-file-splitting}

在分布式系统中，将大文件拆分成多个小块散列存储在不同节点中是一种常见的优化手段。这往往能让应用在访问此文件时有更高的并发度和整体带宽。

* GlusterFS：不拆分（曾有过 Striped Volume 会拆分大文件，现已不再支持）。
* JuiceFS：文件先按大小拆成 64 MiB 的 Chunks，每个 Chunk 再根据写入模式进一步拆成默认 4 MiB 的 Blocks；具体可参见[架构文档](../architecture.md#how-juicefs-store-files)。

### 冗余保护 {#redundancy-protection}

GlusterFS 支持副本（Replicated Volume）和纠删码（Dispersed Volume）两种类型。

JuiceFS 依赖于使用的对象存储。

### 数据压缩 {#data-compression}

GlusterFS：

* 仅支持传输层压缩，文件由客户端执行压缩，传输到服务端后再由 Brick 负责解压缩。
* 不直接实现存储层压缩，而是依赖于 Brick 使用的底层文件系统，如 [ZFS](https://docs.gluster.org/en/latest/Administrator-Guide/Gluster-On-ZFS)。

JuiceFS 同时支持传输层压缩和存储层压缩，数据的压缩和解压缩都在客户端执行。

### 数据加密 {#data-encryption}

GlusterFS：

* 仅支持[传输层加密](https://docs.gluster.org/en/latest/Administrator-Guide/SSL)，依赖于 SSL/TLS。
* 曾支持过[存储层加密](https://github.com/gluster/glusterfs-specs/blob/master/done/GlusterFS%203.5/Disk%20Encryption.md)，但现已不再支持。

JuiceFS 同时支持[传输层加密和存储层加密](../../security/encryption.md)，数据的加密和解密都在客户端进行。

## 访问协议 {#access-protocols}

### POSIX 兼容性 {#posix-compatibility}

[GlusterFS](https://docs.gluster.org/en/latest/glossary) 和 [JuiceFS](../../reference/posix_compatibility.md) 都提供 POSIX 兼容性。

### NFS 协议 {#nfs-protocol}

GlusterFS 曾有内嵌服务来支持 NFSv3，但现已[不再推荐使用](https://github.com/gluster/glusterfs-specs/blob/master/done/GlusterFS%203.8/gluster-nfs-off.md)，而是建议用 NFS server 将挂载点导出。

JuiceFS 不直接支持，需要挂载后[通过其他 NFS server 导出](../../deployment/nfs.md)。

### CIFS 协议 {#cifs-protocol}

GlusterFS 内嵌支持 Windows，Linux Samba client 和 macOS 的 CLI 访问，不支持 macOS Finder。然而，文档中建议用[通过 Samba 将挂载点导出](https://docs.gluster.org/en/latest/Administrator-Guide/Setting-Up-Clients/#testing-mounted-volumes)的方式使用。

JuiceFS 不直接支持，需要挂载后[通过 Samba 导出](../../deployment/samba.md)。

### S3 协议 {#s3-protocol}

GlusterFS 通过 [`gluster-swift`](https://github.com/gluster/gluster-swift) 项目支持，但其最近更新停留在 2017 年 11 月。

JuiceFS 通过 [S3 网关](../../guide/gateway.md)支持。

### HDFS 兼容性 {#hdfs-compatibility}

GlusterFS 通过 [`glusterfs-hadoop`](https://github.com/gluster/glusterfs-hadoop) 项目支持，但其最近更新停留在 2015 年 5 月。

JuiceFS 完整[兼容 HDFS API](../../deployment/hadoop_java_sdk.md)。

### CSI 驱动 {#csi-driver}

GlusterFS 曾[支持过](https://github.com/gluster/gluster-csi-driver)，但最近版本发布于 2018 年 11 月，且仓库已被标记 DEPRECATED。

JuiceFS 支持，具体可参见 [JuiceFS CSI 驱动文档](https://juicefs.com/docs/zh/csi/introduction)。

## 扩展功能 {#extended-features}

### POSIX ACLs {#posix-acls}

Linux 下对文件的访问权限控制一般有三类实体，即文件拥有者（owner）、拥有组（group）和其他（other）。当我们有更复杂的需求，比如要给本属于 other 的某个特定用户单独赋予权限时，这套机制就做不到了。POSIX Access Control Lists (ACLs) 提供增强的权限管理功能，可用来为任意用户/用户组指定权限。

GlusterFS [支持](https://docs.gluster.org/en/main/Administrator-Guide/Access-Control-Lists)，且支持 access ACLs 和 default ACLs。

JuiceFS 从 v1.2 版本开始支持 [POSIX ACLs](../../security/posix_acl.md) 特性。

### 跨域复制 {#cross-cluster-replication}

跨域复制是指在两套独立的集群间进行数据复制，一般被用来实现异地灾备。

GlusterFS [支持单向的异步增量复制](https://docs.gluster.org/en/main/Administrator-Guide/Geo-Replication)，但需要两边是同版本的 Gluster 集群。

JuiceFS 依赖元数据引擎和对象存储自身的复制能力，可以做单向复制。

### 目录配额 {#directory-quotas}

[GlusterFS](https://docs.gluster.org/en/main/Administrator-Guide/Directory-Quota) 和 [JuiceFS](../../guide/quota.md#directory-quota) 都支持目录配额，包括容量和/或文件数限制。

### 快照 {#snapshots}

GlusterFS 仅[支持存储卷级别的快照](https://docs.gluster.org/en/main/Administrator-Guide/Managing-Snapshots)，而且需要所有 Bricks 部署在 LVM 精简卷（Thinly-Provisioned LVM）上。

JuiceFS 不支持快照，但支持[目录级别的克隆](../../guide/clone.md)。

### 回收站 {#trash}

GlusterFS [支持](https://docs.gluster.org/en/main/Administrator-Guide/Trash)，且默认关闭。

JuiceFS [支持](../../security/trash.md)，且默认打开。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_lustre.md
================================================
---
slug: /comparison/juicefs_vs_lustre
description: 本文对比了 Lustre 和 JuiceFS 在架构设计、文件分布和功能特性方面的差异。
---

# JuiceFS 对比 Lustre

Lustre 是一款专为高性能计算（HPC）环境设计的并行分布式文件系统，最初在美国政府资助下，由多个国家实验室联合开发，旨在支持大规模科学研究和工程计算任务。当前，Lustre 的主要开发与维护由 DDN（DataDirect Networks）负责，广泛应用于超算中心、科研机构及企业级 HPC 集群中。

JuiceFS 是一个云原生分布式文件系统，其数据存储在对象存储中。社区版可与多种元数据服务集成，适用场景广泛，于 2021 年在 GitHub 开源。企业版专为高性能场景设计，广泛应用于大规模 AI 任务，涵盖生成式 AI、自动驾驶、量化金融和生物科技等。

本文从架构设计、文件分布和功能特性等方面对 Lustre 和 JuiceFS 进行全面对比。

## 架构对比

### Lustre

Lustre 采用传统的客户端 - 服务器架构，由以下几个核心模块组成：

- **元数据服务器（MDS）**：负责处理命名空间相关操作，如文件创建、删除、权限检查等。自 2.4 版本起引入了分布式命名空间（DNE）功能，支持将单个文件系统的不同目录分布在多个元数据服务器上，实现元数据访问负载的横向扩展。
- **对象存储服务器（OSS）**：负责实际的数据读写，提供高性能的大规模 I/O 服务。
- **管理服务器（MGS）**：作为全局配置注册中心，负责存储和分发 Lustre 文件系统的配置信息。MGS 在功能上独立于具体的 Lustre 实例。
- **客户端（Client）**：为用户应用程序提供访问 Lustre 文件系统的接口，实现标准的 POSIX 文件操作语义。

各组件通过 Lustre 专用的网络协议 LNet 连接，构成一个统一高效的文件系统整体。

![Lustre architecture](https://static1.juicefs.com/images/Lustre_JiaGouTu_SWMlRaK.original.png)

### JuiceFS

JuiceFS 采用模块化架构，包括三个核心组件：

- **元数据引擎**：用于存储文件元数据，包括常规文件系统的元数据和文件数据的索引。社区版支持 Redis、TiKV、MySQL、PostgreSQL、FoundationDB 等多种数据库。企业版使用自研高性能元数据服务。
- **数据存储**：一般是对象存储服务，可以是公有云的对象存储也可以是私有部署的对象存储服务。支持 30 多种对象存储，包括 AWS S3、Azure Blob、Google Cloud Storage、MinIO、Ceph RADOS 等。
- **客户端**：提供 POSIX（FUSE）、Hadoop SDK、CSI Driver、S3 网关、Python SDK 等不同的接入方式。

![JuiceFS Community Edition architecture](../../images/juicefs-arch.svg)

### 架构差异

#### 客户端实现

Lustre 采用 C 语言实现，其客户端模块运行在内核态；而 JuiceFS 使用 Go 语言开发，客户端通过 FUSE（Filesystem in Userspace）暴露文件系统接口，运行在用户态。由于 Lustre 客户端运行于内核空间，访问元数据服务器（MDS）或对象存储服务器（OSS）时无需进行用户态与内核态的上下文切换或额外的内存拷贝，从而显著减少了系统调用所带来的性能开销，在吞吐和延迟方面具备一定优势。

然而，内核态实现也带来了运维和调试的复杂性。相比用户态的开发环境和调试工具，内核态工具门槛更高，不易为普通开发者所掌握。同时，与 C 语言相比，Go 语言更易于学习、维护和开发，具备更高的开发效率和可维护性。

#### 存储模块

Lustre 在部署时通常需要配置一块或多块共享磁盘来存储文件数据。这一设计源于其早期版本尚不支持文件级冗余（File Level Redundancy，FLR）。为了实现高可用性（HA），当某个节点下线时，必须将其文件系统挂载到对等节点，否则该节点上的数据块将不可访问。因此，数据的可靠性需依赖于共享存储本身的高可用机制，或用户自行配置的软件 RAID 实现。

JuiceFS 利用对象存储作为数据存储解决方案，从而可享有对象存储带来的若干优势，如数据可靠性、一致性等。用户可以根据自己的需求对接具体的存储系统，既包括主流云厂商的对象存储，也支持如 MinIO、Ceph RADOS 等私有部署的对象存储系统。社区版 JuiceFS 提供本地缓存来应对 AI 场景下的带宽需求，企业版使用分布式缓存满足更大的聚合读带宽的需求。

#### 元数据模块

Lustre 的 MDS 高可用性依赖于软硬件协同实现：

- **硬件层面**：MDS 使用的磁盘需配置 RAID，以避免因单点磁盘故障导致服务不可用；磁盘也需具备共享能力，以便当主节点宕机时，备节点能接管磁盘资源。
- **软件层面**：使用 Pacemaker 与 Corosync 构建高可用集群，确保任一时刻仅有一个 MDS 实例处于活动状态。

JuiceFS 社区版的元数据模块提供一组操作元数据的接口，可以接入不同的元数据服务，包括 Redis、TiKV、MySQL、PostgreSQL、FoundationDB 等不同类型的数据库。JuiceFS 企业版使用自研高性能元数据服务，可根据负载情况来平衡数据和热点操作，以避免大规模训练中元数据服务热点集中在某些节点的问题。

## 文件分布对比

### Lustre 文件分布

#### Normal File Layout (NFL)

Lustre 早期采用的文件分布方式被称为 Normal File Layout。在该模式下，文件被切分为多个数据块，并分别存储在多个对象存储目标（OSTs）上，其策略类似于 RAID 0。

文件分布策略主要由以下两个参数控制：

- **Stripe Count**：指定文件可以同时分布到多少个 OST 上。该值越大，文件并行访问能力越强，但也可能带来额外的调度和管理开销。
- **Stripe Size**：定义在切换到下一个 OST 之前，每个数据块的大小。也就是说，写入达到设定的 Stripe Size 后，数据将被写入下一个 OST，这也决定了每个 Chunk 的粒度。

![Lustre NFL file distribution](https://static1.juicefs.com/images/Lustre_NFL_WenJianFenBuShiLi.original.png)

上图展示了一个 Stripe Count 为 3、Stripe Size 为 1 MB 的文件在多个 OST 上的分布方式。每个数据块（Stripe）采用轮询（Round-Robin）方式依次分布到不同的 OST 上。

主要限制包括：

- 一旦文件创建，配置参数不可变
- 如果任何目标 OST 空间耗尽，可能导致 ENOSPC（空间不足）错误
- 随时间推移可能导致存储不均衡

#### Progressive File Layout (PFL)

为了解决 NFL 在应对动态数据增长和资源分配方面存在的局限，Lustre 引入了一种新的文件分布机制，称为 Progressive File Layout (PFL)。

![Lustre PFL file distribution](https://static1.juicefs.com/images/Lustre_PFL_WenJianFenBuShiLi.original.png)

PFL 支持为同一个文件的不同区段定义不同的布局策略，具备以下优势：

- 动态适应文件增长
- 减缓磁盘不均衡问题
- 提高空间利用率和灵活性

虽然 PFL 引入了更具弹性的布局策略，但 Lustre 进一步结合 Lazy Initialization 技术，以实现更高效的资源调度。

#### File Level Redundancy (FLR)

Lustre 引入了文件级冗余来简化 HA 架构并提升系统容错能力。FLR 允许为每个文件配置一个或多个副本，实现文件级别的冗余保护。在写入操作发生时，数据仅写入其中一个副本，其余副本会被标记为 STALE（过期）。随后，系统通过一个称为 Resync 的同步过程，确保数据一致性。

### JuiceFS 文件分布

JuiceFS 按照 Chunk、Slice、Block 的规则进行数据块管理。每个 Chunk 的大小固定为 64M，主要用于优化数据的查找和定位。实际的文件写入操作则在 Slice 上执行，每个 Slice 代表一次连续的写入过程，属于特定的 Chunk，并且不会跨越 Chunk 的边界，因此长度不超过 64M。Block（默认大小为 4M）则是物理存储的基本单位，用于在对象存储和磁盘缓存中实现数据的最终存储。

![JuiceFS file distribution](../../images/file-and-chunks.svg)

JuiceFS 中的 Slice 是在其他文件系统中不常见的一个结构。主要功能是记录文件的写入操作，并在对象存储中进行持久化。对象存储不支持原地文件修改，因此，JuiceFS 通过引入 Slice 结构允许更新文件内容，而无需重写整个文件。当修改文件时，系统会创建新的 Slice，并在该 Slice 上传完毕后更新元数据，从而将文件内容指向新的 Slice。

JuiceFS 的所有 Slice 均为一次性写入，这减少了对底层对象存储一致性的依赖，并大大简化了缓存系统的复杂度，使数据一致性更易于保证。

## 功能特性对比

| 功能特性 | Lustre | JuiceFS 社区版 | JuiceFS 企业版 |
|----------|--------|---------------|---------------|
| 元数据 | 分布式元数据服务 | 独立数据库服务 | 自研高性能分布式元数据引擎（可横向扩展） |
| 元数据冗余保护 | 需要存储设备提供 | 取决于使用的数据库 | 三副本 |
| 数据存储 | 自主管理 | 使用对象存储 | 使用对象存储 |
| 数据冗余保护 | 存储设备提供或异步复制 | 对象存储提供 | 对象存储提供 |
| 数据缓存 | 客户端本地缓存 | 客户端本地缓存 | 自研高性能多副本分布式缓存 |
| 数据加密 | 支持 | 支持 | 支持 |
| 数据压缩 | 支持 | 支持 | 支持 |
| 配额管理 | 支持 | 支持 | 支持 |
| 网络协议 | 支持多种网络协议 | TCP | TCP |
| 快照 | 文件系统级别快照 | 文件级别快照 | 文件级别快照 |
| POSIX ACL | 支持 | 支持 | 支持 |
| POSIX 兼容性 | 兼容 | 完全兼容 | 完全兼容 |
| CSI 驱动 | 非官方支持 | 支持 | 支持 |
| 客户端 | POSIX | POSIX（FUSE）、Java SDK、S3 网关、Python SDK | POSIX（FUSE）、Java SDK、S3 网关、Python SDK |
| 多云镜像 | 不支持 | 不支持 | 支持 |
| 跨云和跨区数据复制 | 不支持 | 不支持 | 支持 |
| 主要维护者 | DDN | Juicedata | Juicedata |
| 开发语言 | C | Go | Go |
| 开源协议 | GPL 2.0 | Apache License 2.0 | 商业软件 |

## 小结

Lustre 是一款高性能并行分布式文件系统，客户端运行于内核态，直接与元数据服务器（MDS）和对象存储服务器（OSS）交互，避免了用户态与内核态之间的上下文切换。结合高性能存储设备，Lustre 在高带宽 I/O 场景下展现出卓越的性能。

然而，由于客户端运行在内核态，这使得运维过程更具挑战性，运维团队需具备深入的内核调试经验和底层系统故障排查能力。此外，由于 Lustre 使用固定容量的存储方案，文件分布设计相对复杂，需要精细的规划与配置来实现资源的高效利用。因此，Lustre 的部署和运维门槛较高。

JuiceFS 是一款云原生、用户态分布式文件系统，紧密集成对象存储，并原生支持 Kubernetes CSI，从而简化了在云平台上的部署和运维。用户无需深入关注底层存储设备和复杂的存储调度机制，即可在容器化环境中实现弹性扩展、高可用数据服务。在性能方面，JuiceFS 企业版通过分布式缓存，有效降低对象存储的访问延迟，提升文件操作的响应速度。

从成本角度看，Lustre 需要依赖高性能的专用存储设备，初始投资和长期维护成本较高。对象存储则更加经济，具备天然的可扩展性以及按需付费的灵活性。

两个系统各有优势：Lustre 在传统 HPC 环境中追求极致性能方面表现卓越，而 JuiceFS 在云原生和 AI 工作负载方面提供了更好的灵活性、更容易的管理和更高的性价比。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_s3fs.md
================================================
---
slug: /comparison/juicefs_vs_s3fs
---

# JuiceFS 对比 S3FS

[S3FS](https://github.com/s3fs-fuse/s3fs-fuse) 是一个 C++ 开发的开源工具，可以将 S3 对象存储通过 FUSE 挂载到本地，像本地磁盘一样进行读写访问。除了 Amazon S3，它还支持所有兼容 S3 API 的对象存储。

在基本功能方面，S3FS 与 JuiceFS 都能通过 FUSE 将对象存储 Bucket 挂载到本地并以 POSIX 接口使用。但在功能细节和技术实现上，二者有着本质的不同。

## 产品定位

S3FS 是一种实用工具，可以方便地将对象存储 Bucket 挂载到本地，以用户熟悉的方式进行读写，面向那些对性能和网络延迟不敏感的一般使用场景。

JuiceFS 是分布式文件系统，具有独特的数据管理方式以及一系列针对高性能、可靠性和安全性等方面的技术优化，主要解决海量数据的存储需求。

## 系统架构

S3FS 没有针对文件做特别的优化处理，它就像一个本地与对象存储之间的访问通道，本地挂载点看到的内容与对象存储浏览器上看到的一致，这样可以很方便地实现在本地使用云端存储。但从另一个角来看，正是因为这种简单的架构，使得 S3FS 对文件的检索和读写都需要与对象存储直接交互，网络延迟对性能和用户体验都会有较大的影响。

JuiceFS 采用数据和元数据分离的技术架构，任何文件都会先按照特定规则拆分成数据块再上传到对象存储，相应的元数据会存储在独立的数据库中。这样带来的好处是对文件的检索以及文件名等元信息的修改可以直接与响应速度更快的数据库交互，避开了与对象存储交互的网络延迟影响。

另外，在大文件的处理方面，虽然 S3FS 可以通过分块上传解决大文件的传输问题，但对象存储的特性决定了追加和改写文件需要重写整个对象。对于几十几百 GB 甚至 TB 级的大文件来说，重复上传势必会浪费大量的时间和带宽资源。

JuiceFS 则规避了此类问题，不论单个文件尺寸多大，在上传之前都会预先在本地按照特定规则拆分成数据块（默认 4MiB）。对任何文件的改写和追加最终都会变成生成新的数据块，而不是修改已生成的数据块，大大减少了时间和带宽资源的浪费。

有关 JuiceFS 的详细架构介绍请参考[文档](../../introduction/architecture.md)。

## 缓存机制

S3FS 支持磁盘缓存，但默认不启用。可以通过 `-o use_cache` 指定一个缓存路径来启用本地缓存。启用缓存后，任何文件的读写都会先写入缓存，然后再执行操作。S3FS 通过 MD5 来检测数据变化，确保数据正确性，同时降低文件的重复下载。由于 S3FS 涉及的所有操作都需要与 S3 交互，因此是否启用缓存对其应用体验有显著的影响。

S3FS 默认不限制缓存空间上限，对于较大的 Buket 可能导致缓存把磁盘写满，需要通过 `-o ensure_diskfree` 定义为磁盘保留的空间。另外，S3FS 没有缓存过期和清理机制，用户需要定期手动清理缓存，一旦缓存空间被存满，未缓存文件操作则需要直接与对象存储交互，处理大规模文件会有一定影响。

在缓存方面，JuiceFS 与 S3FS 完全不同，首先，JuiceFS 是保证数据一致性的。其次，JuiceFS 默认定义了 100GiB 的磁盘缓存使用上限，用户可以根据需要自由调整该值，而且默认会确保磁盘剩余空间低于 10% 时不再使用更多空间。当缓存用量达到上限，JuiceFS 会采用类似 LRU 的算法自动进行清理，确保后续的读写操作始终有缓存可用。

有关 JuiceFS 缓存的更多内容请参考[文档](../../guide/cache.md)。

## 功能特性

|                | S3FS                             | JuiceFS                                 |
|----------------|----------------------------------|-----------------------------------------|
| 数据存储       | S3                               | S3、其他对象存储、WebDAV、本地磁盘      |
| 元数据存储     | 无                               | 独立数据库                              |
| 系统           | Linux、macOS                     | Linux、macOS、Windows                   |
| 访问接口       | POSIX                            | POSIX、HDFS API、S3 Gateway、CSI Driver |
| POSIX 兼容     | 部分兼容                         | 完全兼容                                |
| 共享挂载       | 支持但不保证数据的完整性和一致性 | 保证强一致性                            |
| 本地缓存       | ✓                                | ✓                                       |
| 符号链接       | ✓                                | ✓                                       |
| 标准 Unix 权限 | ✓                                | ✓                                       |
| 强一致性       | ✕                                | ✓                                       |
| 扩展属性       | ✕                                | ✓                                       |
| 硬链接         | ✕                                | ✓                                       |
| 文件分块       | ✕                                | ✓                                       |
| 原子操作       | ✕                                | ✓                                       |
| 数据压缩       | ✕                                | ✓                                       |
| 客户端加密     | ✕                                | ✓                                       |
| 开发语言       | C++                              | Go                                      |
| 开源协议       | GPL v2.0                         | Apache License 2.0                      |

## 补充说明

[OSSFS](https://github.com/aliyun/ossfs)、[COSFS](https://github.com/tencentyun/cosfs)、[OBSFS](https://github.com/huaweicloud/huaweicloud-obs-obsfs) 等都是基于 S3FS 开发的衍生品，功能特性和用法与 S3FS 基本一致。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_s3ql.md
================================================
---
slug: /comparison/juicefs_vs_s3ql
---

# JuiceFS 对比 S3QL

与 JuiceFS 类似，S3QL 也是一款由对象存储和数据库组合驱动的开源网络文件系统，所有存入的数据会被分块后存储到亚马逊 S3、Backblaze B2、OpenStack Swift 等主流的对象存储中，相应的元数据会存储在数据库中。

## 共同点

- 都是通过 FUSE 模块实现对标准 POSIX 文件系统接口的支持，从而可以将海量的云端存储挂载到本地，像本地存储一样使用。
- 都能提供标准的文件系统功能：硬链接、符号链接、扩展属性、文件权限。
- 都支持数据压缩和加密，但二者采用的算法各有不同。
- 都支持元数据库备份，S3QL 自动备份 SQLite 数据库到对象存储。JuiceFS 每小时自动将元数据导出为 JSON 格式文件并备份到对象存储，便于恢复以及在各种元数据引擎间迁移。

## 不同点

- S3QL 仅支持 SQLite 一种数据库，而 JuiceFS 除了支持 SQLite 以外还支持 Redis、TiKV、MySQL、PostgreSQL 等数据库。
- S3QL 没有分布式能力，**不支持**多主机同时挂载。JuiceFS 是典型的分布式文件系统，在使用基于网络的数据库时，支持多主机分布式挂载读写。
- S3QL 在一个数据块几秒内未被访问时将其上传到对象存储。文件被关闭甚者 fsync 后其仍仅保证在系统内存中，节点故障时可能丢失数据。JuiceFS 确保了数据的高可靠性，在文件关闭时会将其同步上传到对象存储。
- S3QL 提供数据去重，相同数据只存储一份，可以降低对象存储的用量，但也会加重系统的性能开销。相比之下，JuiceFS 更注重性能，对大规模数据去重代价过高，暂不提供该功能。

|                       | **S3QL**           | **JuiceFS**                |
| :-------------------- | :----------------- | :------------------------- |
| 项目状态              | 活跃维护              | 活跃开发                    |
| 元数据引擎            | SQLite             | Redis、MySQL、SQLite、TiKV |
| 存储引擎              | 对象存储、本地磁盘 | 对象存储、WebDAV、本地磁盘 |
| 操作系统              | Unix-like          | Linux、macOS、Windows      |
| 压缩算法              | LZMA, bzip2, gzip  | LZ4, zstd                  |
| 加密算法              | AES-256            | AES-GCM, RSA               |
| POSIX 兼容            | ✓                  | ✓                          |
| 硬链接                | ✓                  | ✓                          |
| 符号链接              | ✓                  | ✓                          |
| 扩展属性              | ✓                  | ✓                          |
| 标准 Unix 权限        | ✓                  | ✓                          |
| 数据分块              | ✓                  | ✓                          |
| 本地缓存              | ✓                  | ✓                          |
| 空间弹性伸缩          | ✓                  | ✓                          |
| 元数据备份            | ✓                  | ✓                          |
| 数据去重              | ✓                  | ✕                          |
| 只读目录              | ✓                  | ✕                          |
| 快照                  | ✓                  | ✕                          |
| 共享挂载              | ✕                  | ✓                          |
| Hadoop SDK            | ✕                  | ✓                          |
| Kubernetes CSI Driver | ✕                  | ✓                          |
| S3 网关               | ✕                  | ✓                          |
| 开发语言              | Python             | Go                         |
| 开源协议              | GPLv3              | Apache License 2.0                     |
| 开源时间              | 2011               | 2021.1                     |

## 易用性

这部分主要评估两个产品在安装和使用上的的易用程度。

### 安装

在安装过程中，我们使用 Rocky Linux 8.4 操作系统（内核版本 4.18.0-305.12.1.el8_4.x86_64）。

#### S3QL

S3QL 采用 Python 开发，在安装时需要依赖 `python-devel` 3.7 及以上版本。另外，还需要至少满足以下依赖：`fuse3-devel`、`gcc`、`pyfuse3`、`sqlite-devel`、`cryptography`、`defusedxml`、`apsw`、`dugong`。另外，需要特别注意 Python 的包依赖和位置问题。

S3QL 会在系统中安装 12 个二进制程序，每个程序都提供一个独立的功能，如下图。

![S3QL-bin](../../images/s3ql-bin.jpg)

#### JuiceFS

JuiceFS 客户端采用 Go 语言开发，直接下载预编译的二进制文件即可直接使用。JuiceFS 客户端只有一个二进制程序 `juicefs`，将其拷贝到系统的任何一个可执行路径下即可，比如：`/usr/local/bin`。

### 使用

S3QL 和 JuiceFS 都使用数据库保存元数据，S3QL 仅支持 SQLite 数据库，JuiceFS 支持 Redis、TiKV、MySQL、MariaDB、PostgreSQL 和 SQLite 等数据库。

这里使用本地创建的 MinIO 对象存储，使用两款工具分别创建文件系统：

#### S3QL

S3QL 使用 `mkfs.s3ql` 工具创建文件系统：

```shell
mkfs.s3ql --plain --backend-options no-ssl -L s3ql s3c://127.0.0.1:9000/s3ql/
```

挂载文件系统使用 `mount.s3ql`：

```shell
mount.s3ql --compress none --backend-options no-ssl s3c://127.0.0.1:9000/s3ql/ mnt-s3ql
```

S3QL 在创建和挂载文件系统时都需要通过命令行交互式的提供对象存储 API 的访问密钥。

#### JuiceFS

JuiceFS 使用 `format` 子命令创建文件系统：

```shell
juicefs format --storage minio \
    --bucket http://127.0.0.1:9000/myjfs \
    --access-key minioadmin \
    --secret-key minioadmin \
    sqlite3://myjfs.db \
    myjfs
```

挂载文件系统使用 `mount` 子命令：

```shell
sudo juicefs mount -d sqlite3://myjfs.db mnt-juicefs
```

JuiceFS 只在创建文件系统时设置对象存储 API 访问密钥，相关信息会写入元数据引擎，之后挂载使用无需重复提供对象存储地址、密钥等信息。

## 对比总结

**S3QL** 采用对象存储 + SQLite 的存储结构，数据分块存储既能提高文件的读写效率，也能降低文件修改时的资源开销。贴心的提供了快照、数据去重、数据保持等高级功能，加之默认的数据压缩和数据加密，让 S3QL 非常适合个人在云存储上用较低的成本、更安全的存储文件。

**JuiceFS** 支持对象存储、HDFS、WebDAV、本地磁盘作为数据存储引擎，支持 Redis、TiKV、MySQL、MariaDB、PostgreSQL、SQLite 等流行的数据作为元数据存储引擎。除了通过 FUSE 提供标准的 POSIX 文件系统接口以外，JuiceFS 还提供 Java API，可以直接替代 HDFS 为 Hadoop 提供存储。同时还提供 [Kubernetes CSI Driver](https://github.com/juicedata/juicefs-csi-driver)，可以作为 Kubernetes 的存储层做数据持久化存储。JuiceFS 是为企业级分布式数据存储场景设计的文件系统，广泛应用于大数据分析、机器学习、容器共享存储、数据共享及备份等多种场景。


================================================
FILE: docs/zh_cn/introduction/comparison/juicefs_vs_seaweedfs.md
================================================
---
title: JuiceFS 对比 SeaweedFS
slug: /comparison/juicefs_vs_seaweedfs
description: 本文对比 JuiceFS 和 SeaweedFS 的架构、存储机制、客户端协议及其他高级功能。
---

[SeaweedFS](https://github.com/seaweedfs/seaweedfs) 与 [JuiceFS](https://github.com/juicedata/juicefs) 皆是开源的高性能分布式文件存储系统，但二者存在诸多设计区别与功能差异，本章将会详述他们的区别和各自适用场景，帮助你的团队进行技术选型。

SeaweedFS 和 JuiceFS 都采用了对商用更友好的 Apache License 2.0，但 JuiceFS 分为[社区版](https://juicefs.com/docs/zh/community/introduction)和[企业版](https://juicefs.com/zh-cn/blog/solutions/juicefs-enterprise-edition-features-vs-community-edition)，企业版提供多种交付形式，例如私有部署和[云服务](https://juicefs.com/docs/zh/cloud)。JuiceFS 企业版使用自研的闭源元数据引擎，其客户端则与[社区版](https://github.com/juicedata/juicefs)大量共享代码。你可以通过下表速查两者的关键特性对比，然后在本文中选取感兴趣的话题详细阅读。

## JuiceFS 和 SeaweedFS 对比一览

| 对比项 | SeaweedFS | JuiceFS |
| :--- | :--- | :--- |
| 元数据引擎 | 支持多种数据库 | 社区版支持多种数据库、企业版使用自研高性能元数据引擎 |
| 元数据操作原子性 | 未保证 | 社区版通过数据库事务保证、企业版元数据引擎自身保证操作原子性 |
| 变更日志 | 支持 | 仅企业版支持 |
| 数据存储 | 自包含 | 依赖对象存储 |
| 纠删码 | 支持 | 依赖对象存储 |
| 数据合并 | 支持 | 依赖对象存储 |
| 文件拆分 | 8MB | 64MB 逻辑块 + 4MB 物理存储块 |
| 分层存储 | 支持 | 依赖对象存储 |
| 数据压缩 | 支持（基于扩展名） | 支持（全局设置） |
| 存储加密 | 支持 | 支持 |
| POSIX 兼容性 | 基本 | 完整 |
| S3 协议 | 基本 | 基本 |
| WebDAV 协议 | 支持 | 支持 |
| HDFS 兼容性 | 基本 | 完整 |
| CSI 驱动 | 支持 | 支持 |
| 客户端缓存 | 支持 | 支持 |
| 集群数据复制 | 支持单向、双向复制模式 | 仅企业版支持单向复制 |
| 云上数据缓存 | 支持（手动同步） | 仅企业版支持 |
| 回收站 | 不支持 | 支持 |
| 运维与监控 | 支持 | 支持 |
| 发布时间 | 2015.4 | 2021.1 |
| 主要维护者 | 个人（Chris Lu） | 公司（Juicedata Inc） |
| 语言 | Go | Go |
| 开源协议 | Apache License 2.0 | Apache License 2.0 |

## SeaweedFS 技术架构

系统由 3 部分组成：

- **Volume Server**，用于底层存储文件
- **Master Server**，用于管理集群
- **Filer**，一个向上提供更多特性的可选组件

![SeaweedFS 系统架构](../../images/seaweedfs_arch_intro.png)

Volume Server 与 Master Server 一并构成文件存储服务：

- Volume Server 专注于数据的写入与读取
- Master Server 负责管理集群与 Volumes

在读写数据时，SeaweedFS 的实现与 Haystack 相似，用户创建的文件系统（Volume）实际上是一个大磁盘文件，也就是下图的 Superblock。在此 Volume 中，用户写入的所有文件都会被合并到该大磁盘文件中，借用 Haystack 的术语，每一个文件都是“一根针”，needle。

![SeaweedFS Superblock](../../images/seaweedfs_superblock.png)

SeaweedFS 中数据写入和读取流程：

1. 在开始写入数据之前，客户端向 Master Server 发起写入申请。
2. SeaweedFS 根据当前的数据量返回一个 File ID，这个 ID 由 \<volume id, file key, file cookie\> 三部分构成。在写入的过程中，一并被写入的还有基础的元数据信息（文件长度与 Chunk 等信息）。
3. 当写入完成之后，调用者需要在一个外部系统（例如 MySQL）中对该文件与返回的 File ID 进行关联保存。
4. 在读取数据时，由于 Volume 的索引信息已被加载入内存，可以通过 File ID 直接获取文件位置（偏移）的所有信息，因此可以高效地将文件的内容读取出来。

在上述的底层存储服务之上，SeaweedFS 提供了一个名为 Filer 的组件，他对接 Volume Server 与 Master Server，对外提供丰富的功能与特性，如 POSIX 支持、WebDAV、S3 API。与 JuiceFS 相同，Filer 也需要对接一个外部数据库以保存元数据信息。

## JuiceFS 技术架构

JuiceFS 采用元数据与数据分离存储的架构：

- 文件数据本身会被切分保存在对象存储（如 S3）当中
- 元数据被保存在元数据引擎中，元数据引擎是一个由用户自行选择数据库，如 Redis、MySQL。

客户端连接元数据引擎获取元数据服务，然后将实际数据写入对象存储，实现强一致性分布式文件系统。

![JuiceFS Arch](../../images/juicefs-arch-new.png)

JuiceFS 的架构在[「技术架构」](../architecture.md)有更详细的介绍。

## 架构对比

### 元数据

SeaweedFS 与 JuiceFS 都支持通过外部数据库以存储文件系统的元数据信息：

- SeaweedFS 支持多达 [24](https://github.com/seaweedfs/seaweedfs/wiki/Filer-Stores) 种数据库。
- JuiceFS 对数据库事务能力要求更高（下方会详细介绍），当前支持了 [3 类共 10 种事务型数据库](../../reference/how_to_set_up_metadata_engine.md)。

### 原子性操作

* JuiceFS 严格确保每一项操作的原子性，因此对于元数据引擎（例如 Redis、MySQL）的事务能力有着较强的要求，因此支持的数据库更少。
* SeaweedFS 则对操作的原子性保证较弱，目前而言 SeaweedFS 仅在执行 `rename` 操作时启用了部分数据库（SQL、ArangoDB 和 TiKV）的事务，因此对于数据库的事务能力要求较低。同时，由于 SeaweedFS 在 `rename` 操作中拷贝元数据时，未对原目录或文件进行加锁，高负载下可能造成数据丢失。

### 变更日志以及相关功能

SeaweedFS 会为所有的元数据操作生成变更日志（changelog），日志可以被传输、重放，保证数据安全的同时，还能用来实现文件系统数据复制、操作审计等功能。

SeaweedFS 支持在多个集群之间进行文件系统数据复制，存在两种异步数据复制模式：

- 「Active-Active」：此模式中，两个集群都能够参与文件写入并双向同步。如果集群节点数量超过 2，SeaweedFS 的一些操作（如重命名目录）会受到一些限制。
- 「Active-Passive」：此模式是主从关系，Passive 一方只读。

这两种模式都是通过传递 changelog 再应用的机制实现了不同集群数据间的一致性，对于每一条 changelog，其中会有一个签名信息以保证同一个修改不会被循环多次。

JuiceFS 社区版没有实现变更日志，但可以自行使用元数据引擎和对象存储自身的数据复制能力实现文件系统镜像功能，比方说 [MySQL](https://dev.mysql.com/doc/refman/8.0/en/replication.html) 或 [Redis](https://redis.io/docs/management/replication) 仅支持数据复制，配合上 [S3 的复制对象功能](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/replication.html)，就能脱离 JuiceFS 实现类似 SeaweedFS 的 Active-Passive 模式。

顺带一提，JuiceFS 企业版的元数据引擎也是基于变更日志实现，支持[数据复制](https://juicefs.com/docs/zh/cloud/guide/replication)、[镜像文件](https://juicefs.com/docs/zh/cloud/guide/mirror)系统，可以点击对应文档链接以了解更多。

## 存储对比

如前文所述，SeaweedFS 的数据存储由 Volume Server + Master Server 实现，支持小数据块的合并存储、纠删码等特性。而 JuiceFS 的数据存储则是依托于对象存储服务服务，相关的特性也都由对象存储提供。

### 文件拆分

SeaweedFS 与 JuiceFS 都会将文件拆分成若干个小块再持久化到底层的数据系统中：

- SeaweedFS 将文件拆分成 8MB 的块，对于超大文件（超过 8GB），它会将 Chunk 索引也保存到底层的数据系统中。
- JuiceFS 内部会使用 64MB 的逻辑数据块（Chunk），再拆成 4MB 的 Block 上传至对象存储，这点在[架构文档](../architecture.md#how-juicefs-store-files)中有更详细的介绍。

### 分层存储

对于新创建的 Volume，SeaweedFS 会把数据存储在本地，而对于较旧的 Volume，SeaweedFS 支持将他们上传至云端以达到[冷热数据的分离](https://github.com/seaweedfs/seaweedfs/wiki/Tiered-Storage)。JuiceFS 自身并没有实现分层存储的功能，而是直接使用对象存储提供的分层管理服务，比如 [S3 存储类](https://aws.amazon.com/cn/s3/storage-classes/glacier/?nc1=h_ls)。

### 数据压缩

JuiceFS 支持使用 LZ4 或者 Zstandard 来为所有写入的数据进行压缩，而 SeaweedFS 则是根据写入文件的扩展名、文件类型等信息来选择是否进行压缩。

### 加密

二者均支持加密，包括传输中加密及静态加密：

* SeaweedFS 支持传输中加密与静态加密。在开启了数据加密后，所有写入 Volume Server 的数据都会使用随机的密钥进行加密，而这些对应的随机密钥信息则由维护文件元数据的 Filer 进行管理，详见 [Wiki](https://github.com/seaweedfs/seaweedfs/wiki/Filer-Data-Encryption)。
* JuiceFS 的加密功能详见[文档](../../security/encryption.md)。

## 客户端协议对比

### POSIX

JuiceFS [完全兼容 POSIX](../../reference/posix_compatibility.md)，而 SeaweedFS 目前[实现了部分的 POSIX 兼容](https://github.com/seaweedfs/seaweedfs/wiki/FUSE-Mount)），功能还持续完善中。

### S3

JuiceFS 实现了 [S3 网关](https://juicefs.com/docs/zh/community/s3_gateway)，因此如果有需要，可以通过 S3 API 直接访问文件系统，也能使用 s3cmd、AWS CLI、MinIO Client（mc）等工具直接管理文件系统。

SeaweedFS 当前[支持部分 S3 API](https://github.com/seaweedfs/seaweedfs/wiki/Amazon-S3-API)，覆盖了常用的读写查删等请求，对一些特定的请求（如 Read）还做了功能上的扩展。

### HDFS

JuiceFS [完整兼容 HDFS API](../../deployment/hadoop_java_sdk.md)。包括 Hadoop 2.x 和 Hadoop 3.x，以及 Hadoop 生态系统中的各种组件。SeaweedFS 则是提供了对 HDFS API 的[基础兼容](https://github.com/seaweedfs/seaweedfs/wiki/Hadoop-Compatible-File-System)，一些更加高级的操作如如 truncate、concat、checksum 和扩展属性等则尚未支持。

### CSI 驱动

二者均支持 CSI 驱动，详见：

* [SeaweedFS CSI 驱动](https://github.com/seaweedfs/seaweedfs-csi-driver)
* [JuiceFS CSI 驱动](https://github.com/juicedata/juicefs-csi-driver)

### WebDAV

二者均支持 WebDAV 协议，详见：

* [SeaweedFS Wiki](https://github.com/seaweedfs/seaweedfs/wiki/WebDAV)
* [JuiceFS 文档](../../deployment/webdav.md)

## 其他高级功能

### 客户端缓存

SeaweedFS 客户端[具备简单客户端缓存能力](https://github.com/seaweedfs/seaweedfs/wiki/FUSE-Mount)，由于在写作期间未能找到具体文档，可以直接在其[源码](https://github.com/seaweedfs/seaweedfs/blob/master/weed/command/mount.go)中搜索 `cache` 相关字样。

JuiceFS 客户端支持[元数据以及数据缓存](../../guide/cache.md)，提供更丰富的定制空间，允许用户根据自己的应用场景进行调优。

### 对象存储网关

SeaweedFS 可以作为[对象存储的网关（Gateway）](https://github.com/seaweedfs/seaweedfs/wiki/Gateway-to-Remote-Object-Storage)来使用，可以将对象存储中指定的数据预热到本地，在本地发生的数据修改也会异步同步到对象存储中。

由于 JuiceFS 使用对象存储的方式是将文件进行切分存储，因架构所限，不支持直接作为对象存储的网关或者缓存层。但是在 JuiceFS 企业版，我们开发了单独的功能，为对象存储中已有的数据提供缓存服务，功能类似 SeaweedFS。

### 回收站

JuiceFS 支持并默认开启[回收站](../../security/trash.md)功能，删除的文件会保留指定的时间才删除，避免数据误删，保证数据安全。SeaweedFS 暂不支持此功能。

### 运维

二者均提供完善的运维和排查调优方案：

* JuiceFS 可以通过 [`juicefs stats`](../../administration/fault_diagnosis_and_analysis.md#stats)，[`juicefs profile`](../../administration/fault_diagnosis_and_analysis.md#profile) 来实时观测文件系统性能。也可以通过 [`metrics API`](../../administration/monitoring.md#collect-metrics) 将监控数据接入到 Prometheus，用 Grafana 进行可视化和监控告警。
* SeaweedFS 可以通过 [`weed shell`](https://github.com/seaweedfs/seaweedfs/wiki/weed-shell) 来交互式执行运维工作，例如查看当前集群状态、列举文件列表。SeaweedFS 同时支持 [push 和 pull 方式](https://github.com/seaweedfs/seaweedfs/wiki/System-Metrics) 对接 Prometheus。


================================================
FILE: docs/zh_cn/introduction/io_processing.md
================================================
---
title: 读写请求处理流程
sidebar_position: 3
slug: /internals/io_processing
description: 本文分别介绍 JuiceFS 的读和写的流程，更进一步的介绍 JuiceFS 读写分块技术在操作系统上的实现过程。
---

## 写入流程 {#workflow-of-write}

JuiceFS 对大文件会做多级拆分（[JuiceFS 如何存储文件](../introduction/architecture.md#how-juicefs-store-files)），以提高读写效率。在处理写请求时，JuiceFS 先将数据写入 Client 的内存缓冲区，并在其中按 Chunk/Slice 的形式进行管理。Chunk 是根据文件内 offset 按 64 MiB 大小拆分的连续逻辑单元，不同 Chunk 之间完全隔离。每个 Chunk 内会根据应用写请求的实际情况进一步拆分成 Slice；当新的写请求与已有的 Slice 连续或有重叠时，会直接在该 Slice 上进行更新，否则就创建新的 Slice。Slice 是启动数据持久化的逻辑单元，其在 flush 时会先将数据按照默认 4 MiB 大小拆分成一个或多个连续的 Block，并作为最小单元上传到对象存储；然后再更新一次元数据，写入新的 Slice 信息。

显然，在应用顺序写情况下，只需要一个不停增长的 Slice，最后仅 `flush` 一次即可；此时能最大化发挥出对象存储的写入性能。以一次简单的 [JuiceFS 基准测试](../benchmark/performance_evaluation_guide.md)为例，使用 1 MiB IO 顺序写 1 GiB 文件，在不考虑压缩和加密的前提下，数据在各个组件中的形式如下图所示：

![internals-write](../images/internals-write.png)

用 [`juicefs stats`](../reference/command_reference.mdx#stats) 命令记录的指标图，可以直观地看到实时性能数据：

![internals-stats](../images/internals-stats.png)

图中第 1 阶段：

- 对象存储写入的平均 IO 大小为 `object.put / object.put_c = 4 MiB`，等于 Block 的默认大小
- 元数据事务数与对象存储写入数比例大概为 `meta.txn : object.put_c ~= 1 : 16`，对应 Slice flush 需要的 1 次元数据修改和 16 次对象存储上传，同时也说明了每次 flush 写入的数据量为 4 MiB * 16 = 64 MiB，即 Chunk 的默认大小
- FUSE 层的平均请求大小为约 `fuse.write / fuse.ops ~= 128 KiB`，与其默认的请求大小限制一致

小文件的写入通常是在文件关闭时被上传到对象存储，对应 IO 大小一般就是文件大小。指标图的第 3 阶段是创建 128 KiB 小文件，可以发现：

- 对象存储 PUT 的大小就是 128 KiB
- 元数据事务数大致是 PUT 计数的两倍，对应每个文件的一次 Create 和一次 Write

对于这种不足一个 Block Size 的对象，JuiceFS 在上传的同时还会尝试写入到本地[缓存](../guide/cache.md)，来提升后续可能的读请求速度。因此从图中第 3 阶段也可以看到，创建小文件时，本地缓存（blockcache）与对象存储有着同等的写入带宽，而在读取时（第 4 阶段）大部分均在缓存命中，这使得小文件的读取速度看起来特别快。

由于写请求写入客户端内存缓冲区即可返回，因此通常来说 JuiceFS 的 Write 时延非常低（几十微秒级别），真正上传到对象存储的动作由内部自动触发，比如单个 Slice 过大，Slice 数量过多，或者仅仅是在缓冲区停留时间过长等，或应用主动触发，比如关闭文件、调用 `fsync` 等。

缓冲区中的数据只有在被持久化后才能释放，因此当写入并发较大时，如果缓冲区大小不足（默认 300MiB，通过 [`--buffer-size`](../reference/command_reference.mdx#mount-data-cache-options) 调节），或者对象存储性能不佳，读写缓冲区将持续被占用而导致写阻塞。缓冲区大小可以在指标图的 usage.buf 一列中看到。当使用量超过阈值时，JuiceFS Client 会主动为 Write 添加约 10ms 等待时间以减缓写入速度；若已用量超过阈值两倍，则会导致写入暂停直至缓冲区得到释放。因此，在观察到 Write 时延上升以及 Buffer 长时间超过阈值时，通常需要尝试设置更大的 `--buffer-size`。另外，增大上传并发度（[`--max-uploads`](../reference/command_reference.mdx#mount-data-storage-options)，默认 20）也能提升写入到对象存储的带宽，从而加快缓冲区的释放。

### 随机写 {#random-write}

JuiceFS 支持随机写，包括通过 mmap 等进行的随机写。

要知道，Block 是一个不可变对象，这也是因为大部分对象存储服务并不支持修改对象，只能重新上传覆盖。因此发生覆盖写、大文件随机写时，并不会将 Block 重新下载、修改、重新上传（这样会带来严重的读写放大！），而是在新分配或者已有 Slice 中进行写入，以新 Block 的形式上传至对象存储，然后修改对应文件的元数据，在 Chunk 的 Slice 列表中追加新 Slice。后续读取文件时，其实在读取通过合并 Slice 得到的视图。

因此相较于顺序写来说，大文件随机写的情况更复杂：每个 Chunk 内可能存在多个不连续的 Slice，使得一方面数据对象难以达到 4 MiB 大小，另一方面元数据需要多次更新。因此，JuiceFS 在大文件随机写有明显的性能下降。当一个 Chunk 内已写入的 Slice 过多时，会触发碎片清理（Compaction）来尝试合并与清理这些 Slice，来提升读性能。碎片清理以后台任务形式发生，除了系统自动运行，还能通过 [`juicefs gc`](../administration/status_check_and_maintenance.md#gc) 命令手动触发。

### 客户端写缓存 {#client-write-cache}

客户端写缓存，也称为「回写模式」。

如果对数据一致性和可靠性没有极致要求，可以在挂载时添加 `--writeback` 以进一步提写性能。客户端缓存开启后，Slice flush 仅需写到本地缓存目录即可返回，数据由后台线程异步上传到对象存储。换个角度理解，此时本地目录就是对象存储的缓存层。

更详细的介绍请见[「客户端写缓存」](../guide/cache.md#client-write-cache)。

## 读取流程 {#workflow-of-read}

JuiceFS 支持顺序读和随机读（包括基于 mmap 的随机读），在处理读请求时会通过对象存储的 `GetObject` 接口完整读取 Block 对应的对象，也有可能仅仅读取对象中一定范围的数据（比如通过 [S3 API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html) 的 `Range` 参数限定读取范围）。与此同时异步地进行预读（通过 [`--prefetch`](../reference/command_reference.mdx#mount) 参数控制预读并发度），预读会将整个对象存储块下载到本地缓存目录，以备后用（如指标图中的第 2 阶段，blockcache 有很高的写入带宽）。显然，在顺序读时，这些提前获取的数据都会被后续的请求访问到，缓存命中率非常高，因此也能充分发挥出对象存储的读取性能。数据流如下图所示：

![internals-read](../images/internals-read.png)

但是对于大文件随机读场景，预读的用途可能不大，反而容易因为读放大和本地缓存的频繁写入与驱逐使得系统资源的实际利用率降低，此时可以考虑用 `--prefetch=0` 禁用预读。考虑到此类场景下，一般的缓存策略很难有足够高的收益，可考虑尽可能提升缓存的整体容量，达到能几乎完全缓存所需数据的效果；或者直接禁用缓存（`--cache-size=0`），并尽可能提高对象存储的读取性能。

小文件的读取则比较简单，通常就是在一次请求里读取完整个文件。由于小文件写入时会直接被缓存起来，因此，之后的读性能非常可观。


================================================
FILE: docs/zh_cn/reference/_common_options.mdx
================================================
#### 元数据相关参数 {#mount-metadata-options}

|项 | 说明|
|-|-|
|`--subdir=value`|挂载指定的子目录，默认挂载整个文件系统。|
|`--backup-meta=3600`|自动备份元数据到对象存储的间隔时间；单位秒，默认 3600，设为 0 表示不备份。|
|`--backup-skip-trash` <VersionAdd>1.2</VersionAdd>|备份元数据时跳过回收站中的文件和目录。|
|`--heartbeat=12`|发送心跳的间隔（单位秒），建议所有客户端使用相同的心跳值 (默认：12)|
|`--read-only`|只读模式，只允许 lookup 和 read 请求。注意，只读模式隐含了 `--no-bgjob`，因此只读客户端不会运行后台任务。|
|`--no-bgjob`|禁用后台任务，默认为 false，也就是说客户端会默认运行后台任务。后台任务包含：<br/><ul><li>清理回收站中过期的文件（在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupDeletedFiles` 和 `cleanupTrash`）</li><li>清理引用计数为 0 的 Slice（在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `cleanupSlices`）</li><li>清理过期的客户端会话（在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/base.go) 中搜索 `CleanStaleSessions`）</li></ul>特别地，与[企业版](https://juicefs.com/docs/zh/cloud/guide/background-job)不同，社区版碎片合并（Compaction）不受该选项的影响，而是随着文件读写操作，自动判断是否需要合并，然后异步执行（以 Redis 为例，在 [`pkg/meta/base.go`](https://github.com/juicedata/juicefs/blob/main/pkg/meta/redis.go) 中搜索 `compactChunk`）|
|`--atime-mode=noatime` <VersionAdd>1.1</VersionAdd>|控制如何更新 atime（文件最后被访问的时间）。支持以下模式：<br/><ul><li>`noatime`（默认）：仅在文件创建和主动调用 `SetAttr` 时设置，平时访问与修改文件不影响 atime 值。考虑到更新 atime 需要运行额外的事务，对性能有影响，因此默认关闭。</li><li>`relatime`：仅在 mtime（文件内容修改时间）或 ctime（文件元数据修改时间）比 atime 新，或者 atime 超过 24 小时没有更新时进行更新。</li><li>`strictatime`：持续更新 atime</li></ul>|
|`--skip-dir-nlink=20` <VersionAdd>1.1</VersionAdd>|跳过更新目录 nlink 前的重试次数 (仅用于 TKV, 0 代表永不跳过) (默认：20)|
|`--skip-dir-mtime=100ms` <VersionAdd>1.2</VersionAdd>|如果 mtime 差异小于该值（默认值：100ms），则跳过更新目录的属性。|
|`--sort-dir` <VersionAdd>1.3</VersionAdd>|按名称对目录中的条目进行排序|
|`--fast-statfs` <VersionAdd>1.3</VersionAdd>|通过使用本地缓存减少元数据访问提升`statfs`性能，准确性会降低（默认：false）|

#### 元数据缓存参数 {#mount-metadata-cache-options}

元数据缓存的介绍和使用，详见[「内核元数据缓存」](../guide/cache.md#kernel-metadata-cache)及[「客户端内存元数据缓存」](../guide/cache.md#client-memory-metadata-cache)。

|项 | 说明|
|-|-|
|`--attr-cache=1`|属性缓存过期时间；单位为秒，默认为 1。|
|`--entry-cache=1`|文件项缓存过期时间；单位为秒，默认为 1。|
|`--dir-entry-cache=1`|目录项缓存过期时间；单位为秒，默认为 1。|
|`--open-cache=0`|打开的文件的缓存过期时间，单位为秒，默认为 0，代表关闭该特性。|
|`--open-cache-limit=value` <VersionAdd>1.1</VersionAdd>|允许缓存的最大文件个数 (软限制，0 代表不限制) (默认：10000)|
|`--readdir-cache=false` <VersionAdd>1.3, only for mount</VersionAdd>|开启目录项缓存，默认为 false，代表不开启|
|`--negative-entry-cache=0` <VersionAdd>1.3, only for mount</VersionAdd>|失败 lookup 查询 (返回 ENOENT) 缓存过期时间，默认为 0，代表不缓存|

#### 数据存储参数 {#mount-data-storage-options}

|项 | 说明|
|-|-|
|`--storage=file`|对象存储类型 (例如 `s3`、`gs`、`oss`、`cos`) (默认：`"file"`，参考[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)查看所有支持的对象存储类型)|
|`--bucket=value`|为当前挂载点指定访问对象存储的 Endpoint。|
|`--storage-class value` <VersionAdd>1.1</VersionAdd>|当前客户端写入数据的存储类型|
|`--get-timeout=60`|下载一个对象的超时时间；单位为秒 (默认：60)|
|`--put-timeout=60`|上传一个对象的超时时间；单位为秒 (默认：60)|
|`--io-retries=10`|网络异常时的重试次数，元数据请求的重试次数也由这个选项控制。如果超过重试次数将会返回 `EIO Input/output error` 错误。（默认：10）|
|`--max-uploads=20`|上传并发度，默认为 20。对于粒度为 4M 的写入模式，20 并发已经是很高的默认值，在这样的写入模式下，提高写并发往往需要伴随增大 `--buffer-size`, 详见「[读写缓冲区](../guide/cache.md#buffer-size)」。但面对百 K 级别的小随机写，并发量大的时候很容易产生阻塞等待，造成写入速度恶化。如果无法改善应用写模式，对其进行合并，那么需要考虑采用更高的写并发，避免排队等待。|
|`--max-stage-write=0` <VersionAdd>1.2</VersionAdd>|异步写入数据块到缓存盘的最大并发数，如果达到最大并发数则会直接上传对象存储（此选项仅在启用[「客户端写缓存」](../guide/cache.md#client-write-cache)时有效）（默认值：0，即没有并发限制）|
|`--max-deletes=10`|删除对象的连接数 (默认：10)|
|`--upload-limit=0`|上传带宽限制，单位为 Mbps (默认：0)|
|`--download-limit=0`|下载带宽限制，单位为 Mbps (默认：0)|
|`--check-storage`<VersionAdd>1.3</VersionAdd>|在挂载前测试存储以提前暴露访问问题|

#### 数据缓存相关参数 {#mount-data-cache-options}

|项 | 说明|
|-|-|
|`--buffer-size=300`|读写缓冲区的总大小；单位为 MiB (默认：300)。阅读[「读写缓冲区」](../guide/cache.md#buffer-size)了解更多。|
|`--prefetch=1`|并发预读 N 个块 (默认：1)。阅读[「客户端读缓存」](../guide/cache.md#client-read-cache)了解更多。|
|`--writeback`|后台异步上传对象，默认为 false。阅读[「客户端写缓存」](../guide/cache.md#client-write-cache)了解更多。|
|`--upload-delay=0`|启用 `--writeback` 后，可以使用该选项控制数据延迟上传到对象存储，默认为 0 秒，相当于写入后立刻上传。该选项也支持 `s`（秒）、`m`（分）、`h`（时）这些单位。如果在等待的时间内数据被应用删除，则无需再上传到对象存储。如果数据只是临时落盘，可以考虑用该选项节约资源。阅读[「客户端写缓存」](../guide/cache.md#client-write-cache)了解更多。|
|`--upload-hours` <VersionAdd>1.2</VersionAdd>|启用 `--writeback` 后，只在一天中指定的时间段上传数据块。参数的格式为 `<起始小时>,<结束小时>`（含「起始小时」，但是不含「结束小时」，「起始小时」必须小于或者大于「结束小时」），其中 `<小时>` 的取值范围为 0 到 23。例如 `0,6` 表示只在每天 0:00 至 5:59 之间上传数据块、`23,3` 表示只在每天 23:00 至第二天 2:59 之间上传数据块。|
|`--cache-dir=value`|本地缓存目录路径；使用 `:`（Linux、macOS）或 `;`（Windows）隔离多个路径 (默认：`$HOME/.juicefs/cache` 或 `/var/jfsCache`)。阅读[「客户端读缓存」](../guide/cache.md#client-read-cache)了解更多。|
|`--cache-mode value` <VersionAdd>1.1</VersionAdd>|缓存块的文件权限 (默认："0600")|
|`--cache-size=102400`|缓存对象的总大小；单位为 MiB (默认：102400)。阅读[「客户端读缓存」](../guide/cache.md#client-read-cache)了解更多。|
|`--cache-items=0` <VersionAdd>1.3</VersionAdd> |最大缓存项目数 (默认会根据`free-space-ratio`计算最大值)|
|`--free-space-ratio=0.1`|最小剩余空间比例，默认为 0.1。如果启用了[「客户端写缓存」](../guide/cache.md#client-write-cache)，则该参数还控制着写缓存占用空间。阅读[「客户端读缓存」](../guide/cache.md#client-read-cache)了解更多。|
|`--cache-partial-only`|仅缓存随机小块读，默认为 false。阅读[「客户端读缓存」](../guide/cache.md#client-read-cache)了解更多。|
|`--cache-large-write` <VersionAdd>1.3</VersionAdd>|在上传后缓存完整数据块|
|`--verify-cache-checksum=extend` <VersionAdd>1.1</VersionAdd>|缓存数据一致性检查级别，启用 Checksum 校验后，生成缓存文件时会对数据切分做 Checksum 并记录于文件末尾，供读缓存时进行校验。支持以下级别：<br/><ul><li>`none`：禁用一致性检查，如果本地数据被篡改，将会读到错误数据；</li><li>`full`（1.3 以前的默认值）：读完整数据块时才校验，适合顺序读场景；</li><li>`shrink`：对读范围内的切片数据进行校验，校验范围不包含读边界所在的切片（可以理解为开区间），适合随机读场景；</li><li>`extend`（1.3+ 的默认值）：对读范围内的切片数据进行校验，校验范围同时包含读边界所在的切片（可以理解为闭区间），因此将带来一定程度的读放大，适合对正确性有极致要求的随机读场景。</li></ul>|
|`--cache-eviction=2-random` <VersionAdd>1.1</VersionAdd>|缓存逐出策略（`none` 或 `2-random`）（默认值：2-random）|
|`--cache-scan-interval=1h` <VersionAdd>1.1</VersionAdd>|扫描缓存目录重建内存索引的间隔（以秒为单位）（默认值：1h）|
|`--cache-expire=0` <VersionAdd>1.2</VersionAdd>|超过设置的时间未被访问的缓存块将会被自动清除（即使 `--cache-eviction` 的值为 `none`，这些缓存块也会被删除），单位为秒，值为 0 表示永不过期（默认值：0）|
|`--max-readahead` <VersionAdd>1.3</VersionAdd>|最大预读缓冲区大小，单位为 MiB |

#### 监控相关参数 {#mount-metrics-options}

|项 | 说明|
|-|-|
|`--metrics=127.0.0.1:9567`|监控数据导出地址，默认为 `127.0.0.1:9567`。|
|`--custom-labels`|监控指标自定义标签，格式为 `key1:value1;key2:value2` (默认："")|
|`--consul=127.0.0.1:8500`|Consul 注册中心地址，默认为 `127.0.0.1:8500`。|
|`--no-usage-report`|不发送使用量信息 (默认：false)|

#### Windows 相关参数 {#mount-windows-options}

|项 | 说明|
|-|-|
|`--o=`|可以用于指定 FUSE 的额外挂载参数，具体支持情况由 WinFsp 决定。|
|`--log=c:/juicefs.log` <VersionAdd>1.3</VersionAdd>|JuiceFS 日志保存路径（仅对后台运行生效）。|
|`-d` <VersionAdd>1.3</VersionAdd>|是否后台运行。在 Windows 系统下，指定了后台运行后，JuiceFS 将以系统服务运行。（注：运行此参数时，需要有管理员权限，并且同一时间只能挂载一个文件系统）|
|`--fuse-trace-log=c:/fuse.log` <VersionAdd>1.3</VersionAdd>|用于指定 WinFsp FUSE 层的回调日志。（默认：""）|
|`--as-root`|这是一个兼容选项，开启此设置后，将会把所有文件 uid,gid 以及写入的身份都映射为 root(uid=0) 用户。|
|`--show-dot-files` <VersionAdd>1.3 </VersionAdd>|显示`.`开头的文件。默认情况下，这些文件会被设置为隐藏文件。|
|`--winfsp-threads=16` <VersionAdd>1.3</VersionAdd>|设置 WinFsp 用于处理内核事件的线程数量，默认为 min(CPU 核数 * 2, 16)。|
|`--report-case` <VersionAdd>1.3</VersionAdd>|配置 JuiceFS 在处理文件名时，是否尽可能上报精确的大小写信息。例如在使用 aaa.txt 打开一个实际为 AAA.txt 的文件名时，JuiceFS 是否向 Windows 内核汇报实际的文件名。（打开此选项可能会对性能有影响）|


================================================
FILE: docs/zh_cn/reference/command_reference.mdx
================================================
---
title: 命令参考
sidebar_position: 1
slug: /command_reference
description: JuiceFS 客户端的所有命令及选项的说明、用法和示例。
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

<!-- 特别提示：由于 mount、gateway 和 webdav 命令存在很多公共选项，为了简化文档维护，我们已经将这些公共选项统一写在 _common_options.mdx 文件中，如需更新相关内容，请查看该文件。 -->
import CommonOptions from './_common_options.mdx';

在终端输入 `juicefs` 并执行，就能看到所有可用的命令。在每个子命令后面添加 `-h/--help` 并运行，就能获得该命令的详细帮助信息，例如 `juicefs format -h`。

```
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   1.2.0

COMMANDS:
   ADMIN:
     format   Format a volume
     config   Change configuration of a volume
     quota    Manage directory quotas
     destroy  Destroy an existing volume
     gc       Garbage collector of objects in data storage
     fsck     Check consistency of a volume
     restore  restore files from trash
     dump     Dump metadata into a JSON file
     load     Load metadata from a previously dumped JSON file
     version  Show version
   INSPECTOR:
     status   Show status of a volume
     stats    Show real time performance statistics of JuiceFS
     profile  Show profiling of operations completed in JuiceFS
     info     Show internal information of a path or inode
     debug    Collect and display system static and runtime information
     summary  Show tree summary of a directory
   SERVICE:
     mount    Mount a volume
     umount   Unmount a volume
     gateway  Start an S3-compatible gateway
     webdav   Start a WebDAV server
   TOOL:
     bench     Run benchmarks on a path
     objbench  Run benchmarks on an object storage
     warmup    Build cache for target directories/files
     rmr       Remove directories recursively
     sync      Sync between two storages
     clone     clone a file or directory without copying the underlying data
     compact   Trigger compaction of chunks

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             show warning and errors only (default: false)
   --trace                 enable trace log (default: false)
   --log-id value          append the given log id in log, use "random" to use random uuid
   --no-agent              disable pprof (:6060) agent (default: false)
   --pyroscope value       pyroscope address
   --no-color              disable colors (default: false)
   --help, -h              show help (default: false)
   --version, -V           print version only (default: false)

COPYRIGHT:
   Apache License 2.0
```

## 自动补全 {#auto-completion}

通过加载 [`hack/autocomplete`](https://github.com/juicedata/juicefs/tree/main/hack/autocomplete) 目录下的对应脚本可以启用命令的自动补全，例如：

<Tabs groupId="juicefs-cli-autocomplete">
  <TabItem value="bash" label="Bash">

```shell
source hack/autocomplete/bash_autocomplete
```

  </TabItem>
  <TabItem value="zsh" label="Zsh">

```shell
source hack/autocomplete/zsh_autocomplete
```

  </TabItem>
</Tabs>

请注意自动补全功能仅对当前会话有效。如果你希望对所有新会话都启用此功能，请将 `source` 命令添加到 `.bashrc` 或 `.zshrc` 中：

<Tabs groupId="juicefs-cli-autocomplete">
  <TabItem value="bash" label="Bash">

```shell
echo "source path/to/bash_autocomplete" >> ~/.bashrc
```

  </TabItem>
  <TabItem value="zsh" label="Zsh">

```shell
echo "source path/to/zsh_autocomplete" >> ~/.zshrc
```

  </TabItem>
</Tabs>

另外，如果你是在 Linux 系统上使用 bash，也可以直接将脚本拷贝到 `/etc/bash_completion.d` 目录并将其重命名为 `juicefs`：

```shell
cp hack/autocomplete/bash_autocomplete /etc/bash_completion.d/juicefs
source /etc/bash_completion.d/juicefs
```

## 全局选项 {#global-options}

|项 | 说明|
|-|-|
|`-q` `--quiet`|仅显示警告及错误日志。|
|`-v` `--verbose` `--debug`|开启调试日志。|
|`--trace`|开启比 `--debug` 选项更详细的调试日志。|
|`--no-agent`|关闭 pprof 代理。|
|`--pyroscope`|配置 [Pyroscope](https://github.com/pyroscope-io/pyroscope) 地址，如 `http://localhost:4040`。|
|`--no-color`|关闭日志的颜色。|

## 管理 {#admin}

### `juicefs format` {#format}

创建并格式化文件系统，如果 `META-URL` 中已经存在一个文件系统，不会再次进行格式化。如果文件系统创建后需要调整配置，请使用 [`juicefs config`](#config)。

#### 概览

```shell
juicefs format [command options] META-URL NAME

# 创建一个简单的测试卷（数据将存储在本地目录中）
juicefs format sqlite3://myjfs.db myjfs

# 使用 Redis 和 S3 创建卷
juicefs format redis://localhost myjfs --storage=s3 --bucket=https://mybucket.s3.us-east-2.amazonaws.com

# 使用带有密码的 MySQL 创建卷
juicefs format mysql://jfs:mypassword@(127.0.0.1:3306)/juicefs myjfs
# 更安全的方法
META_PASSWORD=mypassword juicefs format mysql://jfs:@(127.0.0.1:3306)/juicefs myjfs

# 创建一个开启配额设置的卷
juicefs format sqlite3://myjfs.db myjfs --inodes=1000000 --capacity=102400

# 创建一个关闭了回收站的卷
juicefs format sqlite3://myjfs.db myjfs --trash-days=0
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`NAME`|文件系统名称。|
|`--force`|强制覆盖当前的格式化配置，默认为 false。|
|`--no-update`|不要修改已有的格式化配置，默认为 false。|

#### 数据存储参数 {#format-data-storage-options}

|项 | 说明|
|-|-|
|`--storage=file`|对象存储类型，例如 `s3`、`gs`、`oss`、`cos`。默认为 `file`，参考[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)查看所有支持的对象存储类型。|
|`--bucket=path`|存储数据的桶路径（默认：`$HOME/.juicefs/local` 或 `/var/jfs`）。|
|`--access-key=value`|对象存储的 Access Key，也可通过环境变量 `ACCESS_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--secret-key=value`|对象存储的 Secret Key，也可通过环境变量 `SECRET_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--session-token=value`|对象存储的临时访问凭证（Session Token），查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#session-token)以了解更多。|
|`--storage-class=value` <VersionAdd>1.1</VersionAdd>|默认存储类型。|

#### 数据格式参数 {#format-data-format-options}

|项 | 说明|
|-|-|
|`--block-size=4M`|块大小，单位为 KiB，默认 4M。4M 是一个较好的默认值，不少对象存储（比如 S3）都将 4M 设为内部的块大小，因此将 JuiceFS block size 设为相同大小，往往也能获得更好的性能。|
|`--compress=none`|压缩算法，支持 `lz4`、`zstd`、`none`（默认），启用压缩将不可避免地对性能产生一定影响。这两种压缩算法中，`lz4` 提供更好的性能，但压缩比要逊于 `zstd`，他们的具体性能差别具体需要读者自行搜索了解。|
|`--encrypt-rsa-key=value`|RSA 私钥的路径，查看[数据加密](../security/encryption.md)以了解更多。|
|`--encrypt-algo=aes256gcm-rsa`|加密算法 (aes256gcm-rsa, chacha20-rsa) (默认："aes256gcm-rsa")|
|`--hash-prefix`|对于部分对象存储服务，如果对象存储命名路径的键值（key）是连续的，那么坐落在对象存储上的物理数据也将是连续的。在大规模顺序读场景下，这样会带来数据访问热点，让对象存储服务的部分区域访问压力过大。<br/><br/>启用 `--hash-prefix` 将会给每个对象路径命名添加 hash 前缀（用 slice ID 对 256 取模，详见[内部实现](../development/internals.md#object-storage-naming-format)），相当于“打散”对象存储键值，避免在对象存储服务层面创造请求热点。显而易见，由于影响着对象存储块的命名规则，该选项**必须在创建文件系统之初就指定好、不能动态修改。**<br/><br/>目前而言，[AWS S3](https://aws.amazon.com/about-aws/whats-new/2018/07/amazon-s3-announces-increased-request-rate-performance) 已经做了优化，不再需要应用侧的随机对象前缀。而对于其他对象对象存储服务（比如 [COS 就在文档里推荐随机化前缀](https://cloud.tencent.com/document/product/436/13653#.E6.B7.BB.E5.8A.A0.E5.8D.81.E5.85.AD.E8.BF.9B.E5.88.B6.E5.93.88.E5.B8.8C.E5.89.8D.E7.BC.80)），因此，对于这些对象存储，如果文件系统规模庞大，建议启用该选项以提升性能。|
|`--shards=0`|如果对象存储服务在桶级别设置了限速（或者你使用自建的对象存储服务，单个桶的性能有限），可以将数据块根据名字哈希分散存入 N 个桶中。该值默认为 0，也就是所有数据存入单个桶。当 N 大于 0 时，`bucket` 需要包含 `%d` 占位符，例如 `--bucket=juicefs-%d`。`--shards` 设置无法动态修改，需要提前规划好用量。|

#### 管理参数 {#format-management-options}

|项 | 说明|
|-|-|
|`--capacity=0`|容量配额，单位为 GiB，默认为 0 代表不限制。如果启用了[回收站](../security/trash.md)，那么配额大小也将包含回收站文件。|
|`--inodes=0`|文件数配额，默认为 0 代表不限制。|
|`--trash-days=1`|文件被删除后，默认会进入[回收站](../security/trash.md)，该选项控制已删除文件在回收站内保留的天数，默认为 1，设为 0 以禁用回收站。|
|`--enable-acl=true` <VersionAdd>1.2</VersionAdd>|启用[POSIX ACL](../security/posix_acl.md)，该选项启用后暂不支持关闭。|

### `juicefs config` {#config}

修改指定文件系统的配置项。注意更新某些设置以后，客户端未必能立刻生效，需要等待一定时间，具体的等待时间可以通过 [`--heartbeat`](#mount) 选项控制。

#### 概览

```shell
juicefs config [command options] META-URL

# 显示当前配置
juicefs config redis://localhost

# 改变目录的配额
juicefs config redis://localhost --inodes 10000000 --capacity 1048576

# 更改回收站中文件可被保留的最长天数
juicefs config redis://localhost --trash-days 7

# 限制允许连接的客户端版本
juicefs config redis://localhost --min-client-version 1.0.0 --max-client-version 1.1.0
```

#### 参数

|项 | 说明|
|-|-|
|`--yes, -y`|对所有提示自动回答 "yes" 并以非交互方式运行 (默认值：false)|
|`--force`|跳过合理性检查并强制更新指定配置项 (默认：false)|

#### 数据存储参数 {#config-data-storage-options}

|项 | 说明|
|-|-|
|`--storage=file` <VersionAdd>1.1</VersionAdd>|对象存储类型，例如 `s3`、`gs`、`oss`、`cos`。默认为 `file`，参考[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)查看所有支持的对象存储类型。|
|`--bucket=path`|存储数据的桶路径（默认：`$HOME/.juicefs/local` 或 `/var/jfs`）。|
|`--access-key=value`|对象存储的 Access Key，也可通过环境变量 `ACCESS_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--secret-key=value`|对象存储的 Secret Key，也可通过环境变量 `SECRET_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--session-token=value`|对象存储的临时访问凭证（Session Token），查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#session-token)以了解更多。|
|`--storage-class=value` <VersionAdd>1.1</VersionAdd>|默认存储类型。|
|`--upload-limit=0`|上传带宽限制，单位为 Mbps (默认：0)|
|`--download-limit=0`|下载带宽限制，单位为 Mbps (默认：0)|

#### 管理参数 {#config-management-options}

|项 | 说明|
|-|-|
|`--capacity value`|容量配额，单位为 GiB|
|`--inodes value`|文件数配额|
|`--trash-days value`|文件被自动清理前在回收站内保留的天数|
|`--enable-acl` <VersionAdd>1.2</VersionAdd>|开启 [POSIX ACL](../security/posix_acl.md)（不支持关闭），同时允许连接的最小客户端版本会提升到 v1.2|
|`--encrypt-secret`|如果密钥之前以原格式存储，则加密密钥 (默认值：false)|
|`--min-client-version value` <VersionAdd>1.1</VersionAdd>|允许连接的最小客户端版本|
|`--max-client-version value` <VersionAdd>1.1</VersionAdd>|允许连接的最大客户端版本|
|`--dir-stats` <VersionAdd>1.1</VersionAdd>|开启目录统计，这是快速汇总和目录配额所必需的 (默认值：false)|

### `juicefs quota` <VersionAdd>1.1</VersionAdd> {#quota}

管理目录配额

#### 概览

```shell
juicefs quota command [command options] META-URL

# 为目录设置配额
juicefs quota set redis://localhost --path /dir1 --capacity 1 --inodes 100

# 获取目录配额信息
juicefs quota get redis://localhost --path /dir1

# 列出所有目录配额
juicefs quota list redis://localhost

# 删除目录配额
juicefs quota delete redis://localhost --path /dir1

# 检查目录配额的一致性
juicefs quota check redis://localhost
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`--path value`|卷中目录的全路径|
|`--capacity value`|目录空间硬限制，单位 GiB (默认：0)|
|`--inodes value`|用于硬限制目录 inode 数 (默认：0)|
|`--repair`|修复不一致配额 (默认：false)|
|`--strict`|在严格模式下计算目录的总使用量 (注意：对于大目录可能很慢) (默认：false)|

### `juicefs destroy` {#destroy}

销毁一个已经存在的文件系统，将会清空元数据引擎与对象存储中的相关数据。详见[「如何销毁文件系统」](../administration/destroy.md)。

#### 概览

```shell
juicefs destroy [command options] META-URL UUID

juicefs destroy redis://localhost e94d66a8-2339-4abd-b8d8-6812df737892
```

#### 参数

| 项                                         | 说明|
|-------------------------------------------|-|
| `--yes, -y` <VersionAdd>1.1</VersionAdd> |对所有提示自动回答 "yes" 并以非交互方式运行 (默认值：false)|
| `--force`                                 |跳过合理性检查并强制销毁文件系统 (默认：false)|

### `juicefs gc` {#gc}

如果对象存储块因为某种原因，完全脱离了 JuiceFS 的管理，也就是对象存储上依然还存在，但在 JuiceFS 元数据已经不复存在，无法被回收释放，这种现象称作「对象泄漏」。如果你并没有进行任何特殊操作，那么对象泄露通常昭示着 bug，建议提交 [GitHub Issue](https://github.com/juicedata/juicefs/issues/new/choose)。

与此同时，你可以用该命令清理泄漏对象。顺带一提，该命令还能够清理失效的文件碎片。详见[「状态检查 & 维护」](../administration/status_check_and_maintenance.md#gc)。

#### 概览

```shell
juicefs gc [command options] META-URL

# 只检查，没有更改的能力
juicefs gc redis://localhost

# 触发所有 slices 的压缩
juicefs gc redis://localhost --compact

# 删除泄露的对象
juicefs gc redis://localhost --delete
```

#### 参数

|项 | 说明|
|-|-|
|`--compact`|对所有文件执行碎片合并。|
|`--delete`|删除泄漏的对象，以及因不完整的 `clone` 命令而产生泄漏的元数据。|
|`--threads=10`|并发线程数，默认为 10。|

### `juicefs fsck` {#fsck}

检查文件系统一致性。

#### 概览

```shell
juicefs fsck [command options] META-URL

juicefs fsck redis://localhost
```

#### 参数

|项 | 说明|
|-|-|
|`--path value` <VersionAdd>1.1</VersionAdd>|待检查的 JuiceFS 中的绝对路径|
|`--repair` <VersionAdd>1.1</VersionAdd>|发现损坏后尽可能修复 (默认：false)|
|`--recursive, -r` <VersionAdd>1.1</VersionAdd>|递归检查或修复 (默认值：false)|
|`--sync-dir-stat` <VersionAdd>1.1</VersionAdd>|同步所有目录的状态，即使他们没有损坏 (注意：巨大的文件树可能会花费很长时间) (默认：false)|

### `juicefs restore` <VersionAdd>1.1</VersionAdd> {#restore}

重新构建回收站文件的树结构，并将它们放回原始目录。如果需要恢复文件存在命名冲突，程序会直接跳过，不会覆盖新创建的文件（注意日志中会有提示）。

#### 概览

```shell
juicefs restore [command options] META HOUR ...

juicefs restore redis://localhost/1 2023-05-10-01
```

#### 参数

|项 | 说明|
|-|-|
|`--put-back`|将恢复的文件移动到原始目录，面对命名冲突时会直接跳过，不会覆盖已有文件。|
|`--threads=10`|线程数，默认 10，如果恢复速度慢，增加并发以提速。|

### `juicefs dump` {#dump}

导出元数据。阅读[「元数据备份」](../administration/metadata_dump_load.md#backup)以了解更多。

#### 概览

```shell
juicefs dump [command options] META-URL [FILE]

# 导出元数据至 meta-dump.json
juicefs dump redis://localhost meta-dump.json

# 只导出文件系统的一个子目录的元数据
juicefs dump redis://localhost sub-meta-dump.json --subdir /dir/in/jfs
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`FILE`|导出文件路径，如果不指定，则会导出到标准输出。如果文件名以 `.gz` 结尾，将会自动压缩。|
|`--subdir=path`|只导出指定子目录的元数据。|
|`--keep-secret-key` <VersionAdd>1.1</VersionAdd>|导出对象存储认证信息，默认为 `false`。由于是明文导出，使用时注意数据安全。如果导出文件不包含对象存储认证信息，后续的导入完成后，需要用 [`juicefs config`](#config) 重新配置对象存储认证信息。|
|`--threads=10` <VersionAdd>1.2</VersionAdd>|并发线程数，默认 10。|
|`--fast` <VersionAdd>1.2</VersionAdd>|使用更多内存来加速导出。|
|`--skip-trash` <VersionAdd>1.2</VersionAdd>|跳过回收站中的文件和目录。|

### `juicefs load` {#load}

将元数据导入一个空的文件系统。阅读[「元数据恢复与迁移」](../administration/metadata_dump_load.md#recovery-and-migration)以了解更多。

#### 概览

```shell
juicefs load [command options] META-URL [FILE]

# 将元数据备份文件 meta-dump.json 导入数据库
juicefs load redis://127.0.0.1:6379/1 meta-dump.json
```

#### 参数

| 项                                                          | 说明|
|------------------------------------------------------------|-|
| `META-URL`                                                 |用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
| `FILE`                                                     |导入文件路径，如果不指定，则会从标准输入导入。如果文件名以 `.gz` 结尾，将会自动解压。|
| `--encrypt-rsa-key=path` <VersionAdd>1.0.4</VersionAdd>    |加密所使用的 RSA 私钥文件路径。|
| `--encrypt-algo=aes256gcm-rsa` <VersionAdd>1.0.4</VersionAdd> |加密算法，默认为 `aes256gcm-rsa`。|

## 检视 {#inspector}

### `juicefs status` {#status}

显示 JuiceFS 的状态。

#### 概览

```shell
juicefs status [command options] META-URL

juicefs status redis://localhost
```

#### 参数

|项 | 说明|
|-|-|
|`--session=0, -s 0`|展示指定会话 (SID) 的具体信息 (默认：0)|
|`--more, -m` <VersionAdd>1.1</VersionAdd>|显示更多的统计信息，可能需要很长时间 (默认值：false)|

### `juicefs stats` {#stats}

展示实时的性能统计信息，阅读[「实时性能监控」](../administration/fault_diagnosis_and_analysis.md#performance-monitor)以了解更多。

#### 概览

```shell
juicefs stats [command options] MOUNTPOINT

juicefs stats /mnt/jfs

# 更多的指标
juicefs stats /mnt/jfs -l 1
```

#### 参数

|项 | 说明|
|-|-|
|`--schema=ufmco`|控制输出内容的标题字符串，默认为 `ufmco`，含义如下：<br/>`u`：usage<br/>`f`：FUSE<br/>`m`：metadata<br/>`c`：block cache<br/>`o`：object storage<br/>`g`：Go|
|`--interval=1`|更新间隔；单位为秒 (默认：1)|
|`--verbosity=0`|详细级别；通常 0 或 1 已足够 (默认：0)|

### `juicefs profile` {#profile}

展示基于[文件系统访问日志](../administration/fault_diagnosis_and_analysis.md#access-log)的实时监控数据，阅读[「实时性能监控」](../administration/fault_diagnosis_and_analysis.md#performance-monitor)以了解更多。

#### 概览

```shell
juicefs profile [command options] MOUNTPOINT/LOGFILE

# 监控实时操作
juicefs profile /mnt/jfs

# 重放访问日志
cat /mnt/jfs/.accesslog > /tmp/jfs.alog
# 一段时间后按 Ctrl-C 停止 "cat" 命令
juicefs profile /tmp/jfs.alog

# 分析访问日志并立即打印总统计数据
juicefs profile /tmp/jfs.alog --interval 0
```

#### 参数

|项 | 说明|
|-|-|
|`--uid=value, -u value`|仅跟踪指定 UIDs (用逗号分隔)|
|`--gid=value, -g value`|仅跟踪指定 GIDs (用逗号分隔)|
|`--pid=value, -p value`|仅跟踪指定 PIDs (用逗号分隔)|
|`--interval=2`|显示间隔；在回放模式中将其设置为 0 可以立即得到整体的统计结果；单位为秒 (默认：2)|

### `juicefs info` {#info}

显示指定路径或 inode 的内部信息。

#### 概览

```shell
juicefs info [command options] PATH or INODE

# 检查路径
juicefs info /mnt/jfs/foo

# 检查 inode
cd /mnt/jfs
juicefs info -i 100
```

#### 参数

|项 | 说明|
|-|-|
|`--inode, -i`|使用 inode 号而不是路径 (当前目录必须在 JuiceFS 挂载点内) (默认：false)|
|`--recursive, -r`|递归获取所有子目录的概要信息，当指定一个目录结构很复杂的路径时可能会耗时很长） (默认：false)|
|`--strict` <VersionAdd>1.1</VersionAdd>|获取准确的目录概要 (注意：巨大的文件树可能会花费很长的时间) (默认：false)|
|`--raw`|显示内部原始信息 (默认：false)|

### `juicefs debug` <VersionAdd>1.1</VersionAdd> {#debug}

从运行环境、系统日志等多个维度收集和展示信息，帮助更好地定位错误

#### 概览

```shell
juicefs debug [command options] MOUNTPOINT

# 收集并展示挂载点 /mnt/jfs 的各类信息
juicefs debug /mnt/jfs

# 指定输出目录为 /var/log
juicefs debug --out-dir=/var/log /mnt/jfs

# 收集最后 1000 条日志条目
juicefs debug --out-dir=/var/log --limit=1000 /mnt/jfs
```

#### 参数

|项 | 说明|
|-|-|
|`--out-dir=./debug/`|结果输出目录，若目录不存在则自动创建，默认为 `./debug/`。|
|`--limit=value`|收集的日志条目数，从新到旧，若不指定则收集全部条目|
|`--stats-sec=5`|.stats 文件采样秒数 (默认：5)|
|`--trace-sec=5`|trace 指标采样秒数 (默认：5)|
|`--profile-sec=30`|profile 指标采样秒数 (默认：30)|

### `juicefs summary` <VersionAdd>1.1</VersionAdd> {#summary}

显示目标目录树摘要。

#### 概览

```shell
juicefs summary [command options] PATH

juicefs summary /mnt/jfs/foo

# 显示最大深度为 5
juicefs summary --depth 5 /mnt/jfs/foo

# 显示前 20 个 entry
juicefs summary --entries 20 /mnt/jfs/foo

# 显示准确的结果
juicefs summary --strict /mnt/jfs/foo
```

#### 参数

|项 | 说明|
|-|-|
|`--depth value, -d value`|显示树的深度 (0 表示只显示根) (默认：2)|
|`--entries value, -e value`|显示前 N 个 entry (按大小排序)(默认：10)|
|`--strict`|显示准确的摘要，包括目录和文件 (可能很慢) (默认值：false)|
|`--csv`|以 CSV 格式打印摘要 (默认：false)|

## 服务 {#service}

### `juicefs mount` {#mount}

挂载一个已经创建的文件系统。

JuiceFS 支持用 root 以及普通用户挂载，但由于权限不同，挂载时所使用的的缓存目录和日志文件等路径会有所区别，详见下方参数说明。

#### 概要

```shell
juicefs mount [command options] META-URL MOUNTPOINT

# 前台挂载
juicefs mount redis://localhost /mnt/jfs

# 使用带密码的 redis 后台挂载
juicefs mount redis://:mypassword@localhost /mnt/jfs -d
# 更安全的方式
META_PASSWORD=mypassword juicefs mount redis://localhost /mnt/jfs -d

# 将一个子目录挂载为根目录
juicefs mount redis://localhost /mnt/jfs --subdir /dir/in/jfs

# 开启写缓存（writeback）模式，可以提升写入性能但同时有数据丢失风险
juicefs mount redis://localhost /mnt/jfs -d --writeback

# 开启只读模式
juicefs mount redis://localhost /mnt/jfs -d --read-only

# 关闭元数据自动备份
juicefs mount redis://localhost /mnt/jfs --backup-meta 0
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`MOUNTPOINT`|文件系统挂载点，例如：`/mnt/jfs`、`Z:`。|
|`-d, --background`|后台运行，默认为 false。|
|`--no-syslog`|禁用系统日志，默认为 false。|
|`--log=path`|后台运行时日志文件的位置 (默认：`$HOME/.juicefs/juicefs.log` 或 `/var/log/juicefs.log`)|
|`--force`|强制挂载即使挂载点已经被相同的文件系统挂载 (默认值:false)|
|`--update-fstab` <VersionAdd>1.1</VersionAdd>|新增／更新 `/etc/fstab` 中的条目，如果不存在将会创建一个从 `/sbin/mount.juicefs` 到 JuiceFS 可执行文件的软链接，默认为 false。|
|`--disable-transparent-hugepage` <VersionAdd>1.3</VersionAdd>| 关闭内核的透明大页（THP），内存紧张时，THP 可能导致进程挂起等待，默认为 false。 |

#### FUSE 相关参数 {#mount-fuse-options}

|项 | 说明|
|-|-|
|`--enable-xattr`|启用扩展属性 (xattr) 功能，默认为 false。|
|`--enable-cap` <VersionAdd>1.3</VersionAdd>|启用 security.capability 扩展属性 (xattr) ，默认为 false。|
|`--enable-selinux` <VersionAdd>1.3</VersionAdd>|启用 security.selinux 扩展属性 (xattr) ，默认为 false。|
|`--enable-ioctl` <VersionAdd>1.1</VersionAdd>|启用 ioctl (仅支持 GETFLAGS/SETFLAGS) (默认：false)|
|`--root-squash value` <VersionAdd>1.1</VersionAdd>|将本地 root 用户 (UID=0) 映射到一个指定用户，如 UID:GID|
|`--all-squash value` <VersionAdd>1.3</VersionAdd>|将所有用户映射到一个指定用户，如 UID:GID|
|`--umask value` <VersionAdd>1.3</VersionAdd> |新文件和新目录的 umask 的八进制格式|
|`--prefix-internal` <VersionAdd>1.1</VersionAdd>|挂载 JuiceFS 后，挂载点下默认创建 `.stats`, `.accesslog` 等虚拟文件。如果这些内部文件和你的应用发生冲突，可以启用该选项，添加 `.jfs` 前缀到所有内部文件。|
|`--max-fuse-io=128K` <VersionAdd>1.3</VersionAdd>| fuse 请求最大大小 (默认：128K)|
|`-o value`|其他 FUSE 选项，详见 [FUSE 挂载选项](../reference/fuse_mount_options.md)|

<CommonOptions />

<!-- 注意：下面这段 HTML 的用途仅仅是为了在检查坏链时不要报错（因为这些标题都在 _common_options.mdx 文件里），在实际页面中不会显示。请不要删除，也不要移动位置（必须放在 <CommonOptions /> 这一行下面）。 -->
<div style={{ display: 'none' }}>

#### {#mount-metadata-options}
#### {#mount-metadata-cache-options}
#### {#mount-data-storage-options}
#### {#mount-data-cache-options}
#### {#mount-metrics-options}

</div>

### `juicefs umount` {#umount}

卸载 JuiceFS 文件系统。

#### 概要

```shell
juicefs umount [command options] MOUNTPOINT

juicefs umount /mnt/jfs
```

#### 参数

|项 | 说明|
|-|-|
|`-f, --force`|强制卸载一个忙碌的文件系统 (默认：false)|
|`--flush` <VersionAdd>1.1</VersionAdd>|等待所有暂存块被刷新 (默认值：false)|

### `juicefs gateway` {#gateway}

启动一个 S3 兼容的网关，详见[「配置 JuiceFS S3 网关」](../guide/gateway.md)。

#### 概览

```shell
juicefs gateway [command options] META-URL ADDRESS

export MINIO_ROOT_USER=admin
export MINIO_ROOT_PASSWORD=12345678
juicefs gateway redis://localhost localhost:9000
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`ADDRESS`|S3 网关地址和监听的端口，例如：`localhost:9000`|
|`--log value` <VersionAdd>1.2</VersionAdd>|网关日志路径|
|`--access-log=path`|访问日志的路径|
|`--background, -d` <VersionAdd>1.2</VersionAdd>|后台运行（默认：false）|
|`--no-banner`| 禁用 MinIO 的启动信息（默认：false）|
|`--multi-buckets`|使用第一级目录作为存储桶（默认：false）|
|`--keep-etag`|保留对象上传时的 ETag（默认：false）|
|`--umask=022`|新文件和新目录的 umask 的八进制格式（默认值：022）|
|`--object-tag` <VersionAdd>1.2</VersionAdd>|启用对象标签 API|
|`--domain value` <VersionAdd>1.2</VersionAdd>|虚拟主机样式请求的域|
|`--refresh-iam-interval=5m` <VersionAdd>1.2</VersionAdd>|重新加载网关 IAM 配置的间隔时间（默认值：5 分钟）|

<CommonOptions />

### `juicefs webdav` {#webdav}

启动一个 WebDAV 服务，阅读[「配置 WebDAV 服务」](../deployment/webdav.md)以了解更多。

#### 概览

```shell
juicefs webdav [command options] META-URL ADDRESS

juicefs webdav redis://localhost localhost:9007
```

#### 参数

|项 | 说明|
|-|-|
|`META-URL`|用于元数据存储的数据库 URL，详情查看[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)。|
|`ADDRESS`|WebDAV 服务监听的地址与端口，例如：`localhost:9007`|
|`--cert-file` <VersionAdd>1.1</VersionAdd>|HTTPS 证书文件|
|`--key-file` <VersionAdd>1.1</VersionAdd>|HTTPS 密钥文件|
|`--gzip`|通过 gzip 压缩提供的文件（默认值：false）|
|`--disallowList`|禁止列出目录（默认值：false）|
|`--enable-proppatch` <VersionAdd>1.3</VersionAdd>|启用 proppatch 方法支持|
|`--log value` <VersionAdd>1.2</VersionAdd>|WebDAV 日志路径|
|`--access-log=path`|访问日志的路径|
|`--background, -d` <VersionAdd>1.2</VersionAdd>|后台运行（默认：false）|
|`--threads=50, -p 50`<VersionAdd>1.3</VersionAdd>|用于删除作业的最大线程数（最大 255 个）|

<CommonOptions />

## 工具 {#tool}

### `juicefs bench` {#bench}

对指定的路径做基准测试，包括对大文件和小文件的读/写/获取属性操作。详细介绍参考[文档](../benchmark/performance_evaluation_guide.md#juicefs-bench)。

#### 概览

```shell
juicefs bench [command options] PATH

# 使用 4 个线程运行基准测试
juicefs bench /mnt/jfs -p 4

# 只运行小文件的基准测试
juicefs bench /mnt/jfs --big-file-size 0
```

#### 参数

|项 | 说明|
|-|-|
|`--block-size=1`|块大小；单位为 MiB (默认：1)|
|`--big-file-size=1024`|大文件大小；单位为 MiB (默认：1024)|
|`--small-file-size=128`|小文件大小；单位为 KiB (默认：128)|
|`--small-file-count=100`|小文件数量 (默认：100)|
|`--threads=1, -p 1`|并发线程数 (默认：1)|

### `juicefs objbench` {#objbench}

测试对象存储接口的正确性与基本性能，详细介绍参考[文档](../benchmark/performance_evaluation_guide.md#juicefs-objbench)。

#### 概览

```shell
juicefs objbench [command options] BUCKET

# 测试 S3 对象存储的基准性能
ACCESS_KEY=myAccessKey SECRET_KEY=mySecretKey juicefs objbench --storage=s3 https://mybucket.s3.us-east-2.amazonaws.com -p 6
```

#### 参数

|项 | 说明|
|-|-|
|`--storage=file`|对象存储类型 (例如 `s3`、`gs`、`oss`、`cos`) (默认：`file`，参考[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)查看所有支持的对象存储类型)|
|`--access-key=value`|对象存储的 Access Key，也可通过环境变量 `ACCESS_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--secret-key=value`|对象存储的 Secret Key，也可通过环境变量 `SECRET_KEY` 设置。查看[如何设置对象存储](../reference/how_to_set_up_object_storage.md#aksk)以了解更多。|
|`--session-token value` <VersionAdd>1.0</VersionAdd>|对象存储的会话令牌|
|`--shards`<VersionAdd>1.3</VersionAdd>|如果对象存储服务在桶级别设置了限速（或者你使用自建的对象存储服务，单个桶的性能有限），可以将数据块根据名字哈希分散存入 N 个桶中。该值默认为 0，也就是所有数据存入单个桶。当 N 大于 0 时，`bucket` 需要包含 `%d` 占位符，例如 `--bucket=juicefs-%d`。`--shards` 设置无法动态修改，需要提前规划好用量。|
|`--block-size=4096`|每个 IO 块的大小（以 KiB 为单位）（默认值：4096）|
|`--big-object-size=1024`|大文件的大小（以 MiB 为单位）（默认值：1024）|
|`--small-object-size=128`|每个小文件的大小（以 KiB 为单位）（默认值：128）|
|`--small-objects=100`|小文件的数量（默认值：100）|
|`--skip-functional-tests`|跳过功能测试（默认值：false）|
|`--threads=4, -p 4`|上传下载等操作的并发数（默认值：4）|

### `juicefs warmup` {#warmup}

将文件提前下载到缓存，提升后续本地访问的速度。可以指定某个挂载点路径，递归对这个路径下的所有文件进行缓存预热；也可以通过 `--file` 选项指定文本文件，在文本文件中指定需要预热的文件名。

如果需要预热的文件分布在许多不同的目录，推荐将这些文件名保存到文本文件中并用 `--file` 参数传给预热命令，这样做能利用 `warmup` 的并发功能，速度会显著优于多次调用 `juicefs warmup`，在每次调用里传入单个文件。

#### 概览

```shell
juicefs warmup [command options] [PATH ...]

# 预热目录中的所有文件
juicefs warmup /mnt/jfs/datadir

# 只预热指定文件
echo '/jfs/f1
/jfs/f2
/jfs/f3' > /tmp/filelist.txt
juicefs warmup -f /tmp/filelist.txt
```

#### 参数

|项 | 说明|
|-|-|
|`--file=value, -f value`|指定一个包含一组路径的文件（每一行为一个文件路径）。|
|`--threads=50, -p 50`|并发的工作线程数，默认 50。如果带宽不足导致下载失败，需要减少并发度，控制下载速度。|
|`--background, -b`|后台运行（默认：false）|
|`--evict` <VersionAdd>1.2</VersionAdd>|逐出已缓存的块|
|`--check` <VersionAdd>1.2</VersionAdd>|检查数据块是否已缓存|

### `juicefs rmr` {#rmr}

快速删除目录里的所有文件和子目录，效果等同于 `rm -rf`，但该命令直接操纵元数据，不经过内核，所以速度更快。

如果文件系统启用了回收站功能，被删除的文件会进入回收站。详见[「回收站」](../security/trash.md)。

#### 参数

|项 | 说明|
|-|-|
|`--skip-trash`<VersionAdd>1.3</VersionAdd>|跳过垃圾并直接删除文件（需要 root 权限）|
|`--threads=50, -p 50`<VersionAdd>1.3</VersionAdd>|用于删除作业的最大线程数（最大 255 个）|

#### 概览

```shell
juicefs rmr PATH ...

juicefs rmr /mnt/jfs/foo
```

### `juicefs sync` {#sync}

在两个存储之间同步数据，阅读[「数据同步」](../guide/sync.md)以了解更多。

#### 概览

```shell
juicefs sync [command options] SRC DST

# 从 OSS 同步到 S3
juicefs sync oss://mybucket.oss-cn-shanghai.aliyuncs.com s3://mybucket.s3.us-east-2.amazonaws.com

# 从 S3 直接同步到 JuiceFS
juicefs sync s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# 源端：a1/b1, a2/b2, aaa/b1   目标端：empty   同步结果：aaa/b1
juicefs sync --exclude='a?/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# 源端：a1/b1, a2/b2, aaa/b1   目标端：empty   同步结果：a1/b1, aaa/b1
juicefs sync --include='a1/b1' --exclude='a[1-9]/b*' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/

# 源端：a1/b1, a2/b2, aaa/b1, b1, b2  目标端：empty   同步结果：b2
juicefs sync --include='a1/b1' --exclude='a*' --include='b2' --exclude='b?' s3://mybucket.s3.us-east-2.amazonaws.com/ jfs://META-URL/
```

源路径（`SRC`）和目标路径（`DST`）的格式均为：

```
[NAME://][ACCESS_KEY:SECRET_KEY[:SESSIONTOKEN]@]BUCKET[.ENDPOINT][/PREFIX]
```

其中：

- `NAME`：JuiceFS 支持的数据存储类型，比如 `s3`、`oss`，完整列表见[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)。
- `ACCESS_KEY` 和 `SECRET_KEY`：访问数据存储所需的密钥信息，特殊字符需要进行 [URL 转义](https://www.w3schools.com/tags/ref_urlencode.ASP)并替换，比如 `/` 需要被替换为 `%2F`。另外，如果不清楚如何获取对象存储的 AKSK，参考[这里](../reference/how_to_set_up_object_storage.md#aksk)。
- `TOKEN` 用来访问对象存储的 token，部分对象存储支持使用临时的 token 以获得有限时间的权限。
- `BUCKET[.ENDPOINT]`：数据存储服务的访问地址，不同存储类型格式可能不同（MinIO 目前仅支持路径风格），详见[文档](../reference/how_to_set_up_object_storage.md#supported-object-storage)。
- `[/PREFIX]`：可选，源路径和目标路径的前缀，可用于限定只同步某些路径中的数据。

#### 选择条件相关参数 {#sync-selection-related-options}

|项 | 说明|
|-|-|
|`--files-from` <VersionAdd>1.3</VersionAdd>|仅同步给定文件中记录的对象，其每行内容都是待同步对象的相对路径，如果对象是目录建议以 / 结尾|
|`--start=KEY, -s KEY, --end=KEY, -e KEY`|提供 KEY 范围，来指定对象存储的 List 范围。|
|`--end KEY, -e KEY`|同步的最后一个 `KEY`|
|`--exclude=PATTERN`|排除匹配 `PATTERN` 的 Key。参考[「过滤」](../guide/sync.md#filtering)文档了解如何使用。|
|`--include=PATTERN`|不排除匹配 `PATTERN` 的 Key，需要与 `--exclude` 选项配合使用。参考[「过滤」](../guide/sync.md#filtering)文档了解如何使用。|
|`--match-full-path` <VersionAdd>1.2</VersionAdd>|使用「完整路径过滤模式」，默认为 false。参考[「过滤模式」](../guide/sync.md#filtering-mode)文档了解如何使用。|
|`--max-size=SIZE` <VersionAdd>1.2</VersionAdd>|跳过大小大于 `SIZE` 的文件，单位字节|
|`--min-size=SIZE` <VersionAdd>1.2</VersionAdd>|跳过大小小于 `SIZE` 的文件，单位字节|
|`--max-age=DURATION` <VersionAdd>1.2</VersionAdd>|跳过最后修改时间超过 `DURATION` 的文件，单位秒。例如 `--max-age=3600` 表示仅同步在 1 小时内被修改过的文件。|
|`--min-age=DURATION` <VersionAdd>1.2</VersionAdd>|跳过最后修改时间不超过 `DURATION` 的文件，单位秒。例如 `--min-age=3600` 表示仅同步最后修改时间距离当前时间已经超过 1 小时的文件。|
|`--start-time=DURATION` <VersionAdd>1.3</VersionAdd>|跳过开始时间之前修改的文件。例如 `2006-01-02 15:04:05`|
|`--end-time=DURATION` <VersionAdd>1.3</VersionAdd>|跳过结束时间之后修改的文件。例如 `2006-01-02 15:04:05`|
|`--limit=-1`|限制将要处理的对象的数量，默认为 -1 表示不限制|
|`--update, -u`|当源文件更新时（`mtime` 更新），覆盖已存在的文件，默认为 false。|
|`--force-update, -f`|强制覆盖已存在的文件，默认为 false。|
|`--existing, --ignore-non-existing` <VersionAdd>1.1</VersionAdd>|不创建任何新文件，默认为 false。|
|`--ignore-existing` <VersionAdd>1.1</VersionAdd>|不更新任何已经存在的文件，默认为 false。|

#### 行为相关参数 {#sync-action-related-options}

|项 | 说明|
|-|-|
|`--dirs`|同步目录（包括空目录）。|
|`--perms`|保留权限设置，默认为 false。|
|`--links, -l`|将符号链接复制为符号链接，默认为 false，此时会查找并同步符号链接所指向的文件。|
|`--inplace` <VersionAdd>1.2</VersionAdd>|当源路径的文件被修改时，直接修改目标路径中的同名文件，而不是先在目标路径中写一个临时文件，再将这个临时文件原子重命名到真实的文件名。这个选项只有当 `--update` 选项开启，以及目标路径的存储系统支持原地修改文件（如 JuiceFS、HDFS、NFS）时才有意义，也就是说如果目标路径的存储系统是对象存储开启这个选项是无效的。（默认值：false）|
|`--delete-src, --deleteSrc`|如果目标存储已经存在，删除源存储的对象。与 rsync 不同，为保数据安全，首次执行时不会删除源存储文件，只有拷贝成功后再次运行时，扫描确认目标存储已经存在相关文件，才会删除源存储文件。|
|`--delete-dst, --deleteDst`|删除目标存储下的不相关对象。|
|`--check-all`|校验源路径和目标路径中所有文件的数据完整性，默认为 false。校验方式是基于字节流对比，因此也将带来相应的开销。|
|`--check-new`|校验新拷贝文件的数据完整性，默认为 false。校验方式是基于字节流对比，因此也将带来相应的开销。|
|`--check-change` <VersionAdd>1.3</VersionAdd>|校验同步前后的数据是否有变更，默认为 false。校验方式基于文件大小和 mtime，比较轻量。|
|`--max-failure` <VersionAdd>1.3</VersionAdd>|最大允许失败的文件数（-1 表示无限）|
|`--dry`|仅打印执行计划，不实际拷贝文件。|

#### 对象存储相关参数 {#sync-storage-related-options}

|项 | 说明|
|-|-|
|`--threads=10, -p 10`|并发线程数，默认为 10。|
|`--list-threads=1` <VersionAdd>1.1</VersionAdd>|并发 `list` 线程数，默认为 1。阅读[并发 `list`](../guide/sync.md#concurrent-list)以了解如何使用。|
|`--list-depth=1` <VersionAdd>1.1</VersionAdd>|并发 `list` 目录深度，默认为 1。阅读[并发 `list`](../guide/sync.md#concurrent-list)以了解如何使用。|
|`--no-https`|不要使用 HTTPS，默认为 false。|
|`--storage-class value` <VersionAdd>1.1</VersionAdd>|目标端的新建文件的存储类型。|
|`--bwlimit=0`|限制最大带宽，单位 Mbps，默认为 0 表示不限制。|

#### 分布式相关参数 {#sync-cluster-related-options}

|项 | 说明 |
|-|-|
|`--manager-addr=ADDR`| 分布式同步模式中，Manager 节点的监听地址，格式：`<IP>:[port]`，如果不写端口，则监听随机端口。如果没有该参数，则监听本机随机的 IPv4 地址与随机端口。|
|`--worker=ADDR,ADDR`| 分布式同步模式中，工作节点列表，使用逗号分隔。|

#### 监控相关参数 {#sync-metrics-related-options}

|项 | 说明|
|-|-|
|`--metrics value` <VersionAdd>1.2</VersionAdd>|导出监控指标的地址（默认值："127.0.0.1:9567"）|
|`--consul value` <VersionAdd>1.2</VersionAdd>|用于注册的 Consul 地址（默认值："127.0.0.1:8500"）|

### `juicefs clone` <VersionAdd>1.1</VersionAdd> {#clone}

快速在同一挂载点下克隆目录或者文件，只拷贝元数据但不拷贝数据块，因此拷贝速度非常快。更多介绍详见[「克隆文件或目录」](../guide/clone.md)。

#### 概览

```shell
juicefs clone [command options] SRC DST

# 克隆文件
juicefs clone /mnt/jfs/file1 /mnt/jfs/file2

# 克隆目录
juicefs clone /mnt/jfs/dir1 /mnt/jfs/dir2

# 克隆时保留文件的 UID、GID 和 mode
juicefs clone -p /mnt/jfs/file1 /mnt/jfs/file2
```

#### 参数

|项 | 说明|
|-|-|
|`--preserve, -p`|克隆时默认使用当前用户的 UID 和 GID，而 mode 则使用当前用户的 umask 重新计算获得。如果启用该选项，则保留文件的 UID、GID 和 mode。|

### `juicefs compact` <VersionAdd>1.2</VersionAdd> {#compact}

对给定的目录执行碎片整理，合并或清理不连续的 slice，从而提升读性能。详细介绍参考[「状态检查和维护」](../administration/status_check_and_maintenance.md)。

#### 概览

```shell
juicefs compact [command options] PATH

# 对给定目录执行碎片整理
juicefs compact /mnt/jfs
```

#### 参数

|项 | 说明|
|-|-|
|`--threads, -p`| 并发执行任务的线程数（默认：10） |


================================================
FILE: docs/zh_cn/reference/fuse_mount_options.md
================================================
---
title: FUSE 挂载选项
sidebar_position: 5
slug: /fuse_mount_options
---

JuiceFS 文件系统为用户提供多种访问方式，FUSE 是其中较为常用的一种，即使用 `juicefs mount` 命令将文件系统挂载到本地的方式。用户可以根据需要添加 FUSE 支持的挂载选项，从而实现更细粒度的控制。

本指南介绍 JuiceFS 常用的 FUSE 挂载选项，有两种添加挂载选项的方式：

1. 手动执行 [`juicefs mount`](../reference/command_reference.mdx#mount) 命令时，通过 `-o` 选项指定，多个选项使用半角逗号分隔。

   ```bash
   juicefs mount -d -o allow_other,writeback_cache sqlite3://myjfs.db ~/jfs
   ```

2. Linux 发行版通过 `/etc/fstab` 定义自动挂载时，在 `options` 字段处直接添加选项，多个选项使用半角逗号分隔。

   ```
   # <file system>       <mount point>   <type>      <options>           <dump>  <pass>
   redis://localhost:6379/1    /jfs      juicefs     _netdev,writeback_cache   0       0
   ```

## default_permissions

JuiceFS 在挂载时会自动启用该选项，无需显式指定。该选项将启用内核的文件访问权限检查，它会在文件系统之外进行，启用后，内核检查和文件系统检查必须全部成功才允许进一步操作，该选项通常与 `allow_other` 一起使用。

:::tip
内核执行的是标准的 Unix 权限检查，基于 mode bits、UID/GID、目录所有权。
:::

## allow_other

FUSE 默认只有挂载文件系统的用户可以访问挂载点中的文件，`allow_other` 选项可以让其他用户也可以访问挂载点上的文件。当 root 用户挂载时，该选项会自动启用（在 [`fuse.go`](https://github.com/juicedata/juicefs/blob/main/pkg/fuse/fuse.go) 搜索 `AllowOther` 字样），无需显式指定。而如果是普通用户挂载，则需要修改 `/etc/fuse.conf`，在该配置文件中开启 `user_allow_other` 配置选项，才能在普通用户挂载时启用 `allow_other`。

## writeback_cache

:::note 注意
该挂载选项仅在 Linux 3.15 及以上版本内核上支持
:::

FUSE 支持[「writeback-cache 模式」](https://www.kernel.org/doc/Documentation/filesystems/fuse-io.txt)，这意味着 `write()` 系统调用通常可以非常快速地完成。当频繁写入非常小的数据（如 100 字节左右）时，建议启用此挂载选项。

## user_id 和 group_id

这两个选项用来指定文件系统的所有者 ID 和所有者组 ID（不同于文件或目录的 UID、GID），用以做更高层级的权限校验。如果指定了 allow_other 选项，此选项将失效。用法如 `sudo juicefs mount -o user_id=100,group_id=100`。

## debug

该选项会将低层类库（`go-fuse`）的 Debug 信息输出到 `juicefs.log` 中。

:::note 注意
该选项会将低层类库（`go-fuse`）的 Debug 信息输出到 `juicefs.log` 中，需要注意的是，该选项与 JuiceFS 客户端的全局 `--debug` 选项不同，前者是输出 `go-fuse` 类库的调试信息，后者是输出 JuiceFS 客户端的调试信息。详情参考文档[故障诊断和分析](../administration/fault_diagnosis_and_analysis.md)。
:::


================================================
FILE: docs/zh_cn/reference/how_to_set_up_metadata_engine.md
================================================
---
title: 如何设置元数据引擎
sidebar_position: 2
slug: /databases_for_metadata
description: JuiceFS 支持 Redis、TiKV、PostgreSQL、MySQL 等多种数据库作为元数据引擎，本文分别介绍相应的设置和使用方法。
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

:::tip
`META_PASSWORD` 是 JuiceFS v1.0 新增功能，旧版客户端需要[升级](../administration/upgrade.md)后才能使用。
:::

JuiceFS 采用数据和元数据分离的存储架构，元数据可以存储在任意支持的数据库中，称为「元数据存储引擎」。JuiceFS 支持众多元数据存储引擎，各个数据库性能、易用性、场景均有区别，具体性能对比可参考[该文档](../benchmark/metadata_engines_benchmark.md)。

## 元数据存储用量 {#storage-usage}

元数据所需的存储空间跟文件名的长度、文件的类型和长度以及扩展属性等相关，无法准确地估计一个文件系统的元数据存空间需求。简单起见，我们可以根据没有扩展属性的单个小文件所需的存储空间来做近似：

- **键值（Key-Value）数据库**（如 Redis、TiKV）：300 字节／文件
- **关系型数据库**（如 SQLite、MySQL、PostgreSQL）：600 字节／文件

当平均文件更大（超过 64MB），或者文件被频繁修改导致有很多碎片，或者有很多扩展属性，或者平均文件名很长（超过 50 字节），都会导致需要更多的存储空间。

当你需要在两种类型的元数据引擎之间迁移时，就可以据此来估算所需的存储空间。例如，假设你希望将元数据引擎从一个关系型数据库（MySQL）迁移到键值数据库（Redis），如果当前 MySQL 的用量为 30GB，那么目标 Redis 至少需要准备 15GB 以上的内存。反之亦然。

## Redis 兼容数据库

### Redis

JuiceFS 要求使用 4.0 及以上版本的 Redis。JuiceFS 也支持使用 Redis Cluster 作为元数据引擎，但为了避免在 Redis 集群中执行跨节点事务，同一个文件系统的元数据总会坐落于单个 Redis 实例中。

:::tip Redis Cluster 键前缀
使用 Redis Cluster 时，URL 中的数据库编号会被用作**键前缀**，而不是用于实际的数据库选择（因为 Redis Cluster 仅支持数据库 0）。前缀格式为 `{N}`（例如 `{1}`、`{2}`），使用 Redis 哈希标签（hash tag）确保同一个卷的所有键都被路由到同一个槽（slot）。这使得多个 JuiceFS 文件系统可以共享同一个 Redis Cluster：

```shell
# 不同的卷使用不同的数据库编号作为键前缀
juicefs format redis://cluster:6379/1 volume1   # 键前缀为 {1}
juicefs format redis://cluster:6379/2 volume2   # 键前缀为 {2}
```

可以使用以下命令在 Redis Cluster 中验证键：

```shell
redis-cli -c -h <host> -p 6379 keys '{1}*'   # 列出前缀为 {1} 的所有键
```

:::

为了保证元数据安全，JuiceFS 需要 [`maxmemory-policy noeviction`](https://redis.io/docs/reference/eviction/)，否则在启动 JuiceFS 的时候将会尝试将其设置为 `noeviction`，如果设置失败将会打印告警日志。更多可以参考 [Redis 最佳实践](../administration/metadata/redis_best_practices.md)。

#### 创建文件系统

使用 Redis 作为元数据存储引擎时，通常使用以下格式访问数据库：

<Tabs>
  <TabItem value="tcp" label="TCP">

```
redis[s]://[<username>:<password>@]<host>[:<port>]/<db>
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
unix://[<username>:<password>@]<socket-file-path>?db=<db>
```

  </TabItem>
</Tabs>

其中，`[]` 括起来的是可选项，其它部分为必选项。

- 如果开启了 Redis 的 [TLS](https://redis.io/docs/manual/security/encryption) 特性，协议头需要使用 `rediss://`，否则使用 `redis://`。
- `<username>` 是 Redis 6.0 之后引入的，如果没有用户名可以忽略，但密码前面的 `:` 冒号需要保留，如 `redis://:<password>@<host>:6379/1`。
- Redis 监听的默认端口号为 `6379`，如果没有改变默认端口号可以不用填写，如 `redis://:<password>@<host>/1`，否则需要显式指定端口号。
- Redis 支持多个[逻辑数据库](https://redis.io/commands/select)，请将 `<db>` 替换为实际使用的数据库编号。
- 如果需要连接 Redis 哨兵（Sentinel），元数据 URL 的格式会稍有不同，具体请参考[「Redis 最佳实践」](../administration/metadata/redis_best_practices.md#数据可用性)。
- 如果 Redis 的用户名或者密码中包含特殊字符，需要使用单引号进行封闭，避免 shell 进行解释。或者使用环境变量 `REDIS_PASSWORD` 进行传递。

:::tip 提示
一个 Redis 实例默认可以创建 16 个逻辑数据库，而一个逻辑数据库可以创建一个 JuiceFS 文件系统。也就是说，在默认情况下，你可以使用一个 Redis 实例创建 16 个 JuiceFS 文件系统。需要注意，用于 JuiceFS 的逻辑数据库不要和其他应用共享，否则可能会造成数据混乱。
:::

例如，创建名为 `pics` 的文件系统，使用 Redis 的 `1` 号数据库存储元数据：

```shell
juicefs format \
    --storage s3 \
    ... \
    "redis://:mypassword@192.168.1.6:6379/1" \
    pics
```

安全起见，建议使用环境变量 `META_PASSWORD` 或 `REDIS_PASSWORD` 传递数据库密码，例如：

```shell
export META_PASSWORD=mypassword
```

然后就无需在元数据 URL 中设置密码了：

```shell
juicefs format \
    --storage s3 \
    ... \
    "redis://192.168.1.6:6379/1" \
    pics
```

#### 挂载文件系统

如果需要在多台服务器上共享同一个文件系统，必须确保每台服务器都能访问到存储元数据的数据库。

```shell
juicefs mount -d "redis://:mypassword@192.168.1.6:6379/1" /mnt/jfs
```

挂载文件系统也支持用 `META_PASSWORD` 或 `REDIS_PASSWORD` 环境变量传递密码：

```shell
export META_PASSWORD=mypassword
juicefs mount -d "redis://192.168.1.6:6379/1" /mnt/jfs
```

#### 设置 TLS

JuiceFS 同时支持 Redis 的 TLS 单向加密认证和 mTLS 双向加密认证连接。通过 TLS 或 mTLS 连接到 Redis 时均使用 `rediss://` 协议头，但是在使用 TLS 单向加密认证时，不需要指定客户端证书和私钥。

:::note
对 Redis mTLS 功能的支持需要使用 1.1.0 及以上版本的 JuiceFS
:::

当通过 mTLS 连接 Redis 时，需要提供客户端证书和私钥，以及签发客户端证书的 CA 证书进行连接。在 JuiceFS 中，可以通过以下方式设置 mTLS 需要的客户端证书：

```shell
juicefs format --storage s3 \
    ... \
    "rediss://192.168.1.6:6379/1?tls-cert-file=/etc/certs/client.crt&tls-key-file=/etc/certs/client.key&tls-ca-cert-file=/etc/certs/ca.crt"
    pics
```

上面的示例代码使用 `rediss://` 协议头来开启 mTLS 功能，然后使用以下选项来指定客户端证书的路径：

- `tls-cert-file=<path>` 指定客户端证书的路径
- `tls-key-file=<path>` 指定客户端密钥的路径
- `tls-ca-cert-file=<path>` 指定签发客户端证书的 CA 证书路径，它是可选的，如果不指定，客户端会使用系统默认的 CA 证书进行验证。
- `insecure-skip-verify=true` 可以用来跳过对服务端证书的验证

在 URL 指定选项时，以 `?` 符号开头，使用 `&` 符号来分隔多个选项，例如：`?tls-cert-file=client.crt&tls-key-file=client.key`。

上例中的 `/etc/certs` 只是一个目录，实际使用时请替换为你的证书目录，可以使用相对路径或绝对路径。

### KeyDB

[KeyDB](https://keydb.dev) 是 Redis 的开源分支，在开发上保持与 Redis 主线对齐。KeyDB 在 Redis 的基础上实现了多线程支持、更好的内存利用率和更大的吞吐量，另外还支持 [Active Replication](https://github.com/JohnSully/KeyDB/wiki/Active-Replication)，即 Active Active（双活）功能。

:::note 注意
KeyDB 的数据复制是异步的，使用 Active Active（双活）功能可能导致数据一致性问题，请务必充分验证、谨慎使用！
:::

在用于 JuiceFS 元数据存储时，KeyDB 与 Redis 的用法完全一致，这里不再赘述，请参考 [Redis](#redis) 部分使用。

## 键值数据库

### BadgerDB

[BadgerDB](https://github.com/dgraph-io/badger) 是一个 Go 语言开发的嵌入式、持久化的单机 Key-Value 数据库，它的数据库文件存储在本地你指定的目录中。

使用 BadgerDB 作为 JuiceFS 元数据存储引擎时，使用 `badger://` 协议头指定数据库路径。

#### 创建文件系统

无需提前创建 BadgerDB 数据库，直接创建文件系统即可：

```shell
juicefs format badger://$HOME/badger-data myjfs
```

上述命令在当前用户的 `home` 目录创建 `badger-data` 作为数据库目录，并以此作为 JuiceFS 的元数据存储。

#### 挂载文件系统

挂载文件系统时需要指定数据库路径：

```shell
juicefs mount -d badger://$HOME/badger-data /mnt/jfs
```

:::tip 提示
BadgerDB 只允许单进程访问，如果需要执行 `gc`、`fsck`、`dump`、`load` 等操作，需要先卸载文件系统。
:::

### TiKV

[TiKV](https://tikv.org) 是一个分布式事务型的键值数据库，最初作为 PingCAP 旗舰产品 TiDB 的存储层而研发，现已独立开源并从 CNCF 毕业。

TiKV 的测试环境搭建非常简单，使用官方提供的 TiUP 工具即可实现一键部署，具体可参见[这里](https://tikv.org/docs/latest/concepts/tikv-in-5-minutes)。生产环境一般需要至少三个节点来存储三份数据副本，部署步骤可以参考[官方文档](https://tikv.org/docs/latest/deploy/install/install)。

:::note 注意
建议使用独立部署的 TiKV 5.0+ 集群作为 JuiceFS 的元数据引擎
:::

#### 创建文件系统

使用 TiKV 作为元数据引擎时，需要使用如下格式来指定参数：

```shell
tikv://<pd_addr>[,<pd_addr>...]/<prefix>
```

其中 `prefix` 是一个用户自定义的字符串，当多个文件系统或者应用共用一个 TiKV 集群时，设置前缀可以避免混淆和冲突。示例如下：

```shell
juicefs format \
    --storage s3 \
    ... \
    "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" \
    pics
```

#### 设置 TLS

如果需要开启 TLS，可以通过在元数据 URL 后以添加 query 参数的形式设置 TLS 的配置项，目前支持的配置项：

| 配置项      | 值                                                                                                                                                                                                |
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ca`        | CA 根证书，用于用 TLS 连接 TiKV/PD                                                                                                                                                                |
| `cert`      | 证书文件路径，用于用 TLS 连接 TiKV/PD                                                                                                                                                             |
| `key`       | 私钥文件路径，用于用 TLS 连接 TiKV/PD                                                                                                                                                             |
| `verify-cn` | 证书通用名称，用于验证调用者身份，[详情](https://docs.pingcap.com/zh/tidb/stable/enable-tls-between-components#%E8%AE%A4%E8%AF%81%E7%BB%84%E4%BB%B6%E8%B0%83%E7%94%A8%E8%80%85%E8%BA%AB%E4%BB%BD) |

例子：

```shell
juicefs format \
    --storage s3 \
    ... \
    "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs?ca=/path/to/ca.pem&cert=/path/to/tikv-server.pem&key=/path/to/tikv-server-key.pem&verify-cn=CN1,CN2" \
    pics
```

#### 挂载文件系统

```shell
juicefs mount -d "tikv://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" /mnt/jfs
```

### etcd

[etcd](https://etcd.io) 是一个高可用高可靠的小规模键值数据库，可以用作 JuiceFS 的元数据存储。

#### 创建文件系统

使用 etcd 作为元数据引擎时，需要使用如下格式来指定 `Meta-URL` 参数：

```
etcd://[user:password@]<addr>[,<addr>...]/<prefix>
```

其中 `user` 和 `password` 是当 etcd 开启了用户认证时需要。`prefix` 是一个用户自定义的字符串，当多个文件系统或者应用共用一个 etcd 集群时，设置前缀可以避免混淆和冲突。示例如下：

```shell
juicefs format etcd://user:password@192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs pics
```

#### 设置 TLS

如果需要开启 TLS，可以通过在元数据 URL 后以添加 query 参数的形式设置 TLS 的配置项，注意证书文件请使用绝对路径，避免后台挂载时找不到文件。

| 配置项               | 值           |
|----------------------|--------------|
| cacert               | CA 根证书    |
| cert                 | 证书文件路径 |
| key                  | 私钥文件路径 |
| server-name          | 服务器名称   |
| insecure-skip-verify | 1            |

例子：

```shell
juicefs format \
    --storage s3 \
    ... \
    "etcd://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs?cert=/path/to/ca.pem&cacert=/path/to/etcd-server.pem&key=/path/to/etcd-key.pem&server-name=etcd" \
    pics
```

#### 挂载文件系统

```shell
juicefs mount -d "etcd://192.168.1.6:2379,192.168.1.7:2379,192.168.1.8:2379/jfs" /mnt/jfs
```

### FoundationDB <VersionAdd>1.1</VersionAdd>

[FoundationDB](https://www.foundationdb.org) 是一个能在多集群服务器上存放大规模结构化数据的分布式数据库。该数据库系统专注于高性能、高可扩展性，且具有不错的容错能力。由于对接 FoundationDB 需要先安装其客户端库，因此 JuiceFS 的发布版本默认不支持，使用前需要自行编译。

#### 编译 JuiceFS

首先安装 FoundationDB 客户端（参考[官方文档](https://apple.github.io/foundationdb/api-general.html#installing-client-binaries)）：

<Tabs>
  <TabItem value="debian" label="Debian 及衍生版本">

```shell
curl -O https://github.com/apple/foundationdb/releases/download/6.3.25/foundationdb-clients_6.3.25-1_amd64.deb
sudo dpkg -i foundationdb-clients_6.3.25-1_amd64.deb
```

  </TabItem>
  <TabItem value="centos" label="RHEL 及衍生版本">

```shell
curl -O https://github.com/apple/foundationdb/releases/download/6.3.25/foundationdb-clients-6.3.25-1.el7.x86_64.rpm
sudo rpm -Uvh foundationdb-clients-6.3.25-1.el7.x86_64.rpm
```

  </TabItem>
</Tabs>

然后编译支持 FoundationDB 的 JuiceFS：

```shell
make juicefs.fdb
```

#### 创建文件系统

使用 FoundationDB 作为元数据引擎时，需要使用如下格式来指定 `Meta-URL` 参数：

```uri
fdb://<cluster_file_path>?prefix=<prefix>
```

其中 `<cluster_file_path>` 为 FoundationDB 的配置文件路径，用来连接 FoundationDB 服务端。`<prefix>` 是一个用户自定义的字符串，当多个文件系统或者应用共用一个 FoundationDB 集群时，设置前缀可以避免混淆和冲突。示例如下：

```shell
juicefs.fdb format \
    --storage s3 \
    ... \
    "fdb:///etc/foundationdb/fdb.cluster?prefix=jfs" \
    pics
```

#### 设置 TLS

如果需要开启 TLS，大体步骤如下，详细信息请参考[官方文档](https://apple.github.io/foundationdb/tls.html)。

##### 使用 OpenSSL 生成 CA 证书

```shell
openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout private.key -out cert.crt
cat cert.crt private.key > fdb.pem
```

##### 配置 TLS

| 命令行选项             | 客户端选项         | 环境变量                   | 目的                               |
|------------------------|--------------------|----------------------------|------------------------------------|
| `tls_certificate_file` | `TLS_cert_path`    | `FDB_TLS_CERTIFICATE_FILE` | 可以从中加载本地证书的文件的路径   |
| `tls_key_file`         | `TLS_key_path`     | `FDB_TLS_KEY_FILE`         | 从中加载私钥的文件的路径           |
| `tls_verify_peers`     | `TLS_verify_peers` | `FDB_TLS_VERIFY_PEERS`     | 用于验证对等证书和会话的字节字符串 |
| `tls_password`         | `TLS_password`     | `FDB_TLS_PASSWORD`         | 表示用于解密私钥的密码的字节字符串 |
| `tls_ca_file`          | `TLS_ca_path`      | `FDB_TLS_CA_FILE`          | 包含要信任的 CA 证书的文件的路径   |

##### 配置服务端 TLS

可以在 `foundationdb.conf` 或者环境变量中配置 TLS 参数，配置文件如下（重点在 `[foundationdb.4500]` 配置中）。

```ini title="foundationdb.conf"
[fdbmonitor]
user = foundationdb
group = foundationdb

[general]
restart-delay = 60
## by default, restart-backoff = restart-delay-reset-interval = restart-delay
# initial-restart-delay = 0
# restart-backoff = 60
# restart-delay-reset-interval = 60
cluster-file = /etc/foundationdb/fdb.cluster
# delete-envvars =
# kill-on-configuration-change = true

## Default parameters for individual fdbserver processes
[fdbserver]
command = /usr/sbin/fdbserver
#public-address = auto:$ID
#listen-address = public
datadir = /var/lib/foundationdb/data/$ID
logdir = /var/log/foundationdb
# logsize = 10MiB
# maxlogssize = 100MiB
# machine-id =
# datacenter-id =
# class =
# memory = 8GiB
# storage-memory = 1GiB
# cache-memory = 2GiB
# metrics-cluster =
# metrics-prefix =

[fdbserver.4500]
public-address = 127.0.0.1:4500:tls
listen-address = public
tls_certificate_file = /etc/foundationdb/fdb.pem
tls_ca_file = /etc/foundationdb/cert.crt
tls_key_file = /etc/foundationdb/private.key
tls_verify_peers= Check.Valid=0

[backup_agent]
command = /usr/lib/foundationdb/backup_agent/backup_agent
logdir = /var/log/foundationdb

[backup_agent.1]
```

除此之外还需将 `fdb.cluster` 中的地址加上 `:tls` 后缀，`fdb.cluster` 示例如下：

```uri title="fdb.cluster"
u6pT9Jhl:ClZfjAWM@127.0.0.1:4500:tls
```

##### 配置客户端

在客户端机器上需要配置 TLS 参数以及 `fdb.cluster`，`fdbcli` 同理。

通过 `fdbcli` 连接时：

```shell
fdbcli --tls_certificate_file=/etc/foundationdb/fdb.pem \
       --tls_ca_file=/etc/foundationdb/cert.crt \
       --tls_key_file=/etc/foundationdb/private.key \
       --tls_verify_peers=Check.Valid=0
```

通过 API 连接时（`fdbcli` 也适用）：

```shell
export FDB_TLS_CERTIFICATE_FILE=/etc/foundationdb/fdb.pem \
export FDB_TLS_CA_FILE=/etc/foundationdb/cert.crt \
export FDB_TLS_KEY_FILE=/etc/foundationdb/private.key \
export FDB_TLS_VERIFY_PEERS=Check.Valid=0
```

#### 挂载文件系统

```shell
juicefs.fdb mount -d \
    "fdb:///etc/foundationdb/fdb.cluster?prefix=jfs" \
    /mnt/jfs
```

## SQL 数据库

每个数据库默认只能被一个 JuiceFS 文件系统所使用，如果想要多个文件系统共享一个数据库，可以通过在 META-URL 中添加 `table_prefix` <VersionAdd>1.3</VersionAdd> query 参数
为不同的文件系统添加不同的表名来前缀实现。例如：`mysql://user:mypassword@(192.168.1.6:3306)/juicefs?table_prefix=volume1`

### MySQL

[MySQL](https://www.mysql.com) 是受欢迎的开源关系型数据库之一，常被作为 Web 应用程序的首选数据库。

>[MariaDB](https://mariadb.org) 是 MySQL 的一个开源分支，由 MySQL 原始开发者维护并保持开源，与 MySQL 高度兼容，在设置元数据引擎方法上也没有任何差别。
>
>[OceanBase](https://www.oceanbase.com)是一款自主研发的分布式关系型数据库，专为处理海量数据和高并发事务而设计，具备高性能、强一致性和高可用性的特点。同时，OceanBase 与 MySQL 高度兼容，在设置元数据引擎方法上也没有任何差别。

#### 创建文件系统

使用 MySQL 作为元数据存储引擎时，需要提前手动创建数据库，通常使用以下格式访问数据库：

<Tabs>
  <TabItem value="tcp" label="TCP">

```
mysql://<username>[:<password>]@(<host>:3306)/<database-name>
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
mysql://<username>[:<password>]@unix(<socket-file-path>)/<database-name>
```

  </TabItem>
</Tabs>

:::note 注意

1. 不要漏掉 URL 两边的 `()` 括号
2. 密码中的特殊字符不需要进行 url 编码

:::

例如：

```shell
juicefs format \
    --storage s3 \
    ... \
    "mysql://user:mypassword@(192.168.1.6:3306)/juicefs" \
    pics
```

更安全的做法是可以通过环境变量 `META_PASSWORD` 传递数据库密码：

```shell
export META_PASSWORD="mypassword"
juicefs format \
    --storage s3 \
    ... \
    "mysql://user@(192.168.1.6:3306)/juicefs" \
    pics
```

要连接到启用 TLS 的 MySQL 服务器，请传递 `tls=true` 参数（或 `tls=skip-verify` 如果使用自签名证书）：

```shell
juicefs format \
    --storage s3 \
    ... \
    "mysql://user:mypassword@(192.168.1.6:3306)/juicefs?tls=true" \
    pics
```

要启用 JuiceFS 到 MySQL 服务器建立连接的超时控制，请传递 `timeout=5s` 参数（时间可自定义）：

```shell
juicefs format \
    --storage s3 \
    ... \
    "mysql://user:mypassword@(192.168.1.6:3306)/juicefs?timeout=5s" \
    pics
```

:::note 注意

设置建立连接超时，在 JuiceFS 和 MySQL 间出现网络故障场景时，能明确控制对 JuiceFS 文件系统进行读写的阻塞时间，从而可控的对网络故障进行响应。

:::

#### 挂载文件系统

```shell
juicefs mount -d "mysql://user:mypassword@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

挂载文件系统也支持用 `META_PASSWORD` 环境变量传递密码：

```shell
export META_PASSWORD="mypassword"
juicefs mount -d "mysql://user@(192.168.1.6:3306)/juicefs" /mnt/jfs
```

要连接到启用 TLS 的 MySQL 服务器，请传递 `tls=true` 参数（或 `tls=skip-verify` 如果使用自签名证书）：

```shell
juicefs mount -d "mysql://user:mypassword@(192.168.1.6:3306)/juicefs?tls=true" /mnt/jfs
```

更多 MySQL 数据库的地址格式示例，[点此查看](https://github.com/Go-SQL-Driver/MySQL/#examples)。

### PostgreSQL

[PostgreSQL](https://www.postgresql.org) 是功能强大的开源关系型数据库，有完善的生态和丰富的应用场景，也可以用来作为 JuiceFS 的元数据引擎。

许多云计算平台都提供托管的 PostgreSQL 数据库服务，也可以按照[使用向导](https://www.postgresqltutorial.com/postgresql-getting-started)自己部署一个。

其他跟 PostgreSQL 协议兼容的数据库（比如 CockroachDB 等) 也可以这样使用。

#### 创建文件系统

使用 PostgreSQL 作为元数据引擎时，需要提前手动创建数据库，使用如下的格式来指定参数：

<Tabs>
  <TabItem value="tcp" label="TCP">

```
postgres://[username][:<password>]@<host>[:5432]/<database-name>[?parameters]
```

  </TabItem>
  <TabItem value="unix-socket" label="Unix socket">

```
postgres://[username][:<password>]@/<database-name>?host=<socket-directories-path>[&parameters]
```

  </TabItem>
</Tabs>

其中，`[]` 括起来的是可选项，其它部分为必选项。

例如：

```shell
juicefs format \
    --storage s3 \
    ... \
    "postgres://user:mypassword@192.168.1.6:5432/juicefs" \
    pics
```

更安全的做法是可以通过环境变量 `META_PASSWORD` 传递数据库密码：

```shell
export META_PASSWORD="mypassword"
juicefs format \
    --storage s3 \
    ... \
    "postgres://user@192.168.1.6:5432/juicefs" \
    pics
```

:::note 说明

1. JuiceFS 默认使用的 public [schema](https://www.postgresql.org/docs/current/ddl-schemas.html) ，如果要使用非 `public schema`，需要在连接字符串中指定 `search_path` 参数，例如 `postgres://user:mypassword@192.168.1.6:5432/juicefs?search_path=pguser1`
2. 如果 `public schema` 并非是 PostgreSQL 服务端配置的 `search_path` 中第一个命中的，则必须在连接字符串中明确设置 `search_path` 参数
3. `search_path` 连接参数原生可以设置为多个 schema，但是目前 JuiceFS 仅支持设置一个。`postgres://user:mypassword@192.168.1.6:5432/juicefs?search_path=pguser1,public` 将被认为不合法
4. 密码中的特殊字符需要进行 url 编码，例如 `|` 需要编码为`%7C`。

:::

#### 挂载文件系统

```shell
juicefs mount -d "postgres://user:mypassword@192.168.1.6:5432/juicefs" /mnt/jfs
```

挂载文件系统也支持用 `META_PASSWORD` 环境变量传递密码：

```shell
export META_PASSWORD="mypassword"
juicefs mount -d "postgres://user@192.168.1.6:5432/juicefs" /mnt/jfs
```

#### 故障排除

JuiceFS 客户端默认采用 SSL 加密连接 PostgreSQL，如果连接时报错  `pq: SSL is not enabled on the server` 说明数据库没有启用 SSL。可以根据业务场景为 PostgreSQL 启用 SSL 加密，也可以在元数据 URL 中添加参数禁用加密验证：

```shell
juicefs format \
    --storage s3 \
    ... \
    "postgres://user@192.168.1.6:5432/juicefs?sslmode=disable" \
    pics
```

元数据 URL 中还可以附加更多参数，[查看详情](https://pkg.go.dev/github.com/lib/pq#hdr-Connection_String_Parameters)。

### SQLite

[SQLite](https://sqlite.org) 是全球广泛使用的小巧、快速、单文件、可靠、全功能的单文件 SQL 数据库引擎。

SQLite 数据库只有一个文件，创建和使用都非常灵活，用它作为 JuiceFS 元数据存储引擎时无需提前创建数据库文件，可以直接创建文件系统：

```shell
juicefs format \
    --storage s3 \
    ... \
    "sqlite3://my-jfs.db" \
    pics
```

以上命令会在当前目录创建名为 `my-jfs.db` 的数据库文件，请 **务必妥善保管** 这个数据库文件！

挂载文件系统：

```shell
juicefs mount -d "sqlite3://my-jfs.db" /mnt/jfs/
```

请注意数据库文件的位置，如果不在当前目录，则需要指定数据库文件的绝对路径，比如：

```shell
juicefs mount -d "sqlite3:///home/herald/my-jfs.db" /mnt/jfs/
```

也可以在连接字符串中添加参数来支持 [PRAGMA 语句](https://www.sqlite.org/pragma.html)：

```shell
"sqlite3://my-jfs.db?cache=shared&_busy_timeout=5000"
```

更多 SQLite 数据库的地址格式示例，请参考 [Go-SQLite3 Driver](https://github.com/mattn/go-sqlite3#connection-string)。

:::note 注意
由于 SQLite 是一款单文件数据库，在不做特殊共享设置的情况下，只有数据库所在的主机可以访问它。对于多台服务器共享同一文件系统的情况，需要使用 Redis 或 MySQL 等数据库。
:::


================================================
FILE: docs/zh_cn/reference/how_to_set_up_object_storage.md
================================================
---
title: 如何设置对象存储
sidebar_position: 3
description: JuiceFS 以对象存储作为数据存储，本文介绍 JuiceFS 支持的对象存储以及相应的配置和使用方法。
---

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

通过阅读 [JuiceFS 的技术架构](../introduction/architecture.md)可以了解到，JuiceFS 是一个数据与元数据分离的分布式文件系统，以对象存储作为主要的数据存储，以 Redis、PostgreSQL、MySQL 等数据库作为元数据存储。

## 存储选项 {#storage-options}

在创建 JuiceFS 文件系统时，设置数据存储一般涉及以下几个选项：

- `--storage` 指定文件系统要使用的存储类型，例如：`--storage s3`。
- `--bucket` 指定存储访问地址，例如：`--bucket https://myjuicefs.s3.us-east-2.amazonaws.com`。
- `--access-key` 和 `--secret-key` 指定访问存储时的身份认证信息。

例如，以下命令使用 Amazon S3 对象存储创建文件系统：

```shell
juicefs format --storage s3 \
    --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \
    --access-key abcdefghijklmn \
    --secret-key nmlkjihgfedAcBdEfg \
    redis://192.168.1.6/1 \
    myjfs
```

## 其他选项 {#other-options}

在执行 `juicefs format` 或 `juicefs mount` 命令时，可以在 `--bucket` 选项中以 URL 参数的形式设置一些特别的选项，比如 `https://myjuicefs.s3.us-east-2.amazonaws.com?tls-insecure-skip-verify=true` 中的 `tls-insecure-skip-verify=true` 即为跳过 HTTPS 请求的证书验证环节。

客户端证书也受支持，因为它们通常用于 mTLS 连接，例如：
`https://myjuicefs.s3.us-east-2.amazonaws.com?ca-certs=./path/to/ca&ssl-cert=./path/to/cert&ssl-key=./path/to/privatekey`

## 配置数据分片（Sharding） {#enable-data-sharding}

创建文件系统时，可以通过 [`--shards`](../reference/command_reference.mdx#format-data-format-options) 选项定义多个 Bucket 作为文件系统的底层存储。这样一来，系统会根据文件名哈希值将文件分散到多个 Bucket 中。数据分片技术可以将大规模数据并发写的负载分散到多个 Bucket 中，从而提高写入性能。

启用数据分片功能需要注意以下事项：

- 只能使用同一种对象存储下的多个 bucket
- `--shards` 选项接受一个 0～256 之间的整数，表示将文件分散到多少个 Bucket 中。默认值为 0，表示不启用数据分片功能。
- 需要使用整型数字通配符 `%d` 或许 `%x` 之类指定用户生成 bucket 的 endpoint 的字符串，例如 `"http://192.168.1.18:9000/myjfs-%d"`，可以按照这样的格式预先创建 bucket，也可以在创建文件系统时由 JuiceFS 客户端自动创建；
- 数据分片在创建时设定，创建完毕不允许修改。不可增加或减少 bucket，也不可以取消 shards 功能。

例如，以下命令创建了一个数据分片为 4 的文件系统：

```shell
juicefs format --storage s3 \
    --shards 4 \
    --bucket "https://myjfs-%d.s3.us-east-2.amazonaws.com" \
    ...
```

执行上述命令后，JuiceFS 客户端会创建 4 个 bucket，分别为 `myjfs-0`、`myjfs-1`、`myjfs-2` 和 `myjfs-3`。

## Access Key 和 Secret Key {#aksk}

一般而言，对象存储通过 Access Key ID 和 Access Key Secret 验证用户身份，对应到 JuiceFS 文件系统就是 `--access-key` 和 `--secret-key` 这两个选项（或者简称为 AK、SK）。

创建文件系统时除了使用 `--access-key` 和 `--secret-key` 两个选项显式指定，更安全的做法是通过 `ACCESS_KEY` 和 `SECRET_KEY` 环境变量传递密钥信息，例如：

```shell
export ACCESS_KEY=abcdefghijklmn
export SECRET_KEY=nmlkjihgfedAcBdEfg
juicefs format --storage s3 \
    --bucket https://myjuicefs.s3.us-east-2.amazonaws.com \
    redis://192.168.1.6/1 \
    myjfs
```

公有云通常允许用户创建 IAM（Identity and Access Management）角色，例如：[AWS IAM 角色](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_roles.html) 或 [阿里云 RAM 角色](https://help.aliyun.com/document_detail/93689.html)，可将角色分配给 VM 实例。如果云服务器实例已经拥有读写对象存储的权限，则无需再指定 `--access-key` 和 `--secret-key`。

## 使用临时访问凭证 {#session-token}

永久访问凭证一般有两个部分：Access Key 和 Secret Key，而临时访问凭证一般包括 3 个部分：Access Key、Secret Key 与 token，并且临时访问凭证具有过期时间，一般在几分钟到几个小时之间。

### 如何获取临时凭证 {#how-to-get-temporary-credentials}

不同云厂商的获取方式不同，一般是需要已具有相应权限用户的 Access Key、Secret Key 以及代表临时访问凭证的权限边界的 ARN 作为参数请求访问云服务厂商的 STS 服务器来获取临时访问凭证。这个过程一般可以由云厂商提供的 SDK 简化操作。比如 Amazon S3 获取临时凭证方式可以参考这个[链接](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_credentials_temp_request.html)，阿里云 OSS 获取临时凭证方式可以参考这个[链接](https://help.aliyun.com/document_detail/100624.html)。

### 如何使用临时访问凭证设置对象存储 {#how-to-set-up-object-storage-with-temporary-access-credentials}

使用临时凭证的方式与使用永久凭证差异不大，在格式化文件系统时，将临时凭证的 Access Key、Secret Key、token 分别通过 `--access-key`、`--secret-key`、`--session-token` 设置值即可。例如：

```bash
juicefs format \
    --storage oss \
    --access-key xxxx \
    --secret-key xxxx \
    --session-token xxxx \
    --bucket https://bucketName.oss-cn-hangzhou.aliyuncs.com \
    redis://localhost:6379/1 \
    test1
```

由于临时凭证很快就会过期，所以关键在于格式化文件系统以后，如何在临时凭证过期前更新 JuiceFS 正在使用的临时凭证。一次凭证更新过程分为两步：

1. 在临时凭证过期前，申请好新的临时凭证；
2. 无需停止正在运行的 JuiceFS，直接使用 `juicefs config Meta-URL --access-key xxxx --secret-key xxxx --session-token xxxx` 命令热更新访问凭证。

新挂载的客户端会直接使用新的凭证，已经在运行的所有客户端也会在一分钟内更新自己的凭证。整个更新过程不会影响正在运行的业务。由于临时凭证过期时间较短，所以以上步骤需要**长期循环执行**才能保证 JuiceFS 服务可以正常访问到对象存储。

## 内网和外网 Endpoint {#internal-and-public-endpoint}

通常情况下，对象存储服务提供统一的 URL 进行访问，但云平台会同时提供内网和外网通信线路，比如满足条件的同平台云服务会自动解析通过内网线路访问对象存储，这样不但时延更低，而且内网通信产生的流量是免费的。

另外，一些云计算平台也区分内外网线路，但没有提供统一访问 URL，而是分别提供内网 Endpoint 和外网 Endpoint 地址。

JuiceFS 对这种区分内网外地址的对象存储服务也做了灵活的支持，对于共享同一个文件系统的场景，在满足条件的服务器上通过内网 Endpoint 访问对象存储，其他计算机通过外网 Endpoint 访问，可以这样使用：

- **创建文件系统时**：`--bucket` 建议使用内网 Endpoint 地址；
- **挂载文件系统时**：对于不满足内网线路的客户端，可以通过 `--bucket` 指定外网 Endpoint 地址。

使用内网 Endpoint 创建文件系统可以确保性能更好、延时更低，对于无法通过内网访问的客户端，可以在挂载文件系统时通过 `--bucket` 指定外网 Endpoint 进行挂载访问。

## 存储类 <VersionAdd>1.1</VersionAdd> {#storage-class}

对象存储通常支持多种存储类，如标准存储、低频访问存储、归档存储。不同的存储类会有不同的价格及服务可用性，你可以在创建 JuiceFS 文件系统时通过 [`--storage-class`](../reference/command_reference.mdx#format-data-storage-options) 选项设置默认的存储类，或者在挂载 JuiceFS 文件系统时通过 [`--storage-class`](../reference/command_reference.mdx#mount-data-storage-options) 选项设置一个新的存储类。请查阅你所使用的对象存储的用户手册了解应该如何设置 `--storage-class` 选项的值（如 [Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass)）。

:::note 注意
当使用某些存储类（如归档、深度归档）时，数据无法立即访问，需要提前恢复数据并等待一段时间之后才能访问。
:::

:::note 注意
当使用某些存储类（如低频访问）时，会有最小计费单位，读取数据也可能会产生额外的费用，请查阅你所使用的对象存储的用户手册了解详细信息。
:::

## 使用代理 {#using-proxy}

如果客户端所在的网络环境受防火墙策略或其他因素影响需要通过代理访问外部的对象存储服务，使用的操作系统不同，相应的代理设置方法也不同，请参考相应的用户手册进行设置。

以 Linux 为例，可以通过创建 `http_proxy` 和 `https_proxy` 环境变量设置代理：

```shell
export http_proxy=http://localhost:8035/
export https_proxy=http://localhost:8035/
juicefs format \
    --storage s3 \
    ... \
    myjfs
```

## 支持的存储服务 {#supported-object-storage}

如果你希望使用的存储类型不在列表中，欢迎提交需求 [issue](https://github.com/juicedata/juicefs/issues)。

| 名称                                        | 值         |
|:-------------------------------------------:|:----------:|
| [Amazon S3](#amazon-s3)                     | `s3`       |
| [Google 云存储](#google-cloud)              | `gs`       |
| [Azure Blob 存储](#azure-blob-存储)         | `wasb`     |
| [Backblaze B2](#backblaze-b2)               | `b2`       |
| [IBM 云对象存储](#ibm-云对象存储)           | `ibmcos`   |
| [Oracle 云对象存储](#oracle-云对象存储)     | `s3`       |
| [Scaleway](#scaleway)                       | `scw`      |
| [DigitalOcean Spaces](#digitalocean-spaces) | `space`    |
| [Wasabi](#wasabi)                           | `wasabi`   |
| [Storj DCS](#storj-dcs)                     | `s3`       |
| [Vultr 对象存储](#vultr-对象存储)           | `s3`       |
| [Cloudflare R2](#r2)                        | `s3`       |
| [阿里云 OSS](#阿里云-oss)                   | `oss`      |
| [腾讯云 COS](#腾讯云-cos)                   | `cos`      |
| [华为云 OBS](#华为云-obs)                   | `obs`      |
| [百度云 BOS](#百度-bos)                     | `bos`      |
| [火山引擎 TOS](#volcano-engine-tos)         | `tos`      |
| [金山云 KS3](#金山云-ks3)                   | `ks3`      |
| [青云 QingStor](#青云-qingstor)             | `qingstor` |
| [七牛云 Kodo](#七牛云-kodo)                 | `qiniu`    |
| [天翼云 OOS](#天翼云-oos)                   | `oos`      |
| [移动云 EOS](#移动云-eos)                   | `eos`      |
| [京东云 OSS](#京东云-oss)                   | `s3`       |
| [优刻得 US3](#优刻得-us3)                   | `ufile`    |
| [Ceph RADOS](#ceph-rados)                   | `ceph`     |
| [Ceph RGW](#ceph-rgw)                       | `s3`       |
| [Gluster](#gluster)                         | `gluster`  |
| [Swift](#swift)                             | `swift`    |
| [MinIO](#minio)                             | `minio`    |
| [WebDAV](#webdav)                           | `webdav`   |
| [HDFS](#hdfs)                               | `hdfs`     |
| [Apache Ozone](#apache-ozone)               | `s3`       |
| [Redis](#redis)                             | `redis`    |
| [TiKV](#tikv)                               | `tikv`     |
| [etcd](#etcd)                               | `etcd`     |
| [SQLite](#sqlite)                           | `sqlite3`  |
| [MySQL](#mysql)                             | `mysql`    |
| [PostgreSQL](#postgresql)                   | `postgres` |
| [本地磁盘](#本地磁盘)                       | `file`     |
| [SFTP/SSH](#sftp)                           | `sftp`     |
| [NFS](#nfs)                                 | `nfs`      |

### Amazon S3

S3 支持[两种风格的 endpoint URI](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/VirtualHosting.html)：「虚拟托管类型」和「路径类型」。区别如下：

- 虚拟托管类型：`https://<bucket>.s3.<region>.amazonaws.com`
- 路径类型：`https://s3.<region>.amazonaws.com/<bucket>`

其中 `<region>` 要替换成实际的区域代码，比如：美国西部（俄勒冈）的区域代码为 `us-west-2`。[点此查看](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions)所有的区域代码。

:::note 注意
AWS 中国的用户，应使用 `amazonaws.com.cn` 域名。相应的区域代码信息[点此查看](https://docs.amazonaws.cn/aws/latest/userguide/endpoints-arns.html)。
:::

:::note 注意
如果 S3 的桶具有公共访问权限（支持匿名访问），请将 `--access-key` 设置为 `anonymous`。
:::

JuiceFS 中可选择任意一种风格来指定存储桶的地址，例如：

<Tabs groupId="amazon-s3-endpoint">
  <TabItem value="virtual-hosted-style" label="虚拟托管类型">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.s3.<region>.amazonaws.com \
    ... \
    myjfs
```

  </TabItem>
  <TabItem value="path-style" label="路径类型">

```bash
juicefs format \
    --storage s3 \
    --bucket https://s3.<region>.amazonaws.com/<bucket> \
    ... \
    myjfs
```

  </TabItem>
</Tabs>

你也可以将 `--storage` 设置为 `s3` 用来连接 S3 兼容的对象存储，比如：

<Tabs groupId="amazon-s3-endpoint">
  <TabItem value="virtual-hosted-style" label="虚拟托管类型">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

  </TabItem>
  <TabItem value="path-style" label="路径类型">

```bash
juicefs format \
    --storage s3 \
    --bucket https://<endpoint>/<bucket> \
    ... \
    myjfs
```

  </TabItem>
</Tabs>

:::tip 提示
所有 S3 兼容的对象存储服务其 `--bucket` 选项的格式为 `https://<bucket>.<endpoint>` 或者 `https://<endpoint>/<bucket>`，默认的 `region` 为 `us-east-1`，当需要不同的 `region` 的时候，可以通过环境变量 `AWS_REGION` 或者 `AWS_DEFAULT_REGION` 手动设置。
:::

### Google 云存储 {#google-cloud}

Google 云采用 [IAM](https://cloud.google.com/iam/docs/overview) 管理资源的访问权限，通过对[服务账号](https://cloud.google.com/iam/docs/creating-managing-service-accounts#iam-service-accounts-create-gcloud)授权，可以对云服务器、对象存储的访问权限进行精细化的控制。

对于归属于同一服务账号的云服务器和对象存储，只要该账号赋予了相关资源的访问权限，创建 JuiceFS 文件系统时无需提供身份验证信息，云平台会自行完成鉴权。

对于要从谷歌云平台外部访问对象存储的情况，比如要在本地计算机上使用 Google 云存储创建 JuiceFS 文件系统，则需要配置认证信息。由于 Google 云存储并不使用 Access Key ID 和 Access Key Secret，而是通过服务账号的 JSON 密钥文件验证身份。

请参考[「以服务帐号身份进行身份验证」](https://cloud.google.com/docs/authentication/production)为服务账号创建 JSON 密钥文件并下载到本地计算机，通过 `GOOGLE_APPLICATION_CREDENTIALS` 环境变量定义密钥文件的路径，例如：

```shell
export GOOGLE_APPLICATION_CREDENTIALS="$HOME/service-account-file.json"
```

可以把创建环境变量的命令写入 `~/.bashrc` 或 `~/.profile` 让 Shell 在每次启动时自动设置。

配置了传递密钥信息的环境变量以后，在本地和在 Google 云服务器上创建文件系统的命令是完全相同的。例如：

```bash
juicefs format \
    --storage gs \
    --bucket <bucket>[.region] \
    ... \
    myjfs
```

可以看到，命令中无需包含身份验证信息，客户端会通过前面环境变量设置的 JSON 密钥文件完成对象存储的访问鉴权。同时，由于 bucket 名称是 [全局唯一](https://cloud.google.com/storage/docs/naming-buckets#considerations) 的，创建文件系统时，`--bucket` 选项中只需指定 bucket 名称即可。

### Azure Blob 存储

使用 Azure Blob 存储作为 JuiceFS 的数据存储，请先 [查看文档](https://docs.microsoft.com/zh-cn/azure/storage/common/storage-account-keys-manage) 了解如何查看存储帐户的名称和密钥，它们分别对应 `--access-key` 和 `--secret-key` 选项的值。

`--bucket` 选项的设置格式为 `https://<container>.<endpoint>`，请将其中的 `<container>` 替换为实际的 Blob 容器的名称，将 `<endpoint>` 替换为 `core.windows.net`（Azure 全球）或 `core.chinacloudapi.cn`（Azure 中国）。例如：

```bash
juicefs format \
    --storage wasb \
    --bucket https://<container>.<endpoint> \
    --access-key <storage-account-name> \
    --secret-key <storage-account-access-key> \
    ... \
    myjfs
```

除了使用 `--access-key` 和 `--secret-key` 选项之外，你也可以使用 [连接字符串](https://docs.microsoft.com/zh-cn/azure/storage/common/storage-configure-connection-string) 并通过 `AZURE_STORAGE_CONNECTION_STRING` 环境变量进行设定。例如：

```bash
# Use connection string
export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=XXX;AccountKey=XXX;EndpointSuffix=core.windows.net"
juicefs format \
    --storage wasb \
    --bucket https://<container> \
    ... \
    myjfs
```

:::note 注意
对于 Azure 中国用户，`EndpointSuffix` 的值为 `core.chinacloudapi.cn`。
:::

### Backblaze B2

使用 Backblaze B2 作为 JuiceFS 的数据存储，需要先创建 [application key](https://www.backblaze.com/b2/docs/application_keys.html)，**Application Key ID** 和 **Application Key** 分别对应 Access Key 和 Secret Key。

Backblaze B2 支持两种访问接口：B2 原生 API 和 S3 兼容 API。

#### B2 原生 API

存储类型应设置为 `b2`，`--bucket` 只需设置 bucket 名称。例如：

```bash
juicefs format \
    --storage b2 \
    --bucket <bucket> \
    --access-key <application-key-ID> \
    --secret-key <application-key> \
    ... \
    myjfs
```

#### S3 兼容 API

存储类型应设置为 `s3`，`--bucket` 应指定完整的 bucket 地址。例如：

```bash
juicefs format \
    --storage s3 \
    --bucket https://s3.eu-central-003.backblazeb2.com/<bucket> \
    --access-key <application-key-ID> \
    --secret-key <application-key> \
    ... \
    myjfs
```

### IBM 云对象存储

使用 IBM 云对象存储创建 JuiceFS 文件系统，你首先需要创建 [API key](https://cloud.ibm.com/docs/account?topic=account-manapikey) 和 [instance ID](https://cloud.ibm.com/docs/key-protect?topic=key-protect-retrieve-instance-ID)。**API key** 和 **instance ID** 分别对应 Access Key 和 Secret Key。

IBM 云对象存储为每一个区域提供了 `公网` 和 `内网` 两种 [endpoint 地址](https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-endpoints)，你可以根据实际需要选用。例如：

```bash
juicefs format \
    --storage ibmcos \
    --bucket https://<bucket>.<endpoint> \
    --access-key <API-key> \
    --secret-key <instance-ID> \
    ... \
    myjfs
```

### Oracle 云对象存储

Oracle 云对象存储支持 S3 兼容的形式进行访问，详细请参考[官方文档](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm)。

该对象存储的 `endpoint` 格式为：`${namespace}.compat.objectstorage.${region}.oraclecloud.com`，例如：

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<endpoint> \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

### Scaleway

使用 Scaleway 对象存储作为 JuiceFS 数据存储，请先 [查看文档](https://www.scaleway.com/en/docs/generate-api-keys) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的设置格式为 `https://<bucket>.s3.<region>.scw.cloud`，请将其中的 `<region>` 替换成实际的区域代码，例如：荷兰阿姆斯特丹的区域代码是 `nl-ams`。[点此查看](https://www.scaleway.com/en/docs/object-storage-feature/#-Core-Concepts) 所有可用的区域代码。

```bash
juicefs format \
    --storage scw \
    --bucket https://<bucket>.s3.<region>.scw.cloud \
    ... \
    myjfs
```

### DigitalOcean Spaces

使用 DigitalOcean Spaces 作为 JuiceFS 数据存储，请先 [查看文档](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的设置格式为 `https://<space-name>.<region>.digitaloceanspaces.com`，请将其中的 `<region>` 替换成实际的区域代码，例如：`nyc3`。[点此查看](https://www.digitalocean.com/docs/spaces/#regional-availability) 所有可用的区域代码。

```bash
juicefs format \
    --storage space \
    --bucket https://<space-name>.<region>.digitaloceanspaces.com \
    ... \
    myjfs
```

### Wasabi

使用 Wasabi 作为 JuiceFS 数据存储，请先 [查看文档](https://wasabi-support.zendesk.com/hc/en-us/articles/360019677192-Creating-a-Root-Access-Key-and-Secret-Key) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的设置格式为 `https://<bucket>.s3.<region>.wasabisys.com`，请将其中的  `<region>`  替换成实际的区域代码，例如：US East 1 (N. Virginia) 的区域代码为 `us-east-1`。[点此查看](https://wasabi-support.zendesk.com/hc/en-us/articles/360.15.26031-What-are-the-service-URLs-for-Wasabi-s-different-regions-) 所有可用的区域代码。

```bash
juicefs format \
    --storage wasabi \
    --bucket https://<bucket>.s3.<region>.wasabisys.com \
    ... \
    myjfs
```

:::note 注意
Tokyo (ap-northeast-1) 区域的用户，查看 [这篇文档](https://wasabi-support.zendesk.com/hc/en-us/articles/360039372392-How-do-I-access-the-Wasabi-Tokyo-ap-northeast-1-storage-region-) 了解 endpoint URI 的设置方法。
:::

### Storj DCS

使用 Storj DCS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://docs.storj.io/api-reference/s3-compatible-gateway) 了解如何创建 Access Key 和 Secret Key。

Storj DCS 兼容 AWS S3，存储类型使用 `s3` ，`--bucket` 格式为 `https://gateway.<region>.storjshare.io/<bucket>`。`<region>` 为存储区域，目前 DCS 有三个可用存储区域：us1、ap1 和 eu1。

```shell
juicefs format \
    --storage s3 \
    --bucket https://gateway.<region>.storjshare.io/<bucket> \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

:::caution 特别提示
因为 Storj DCS 的 [ListObjects](https://github.com/storj/gateway-st/blob/main/docs/s3-compatibility.md#listobjects) API 并非完全 S3 兼容（返回结果没有实现排序功能），所以 JuiceFS 的部分功能无法使用，比如 `juicefs gc`，`juicefs fsck`，`juicefs sync`，`juicefs destroy`。另外，使用 `juicefs mount` 时需要关闭[元数据自动备份](../administration/metadata_dump_load.md#backup-automatically)功能，即加上 `--backup-meta 0`。
:::

### Vultr 对象存储

Vultr 的对象存储兼容 S3 API，存储类型使用 `s3`，`--bucket` 格式为 `https://<bucket>.<region>.vultrobjects.com/`。例如：

```shell
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.ewr1.vultrobjects.com/ \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

访问对象存储的 API 密钥可以在 [管理控制台](https://my.vultr.com/objectstorage) 中找到。

### Cloudflare R2 {#r2}

R2 是 Cloudflare 的对象存储服务，提供 S3 兼容的 API，因此用法与 Amazon S3 基本一致。请参照[文档](https://developers.cloudflare.com/r2/data-access/s3-api/tokens)了解如何创建 Access Key 和 Secret Key。

```shell
juicefs format \
    --storage s3 \
    --bucket https://<ACCOUNT_ID>.r2.cloudflarestorage.com/myjfs \
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

对于生产环境，建议通过 `ACCESS_KEY` 和 `SECRET_KEY` 环境变量传递密钥信息，例如：

```shell
export ACCESS_KEY=<your-access-key>
export SECRET_KEY=<your-sceret-key>
juicefs format \
    --storage s3 \
    --bucket https://<ACCOUNT_ID>.r2.cloudflarestorage.com/myjfs \
    ... \
    myjfs
```

:::caution 特别提示
因为 Cloudflare R2 的 `ListObjects` API 并非完全 S3 兼容（返回结果没有实现排序功能），所以 JuiceFS 的部分功能无法使用，比如 `juicefs gc`、`juicefs fsck`、`juicefs sync`、`juicefs destroy`。另外，使用 `juicefs mount` 时需要关闭[元数据自动备份](../administration/metadata_dump_load.md#backup-automatically)功能，即加上 `--backup-meta 0`。
:::

### 阿里云 OSS

使用阿里云 OSS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://help.aliyun.com/document_detail/38738.html) 了解如何创建 Access Key 和 Secret Key。如果你已经创建了 [RAM 角色](https://help.aliyun.com/document_detail/93689.html) 并指派给了云服务器实例，则在创建文件系统时可以忽略 `--access-key` 和 `--secret-key` 选项。

阿里云也支持使用 [Security Token Service (STS)](https://help.aliyun.com/document_detail/100624.html) 作为 OSS 的临时访问身份验证。如果你要使用 STS，请设置  `ALICLOUD_ACCESS_KEY_ID`、`ALICLOUD_ACCESS_KEY_SECRET` 和 `SECURITY_TOKEN` 环境变量，不要设置 `--access-key` and `--secret-key` 选项。例如：

```bash
# Use Security Token Service (STS)
export ALICLOUD_ACCESS_KEY_ID=XXX
export ALICLOUD_ACCESS_KEY_SECRET=XXX
export SECURITY_TOKEN=XXX
juicefs format \
    --storage oss \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

阿里云 OSS 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://help.aliyun.com/document_detail/31834.html)，你可以根据实际的场景选用。

如果你是在阿里云的服务器上创建文件系统，可以在 `--bucket` 选项中直接指定 bucket 名称。例如：

```bash
# 在阿里云中运行
juicefs format \
    --storage oss \
    --bucket <bucket> \
    ... \
    myjfs
```

### 腾讯云 COS

使用腾讯云 COS 作为 JuiceFS 数据存储，Bucket 名称格式为 `<bucket>-<APPID>`，即需要在 bucket 名称后面指定 `APPID`，[点此查看](https://cloud.tencent.com/document/product/436/13312) 如何获取  `APPID` 。

`--bucket` 选项的完整格式为 `https://<bucket>-<APPID>.cos.<region>.myqcloud.com`，请将 `<region>` 替换成你实际使用的存储区域，例如：上海的区域代码为 `ap-shanghai`。[点此查看](https://cloud.tencent.com/document/product/436/6224) 所有可用的区域代码。例如：

```bash
juicefs format \
    --storage cos \
    --bucket https://<bucket>-<APPID>.cos.<region>.myqcloud.com \
    ... \
    myjfs
```

如果你是在腾讯云的服务器上创建文件系统，可以在 `--bucket` 选项中直接指定 bucket 名称。例如：

```bash
# 在腾讯云中运行
juicefs format \
    --storage cos \
    --bucket <bucket>-<APPID> \
    ... \
    myjfs
```

### 华为云 OBS

使用华为云 OBS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://support.huaweicloud.com/usermanual-ca/zh-cn_topic_0046606340.html) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.obs.<region>.myhuaweicloud.com`，请将 `<region>` 替换成你实际使用的存储区域，例如：北京一的区域代码为 `cn-north-1`。[点此查看](https://developer.huaweicloud.com/endpoint?OBS) 所有可用的区域代码。例如：

```bash
juicefs format \
    --storage obs \
    --bucket https://<bucket>.obs.<region>.myhuaweicloud.com \
    ... \
    myjfs
```

如果是你在华为云的服务器上创建文件系统，可以在 `--bucket` 直接指定 bucket 名称。例如：

```bash
# 在华为云中运行
juicefs format \
    --storage obs \
    --bucket <bucket> \
    ... \
    myjfs
```

### 百度 BOS

使用百度云 BOS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://cloud.baidu.com/doc/Reference/s/9jwvz2egb) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.<region>.bcebos.com`，请将 `<region>` 替换成你实际使用的存储区域，例如：北京的区域代码为 `bj`。[点此查看](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D%EF%BC%88endpoint%EF%BC%89) 所有可用的区域代码。例如：

```bash
juicefs format \
    --storage bos \
    --bucket https://<bucket>.<region>.bcebos.com \
    ... \
    myjfs
```

如果你是在百度云的服务器上创建文件系统，可以在 `--bucket` 直接指定 bucket 名称。例如：

```bash
# 在百度云中运行
juicefs format \
    --storage bos \
    --bucket <bucket> \
    ... \
    myjfs
```

### 火山引擎 TOS <VersionAdd>1.0.3</VersionAdd> {#volcano-engine-tos}

使用火山引擎 TOS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://www.volcengine.com/docs/6291/65568) 了解如何创建 Access Key 和 Secret Key。

火山引擎 TOS 为每个区域都提供了公网和内网 [endpoint 链接](https://www.volcengine.com/docs/6349/107356)，你可以根据实际的场景选用。

```bash
juicefs format \
    --storage tos \
    --bucket https://<bucket>.<endpoint>\
    ... \
    myjfs
```

### 金山云 KS3

使用金山云 KS3 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://docs.ksyun.com/documents/1386) 了解如何创建 Access Key 和 Secret Key。

金山云 KS3 为每个区域都提供了公网和内网 [endpoint 链接](https://docs.ksyun.com/documents/6761)，你可以根据实际的场景选用。

```bash
juicefs format \
    --storage ks3 \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### 青云 QingStor

使用青云 QingStor 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://docsv3.qingcloud.com/storage/object-storage/api/practices/signature/#%E8%8E%B7%E5%8F%96-access-key) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.<region>.qingstor.com`，请将 `<region>` 替换成你实际使用的存储区域，例如：北京 3-A 的区域代码为 `pek3a`。[点此查看](https://docs.qingcloud.com/qingstor/#%E5%8C%BA%E5%9F%9F%E5%8F%8A%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D) 所有可用的区域代码。例如：

```bash
juicefs format \
    --storage qingstor \
    --bucket https://<bucket>.<region>.qingstor.com \
    ... \
    myjfs
```

:::note 注意
所有 QingStor 兼容的对象存储服务其 `--bucket` 选项的格式为 `http://<bucket>.<endpoint>`。
:::

### 七牛云 Kodo

使用七牛云 Kodo 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://developer.qiniu.com/af/kb/1479/how-to-access-or-locate-the-access-key-and-secret-key) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.s3-<region>.qiniucs.com`，请将 `<region>` 替换成你实际使用的存储区域，例如：中国东部的区域代码为 `cn-east-1`。[点此查看](https://developer.qiniu.com/kodo/4088/s3-access-domainname) 所有可用的区域代码。例如：

```bash
juicefs format \
    --storage qiniu \
    --bucket https://<bucket>.s3-<region>.qiniucs.com \
    ... \
    myjfs
```

### 天翼云 OOS

使用天翼云 OOS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://www.ctyun.cn/help2/10000101/10473683) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.<endpoint>`，例如：

```bash
juicefs format \
    --storage oos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### 移动云 EOS

使用移动云 EOS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://ecloud.10086.cn/op-help-center/doc/article/24501) 了解如何创建 Access Key 和 Secret Key。

移动云 EOS 为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://ecloud.10086.cn/op-help-center/doc/article/40956)，你可以根据实际的场景选用。例如：

```bash
juicefs format \
    --storage eos \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### 京东云 OSS

使用京东云 OSS 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://docs.jdcloud.com/cn/account-management/accesskey-management) 了解如何创建 Access Key 和 Secret Key。

`--bucket` 选项的格式为 `https://<bucket>.<region>.jdcloud-oss.com`，请将 `<region>` 替换成你实际使用的存储区域，区域代码[点此查看](https://docs.jdcloud.com/cn/object-storage-service/oss-endpont-list) 。例如：

```bash
juicefs format \
    --storage s3 \
    --bucket https://<bucket>.<region>.jdcloud-oss.com \
    ... \
    myjfs
```

### 优刻得 US3

使用优刻得 US3 作为 JuiceFS 数据存储，请先参照 [这篇文档](https://docs.ucloud.cn/uai-censor/access/key) 了解如何创建 Access Key 和 Secret Key。

优刻得 US3（原名 UFile）为每个区域都提供了 `公网` 和 `内网` [endpoint 链接](https://docs.ucloud.cn/ufile/introduction/region)，你可以根据实际的场景选用。例如：

```bash
juicefs format \
    --storage ufile \
    --bucket https://<bucket>.<endpoint> \
    ... \
    myjfs
```

### Ceph RADOS

:::note
JuiceFS v1.0 使用的 `go-ceph` 库版本为 v0.4.0，其支持的 Ceph 最低版本为 Luminous（v12.2.*）。
JuiceFS v1.1 使用的 `go-ceph` 库版本为 v0.18.0，其支持的 Ceph 最低版本为 Octopus（v15.2.*）。
使用前请确认 JuiceFS 与使用的 Ceph 和 `librados` 版本是否匹配，详见 [`go-ceph`](https://github.com/ceph/go-ceph#supported-ceph-versions)、[`librados`](https://docs.ceph.com/en/quincy/rados/api/librados-intro/)。
:::

[Ceph 存储集群](https://docs.ceph.com/en/latest/rados) 具有消息传递层协议，该协议使客户端能够与 Ceph Monitor 和 Ceph OSD 守护程序进行交互。[`librados`](https://docs.ceph.com/en/latest/rados/api/librados-intro) API 使您可以与这两种类型的守护程序进行交互：

- [Ceph Monitor](https://docs.ceph.com/en/latest/rados/configuration/common/#monitors) 维护群集映射的主副本
- [Ceph OSD Daemon (OSD)](https://docs.ceph.com/en/latest/rados/configuration/common/#osds) 将数据作为对象存储在存储节点上

JuiceFS 支持使用基于 `librados` 的本地 Ceph API。您需要分别安装 `librados` 库并重新编译 `juicefs` 二进制文件。

首先安装 `librados`，建议使用匹配你的 Ceph 版本的 `librados`，例如 Ceph 版本是 Octopus（v15.2.x），那么 `librados` 也建议使用 v15.2.x 版本。

<Tabs>
  <TabItem value="debian" label="Debian 及衍生版本">

```bash
sudo apt-get install librados-dev
```

  </TabItem>
  <TabItem value="centos" label="RHEL 及衍生版本">

```bash
sudo yum install librados2-devel
```

  </TabItem>
</Tabs>

然后为 Ceph 编译 JuiceFS（要求 Go 1.20+ 和 GCC 5.4+）：

```bash
make juicefs.ceph
```

在使用 Ceph 时，原本 JuiceFS 客户端的对象存储参数的含义不太相同：

* `--bucket` 是 Ceph 存储池，格式为 `ceph://<pool-name>`，[存储池](https://docs.ceph.com/zh_CN/latest/rados/operations/pools)是用于存储对象的逻辑分区，使用前需要先创建好
* `--access-key` 选项的值是 Ceph 集群名称，默认集群名称是 `ceph`。
* `--secret-key` 选项的值是 [Ceph 客户端用户名](https://docs.ceph.com/en/latest/rados/operations/user-management)，默认用户名是 `client.admin`。

为了连接到 Ceph Monitor，`librados` 将通过搜索默认位置读取 Ceph 的配置文件，并使用找到的第一个。这些位置是：

- `CEPH_CONF` 环境变量
- `/etc/ceph/ceph.conf`
- `~/.ceph/config`
- 在当前工作目录中的 `ceph.conf`

创建一个文件系统：

```bash
juicefs.ceph format \
    --storage ceph \
    --bucket ceph://<pool-name> \
    --access-key <cluster-name> \
    --secret-key <user-name> \
    ... \
    myjfs
```

### Ceph RGW

[Ceph Object Gateway](https://ceph.io/ceph-storage/object-storage) 是在 `librados` 之上构建的对象存储接口，旨在为应用程序提供访问 Ceph 存储集群的 RESTful 网关。Ceph 对象网关支持 S3 兼容的接口，因此我们可以将 `--storage` 设置为 `s3`。

`--bucket` 选项的格式为 `http://<bucket>.<endpoint>`（虚拟托管类型），例如：

```bash
juicefs format \
    --storage s3 \
    --bucket http://<bucket>.<endpoint> \
    ... \
    myjfs
```

### Gluster

[Gluster](https://github.com/gluster/glusterfs) 是一款开源的软件定义分布式存储，单集群能支持 PiB 级别的数据。JuiceFS 通过 `libgfapi` 库与 Gluster 集群交互，使用前需要单独编译。

首先安装 `libgfapi`（版本范围 6.0 - 10.1, [10.4+ 暂不支持](https://github.com/juicedata/juicefs/issues/4043))：

<Tabs>
  <TabItem value="debian" label="Debian 及衍生版本">

```bash
sudo apt-get install uuid-dev libglusterfs-dev glusterfs-common
```

  </TabItem>
  <TabItem value="centos" label="RHEL 及衍生版本">

```bash
sudo yum install glusterfs glusterfs-api-devel glusterfs-libs
```

  </TabItem>
</Tabs>

然后编译支持 Gluster 的 JuiceFS：

```bash
make juicefs.gluster
```

现在我们可以创建出基于 Gluster 的 JuiceFS volume：

```bash
juicefs format \
    --storage gluster \
    --bucket host1,host2,host3/gv0 \
    ... \
    myjfs
```

其中 `--bucket` 选项格式为 `<host[,host...]>/<volume_name>`。注意这里的 `volume_name` 为 Gluster 中的卷名称，与 JuiceFS volume 自身的名字没有直接关系。

### Swift

[OpenStack Swift](https://github.com/openstack/swift) 是一种分布式对象存储系统，旨在从一台计算机扩展到数千台服务器。Swift 已针对多租户和高并发进行了优化。Swift 广泛适用于备份、Web 和移动内容的理想选择，可以无限量存储任何非结构化数据。

`--bucket` 选项格式为 `http://<container>.<endpoint>`，`container` 用来设定对象的命名空间。

**当前，JuiceFS 仅支持  [Swift V1 authentication](https://www.swiftstack.com/docs/cookbooks/swift_usage/auth.html)。**

`--access-key` 选项的值是用户名，`--secret-key` 选项的值是密码。例如：

```bash
juicefs format \
    --storage swift \
    --bucket http://<container>.<endpoint> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### MinIO

[MinIO](https://min.io) 是开源的轻量级对象存储，兼容 Amazon S3 API。

使用 Docker 可以很容易地在本地运行一个 MinIO 实例。例如，以下命令通过 `--console-address ":9900"` 为控制台设置并映射了 `9900` 端口，还将 MinIO 的数据路径映射到了当前目录下的 `minio-data` 文件夹中，你可以按需修改这些参数：

```shell
$ sudo docker run -d --name minio \
    -p 9000:9000 \
    -p 9900:9900 \
    -e "MINIO_ROOT_USER=minioadmin" \
    -e "MINIO_ROOT_PASSWORD=minioadmin" \
    -v $PWD/minio-data:/data \
    --restart unless-stopped \
    minio/minio server /data --console-address ":9900"
```

容器创建成功以后使用以下地址访问：

- **MinIO API**：[http://127.0.0.1:9000](http://127.0.0.1:9000)，这也是 JuiceFS 访问对象存储时所使用的的 API
- **MinIO 管理界面**：[http://127.0.0.1:9900](http://127.0.0.1:9900)，用于管理对象存储本身，与 JuiceFS 无关

对象存储初始的 Access Key 和 Secret Key 均为 `minioadmin`。

使用 MinIO 作为 JuiceFS 的数据存储，`--storage` 选项设置为 `minio`。

```bash
juicefs format \
    --storage minio \
    --bucket http://127.0.0.1:9000/<bucket> \
    --access-key minioadmin \
    --secret-key minioadmin \
    ... \
    myjfs
```

:::note

1. 当前，JuiceFS 仅支持路径风格的 MinIO URI 地址，例如：`http://127.0.0.1:9000/myjfs`
1. `MINIO_REGION` 环境变量可以用于设置 MinIO 的 region，如果不设置，默认为 `us-east-1`
1. 面对多节点 MinIO 集群，考虑在 Endpoint 中使用 DNS 域名，解析到各个 MinIO 节点，作为简易负载均衡，比如 `http://minio.example.com:9000/myjfs`

:::

### WebDAV

[WebDAV](https://en.wikipedia.org/wiki/WebDAV) 是 HTTP 的扩展协议，有利于用户间协同编辑和管理存储在万维网服务器的文档。JuiceFS 0.15+ 支持使用 WebDAV 协议的存储系统作为后端数据存储。

你需要将 `--storage` 设置为 `webdav`，并通过 `--bucket` 来指定访问 WebDAV 的地址。如果存储系统启用了用户验证，用户名和密码可以通过 `--access-key` 和 `--secret-key` 来指定，例如：

```bash
juicefs format \
    --storage webdav \
    --bucket http://<endpoint>/ \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### HDFS

Hadoop 的文件系统 [HDFS](https://hadoop.apache.org) 也可以作为对象存储供 JuiceFS 使用。

当使用 HDFS 作为 JuiceFS 数据存储，`--access-key` 的值设置为用户名，默认的超级用户通常是 `hdfs`。例如：

```bash
juicefs format \
    --storage hdfs \
    --bucket namenode1:8020 \
    --access-key hdfs \
    ... \
    myjfs
```

如果在创建文件系统时不指定 `--access-key`，JuiceFS 会使用执行 `juicefs mount` 命令的用户身份或通过 Hadoop SDK 访问 HDFS 的用户身份。如果该用户没有 HDFS 的读写权限，则程序会失败挂起，发生 IO 错误。

JuiceFS 会尝试基于 `$HADOOP_CONF_DIR` 或 `$HADOOP_HOME` 为 HDFS 客户端加载配置。如果 `--bucket` 选项留空，将使用在 Hadoop 配置中找到的默认 HDFS。

bucket 参数支持格式如下：

- `[hdfs://]namenode:port[/path]`

对于 HA 集群，bucket 参数可以：

- `[hdfs://]namenode1:port,namenode2:port[/path]`
- `[hdfs://]nameservice[/path]`

对于启用 Kerberos 的 HDFS，可以通过 `KRB5KEYTAB` 和 `KRB5PRINCIPAL` 环境变量来指定 keytab 和 principal。

### Apache Ozone

Apache Ozone 是 Hadoop 的分布式对象存储系统，提供了 S3 兼容的 API。所以可以通过 S3 兼容的模式作为对象存储供 JuiceFS 使用。例如：

```bash
juicefs format \
    --storage s3 \
    --bucket http://<endpoint>/<bucket>\
    --access-key <your-access-key> \
    --secret-key <your-sceret-key> \
    ... \
    myjfs
```

### Redis

Redis 既可以作为 JuiceFS 的元数据存储，也可以作为数据存储，但当使用 Redis 作为数据存储时，建议不要存储大规模数据。

#### 单机模式

`--bucket` 选项格式为 `redis://<host>:<port>/<db>`。`--access-key` 选项的值是用户名，`--secret-key` 选项的值是密码。例如：

```bash
juicefs format \
    --storage redis \
    --bucket redis://<host>:<port>/<db> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

#### Redis Sentinel

Redis Sentinel 模式下，`--bucket` 选项格式为 `redis[s]://MASTER_NAME,SENTINEL_ADDR[,SENTINEL_ADDR]:SENTINEL_PORT[/DB]`。Sentinel 的密码则需要通过 `SENTINEL_PASSWORD_FOR_OBJ` 环境变量来声明。例如：

```bash
export SENTINEL_PASSWORD_FOR_OBJ=sentinel_password
juicefs format \
    --storage redis \
    --bucket redis://masterName,1.2.3.4,1.2.5.6:26379/2  \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

#### Redis 集群

Redis 集群模式下，`--bucket` 选项格式为 `redis[s]://ADDR:PORT,[ADDR:PORT],[ADDR:PORT]`。例如：

```bash
juicefs format \
    --storage redis \
    --bucket redis://127.0.0.1:7000,127.0.0.1:7001,127.0.0.1:7002  \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

### TiKV

[TiKV](https://tikv.org) 是一个高度可扩展、低延迟且易于使用的键值数据库。它提供原始和符合 ACID 的事务键值 API。

TiKV 既可以用作 JuiceFS 的元数据存储，也可以用于 JuiceFS 的数据存储。

:::note 注意
建议使用独立部署的 TiKV 5.0+ 集群作为 JuiceFS 的数据存储
:::

`--bucket` 选项格式类似 `<host>:<port>,<host>:<port>,<host>:<port>`，其中 `<host>` 是 Placement Driver（PD）的地址。`--access-key` 和 `--secret-key` 选项没有作用，可以省略。例如：

```bash
juicefs format \
    --storage tikv \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>" \
    ... \
    myjfs
```

:::note 注意
不要使用同一个 TiKV 集群来存储元数据和数据，因为 JuiceFS 是使用不同的协议来存储元数据（支持事务的 TxnKV) 和数据 (不支持事务的 RawKV)，TxnKV 的对象名会被编码后存储，即使添加了不同的前缀也可能导致它们的名字冲突。另外，建议启用 [Titan](https://tikv.org/docs/latest/deploy/configure/titan) 来提升存储数据的集群的性能。
:::

#### 设置 TLS

如果需要开启 TLS，可以通过在 Bucket URL 后以添加 query 参数的形式设置 TLS 的配置项，目前支持的配置项：

| 配置项      | 值                                                                                                                                                                                             |
|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `ca`        | CA 根证书，用于用 TLS 连接 TiKV/PD                                                                                                                                                             |
| `cert`      | 证书文件路径，用于用 TLS 连接 TiKV/PD                                                                                                                                                          |
| `key`       | 私钥文件路径，用于用 TLS 连接 TiKV/PD                                                                                                                                                          |
| `verify-cn` | 证书通用名称，用于验证调用者身份，[详情](https://docs.pingcap.com/zh/tidb/dev/enable-tls-between-components#%E8%AE%A4%E8%AF%81%E7%BB%84%E4%BB%B6%E8%B0%83%E7%94%A8%E8%80%85%E8%BA%AB%E4%BB%BD) |

例子：

```bash
juicefs format \
    --storage tikv \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>?ca=/path/to/ca.pem&cert=/path/to/tikv-server.pem&key=/path/to/tikv-server-key.pem&verify-cn=CN1,CN2" \
    ... \
    myjfs
```

### etcd

[etcd](https://etcd.io) 是一个高可用高可靠的小规模键值数据库，既可以用作 JuiceFS 的元数据存储，也可以用于 JuiceFS 的数据存储。

etcd 默认会[限制](https://etcd.io/docs/latest/dev-guide/limit)单个请求不能超过 1.5MB，需要将 JuiceFS 的分块大小（`--block-size` 选项）改成 1MB 甚至更低。

`--bucket` 选项需要填 etcd 的地址，格式类似 `<host1>:<port>,<host2>:<port>,<host3>:<port>`。`--access-key` 和 `--secret-key` 选项填用户名和密码，当 etcd 没有启用用户认证时可以省略。例如：

```bash
juicefs format \
    --storage etcd \
    --block-size 1024 \  # 这个选项非常重要
    --bucket "<host1>:<port>,<host2>:<port>,<host3>:<port>/prefix" \
    --access-key myname \
    --secret-key mypass \
    ... \
    myjfs
```

#### 设置 TLS

如果需要开启 TLS，可以通过在 Bucket URL 后以添加 query 参数的形式设置 TLS 的配置项，目前支持的配置项：

| 配置项                 | 值           |
|------------------------|--------------|
| `cacert`               | CA 根证书    |
| `cert`                 | 证书文件路径 |
| `key`                  | 私钥文件路径 |
| `server-name`          | 服务器名称   |
| `insecure-skip-verify` | 1            |

例子：

```bash
juicefs format \
    --storage etcd \
    --bucket "<host>:<port>,<host>:<port>,<host>:<port>?cacert=/path/to/ca.pem&cert=/path/to/server.pem&key=/path/to/key.pem&server-name=etcd" \
    ... \
    myjfs
```

:::note 注意
证书的路径需要使用绝对路径，并且确保所有需要挂载的机器上能用该路径访问到它们。
:::

### SQLite

[SQLite](https://sqlite.org) 是全球广泛使用的小巧、快速、单文件、可靠、全功能的单文件 SQL 数据库引擎。

使用 SQLite 作为数据存储时只需要指定它的绝对路径即可。

```shell
juicefs format \
    --storage sqlite3 \
    --bucket /path/to/sqlite3.db \
    ... \
    myjfs
```

:::note 注意
由于 SQLite 是一款嵌入式数据库，只有数据库所在的主机可以访问它，不能用于多机共享场景。如果格式化时使用的是相对路径，会导致挂载时出问题，请使用绝对路径。
:::

### MySQL

[MySQL](https://www.mysql.com) 是受欢迎的开源关系型数据库之一，常被作为 Web 应用程序的首选数据库，既可以作为 JuiceFS 的元数据引擎也可以用来存储文件数据。跟 MySQL 兼容的 [MariaDB](https://mariadb.org)、[TiDB](https://github.com/pingcap/tidb) 等都可以用来作为数据存储。

使用 MySQL 作为数据存储时，需要提前创建数据库并添加想要权限，通过 `--bucket` 选项指定访问地址，通过 `--access-key` 选项指定用户名，通过 `--secret-key` 选项指定密码，示例如下：

```shell
juicefs format \
    --storage mysql \
    --bucket (<host>:3306)/<database-name> \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

创建文件系统后，JuiceFS 会在该数据库中创建名为 `jfs_blob` 的表用来存储数据。

:::note 注意
不要漏掉 `--bucket` 参数里的括号 `()`。
:::

### PostgreSQL

[PostgreSQL](https://www.postgresql.org) 是功能强大的开源关系型数据库，有完善的生态和丰富的应用场景，既可以作为 JuiceFS 的元数据引擎也可以作为数据存储。其他跟 PostgreSQL 协议兼容的数据库（比如 [CockroachDB](https://github.com/cockroachdb/cockroach) 等) 也可以用来作为数据存储。

创建文件系统时需要先创建好数据库并添加相应读写权限，使用 `--bucket` 选项来指定数据的地址，使用 `--access-key` 选项指定用户名，使用 `--secret-key` 选项指定密码，示例如下：

```shell
juicefs format \
    --storage postgres \
    --bucket <host>:<port>/<db>[?parameters] \
    --access-key <username> \
    --secret-key <password> \
    ... \
    myjfs
```

创建文件系统后，JuiceFS 会在该数据库中创建名为 `jfs_blob` 的表用来存储数据。

#### 故障排除

JuiceFS 客户端默认采用 SSL 加密连接 PostgreSQL，如果连接时报错 `pq: SSL is not enabled on the server` 说明数据库没有启用 SSL。可以根据业务场景为 PostgreSQL 启用 SSL 加密，也可以在 bucket URL 中添加参数 `sslmode=disable` 禁用加密验证。

### 本地磁盘

在创建 JuiceFS 文件系统时，如果没有指定任何存储类型，会默认使用本地磁盘作为数据存储，root 用户默认存储路径为 `/var/jfs`，普通用户默认存储路径为 `~/.juicefs/local`。

例如，以下命令使用本地的 Redis 数据库和本地磁盘创建了一个名为 `myfs` 的文件系统：

```shell
juicefs format redis://localhost:6379/1 myjfs
```

本地存储通常仅用于了解和体验 JuiceFS 的基本功能，创建的 JuiceFS 存储无法被网络内的其他客户端挂载，只能单机使用。

### SFTP/SSH {#sftp}

SFTP 全称 Secure File Transfer Protocol 即安全文件传输协议，它并不是文件存储。准确来说，JuiceFS 是通过 SFTP/SSH 这种文件传输协议对远程主机上的磁盘进行连接和读写，从而让任何启用了 SSH 服务的操作系统都可以作为 JuiceFS 的数据存储来使用。

例如，以下命令使用 SFTP 协议连接远程服务器 `192.168.1.11` ，在用户 `tom` 的 `$HOME` 目录下创建 `myjfs/` 文件夹作为文件系统的数据存储。

```shell
juicefs format  \
    --storage sftp \
    --bucket 192.168.1.11:myjfs/ \
    --access-key tom \
    --secret-key 123456 \
    ...
    redis://localhost:6379/1 myjfs
```

#### 注意事项

- `--bucket` 用来设置服务器的地址及存储路径，格式为 `[sftp://]<IP/Domain>:[port]:<Path>`。注意，目录名应该以 `/` 结尾，端口号为可选项默认为 `22`，例如 `192.168.1.11:22:myjfs/`。
- `--access-key` 用来设置远程服务器的用户名
- `--secret-key` 用来设置远程服务器的密码

### NFS {#nfs}

NFS - Network File System，即网络文件系统，是类 Unix 操作系统中很常用的文件共享服务，它可以让网络内的计算机能够像访问本地文件一样访问远程文件。

JuiceFS 支持使用 NFS 作为底层存储来构建文件系统，提供两种使用方式：本地挂载和直连模式。

#### 本地挂载

JuiceFS v1.1 及之前的版本仅支持本地挂载的方式使用 NFS 作为底层存储，这种方式需要先在本地挂载 NFS 服务器上的目录，然后以本地磁盘的方式使用它来创建 JuiceFS 文件系统。

例如，先把远程 NFS 服务器 `192.168.1.11` 上的 `/srv/data` 目录挂载到本地的 `/mnt/data` 目录，然后再使用 `file` 模式访问。

```shell
$ sudo mount -t nfs 192.168.1.11:/srv/data /mnt/data
$ sudo juicefs format \
    --storage file \
    --bucket /mnt/data \
    ...
    redis://localhost:6379/1 myjfs
```

从 JuiceFS 的角度来看，本地挂载的 NFS 仍然是本地磁盘，所以 `--storage` 选项设置为 `file`。

同理，由于底层存储只能在挂载的设备上访问，所以要在多台设备上共享访问，则需要在每台设备上分别挂载 NFS 共享，或通过 WebDAV、S3 Gateway 等基于网络的方式来提供外部访问。

#### 直连模式

JuiceFS v1.2 及以上版本支持直连模式使用 NFS 作为底层存储，这种方式不需要在本地挂载预先挂载 NFS 目录，而是直接通过 JuiceFS 客户端内置的 NFS 协议访问共享目录。

例如，远程服务器 `/etc/exports` 配置文件导出了下面的 NFS 共享：

```
/srv/data    192.168.1.0/24(rw,sync,no_subtree_check)
```

可以直接使用 JuiceFS 客户端连接 NFS 服务器上的 `/srv/data` 目录来创建文件系统：

```shell
$ sudo juicefs format  \
    --storage nfs \
    --bucket 192.168.1.11:/srv/data \
    ...
    redis://localhost:6379/1 myjfs
```

在直连模式下，`--storage` 选项设置为 `nfs`，`--bucket` 选项设置为 NFS 服务器的地址和共享目录，JuiceFS 客户端会直接连接 NFS 服务器上的目录来读写数据。

**几个注意事项：**

1. JuiceFS 直连 NFS 模式目前仅支持 NFSv3 协议
2. JuiceFS 客户端需要有访问 NFS 共享目录的权限
3. NFS 默认会启用 `root_squash` 功能，当以 root 身份访问 NFS 共享时默认会被挤压成 nobody 用户。为了避免无权 NFS 共享的问题，可以将共享目录的所有者设置为 `nobody:nogroup`，或者为 NFS 共享配置 `no_root_squash` 选项来关闭权限挤压。


================================================
FILE: docs/zh_cn/reference/p8s_metrics.md
================================================
---
title: JuiceFS 监控指标
sidebar_position: 4
---

如果你尚未搭建监控系统、收集 JuiceFS 客户端指标，阅读[「监控」](../administration/monitoring.md)文档了解如何收集这些指标以及可视化。

## 全局标签 {#global-labels}

| 名称       | 描述        |
| ----       | ----------- |
| `vol_name` | Volume 名称 |
| `instance` | 客户端主机名，格式为 `<host>:<port>`。详见[官方文档](https://prometheus.io/docs/concepts/jobs_instances) |
| `mp`       | 挂载点路径，如果是通过 [Prometheus Pushgateway](https://github.com/prometheus/pushgateway) 上报，例如 [JuiceFS Hadoop Java SDK](../administration/monitoring.md#hadoop)，那么 `mp` 标签的值为 `sdk-<PID>` |

## 文件系统 {#file-system}

### 指标

| 名称                            | 描述            | 单位 |
|-------------------------------|---------------|----|
| `juicefs_used_space`          | 总使用空间         | 字节 |
| `juicefs_used_inodes`         | 总 inodes 数量   |    |

## 操作系统 {#operating-system}

### 指标

| 名称                | 描述        | 单位 |
| ----                | ----------- | ---- |
| `juicefs_uptime`    | 总运行时间  | 秒   |
| `juicefs_cpu_usage` | CPU 使用量  | 秒   |
| `juicefs_memory`    | 内存使用量  | 字节 |

## 元数据引擎 {#metadata-engine}

### 指标

| 名称                                              | 描述           | 单位 |
| ----                                              | -----------    | ---- |
| `juicefs_transaction_durations_histogram_seconds` | 事务的延时分布 | 秒   |
| `juicefs_transaction_restart`                     | 事务重启的次数 |      |

## FUSE {#fuse}

### 指标

| 名称                                           | 描述                 | 单位 |
| ----                                           | -----------          | ---- |
| `juicefs_fuse_read_size_bytes`                 | 读请求的大小分布     | 字节 |
| `juicefs_fuse_written_size_bytes`              | 写请求的大小分布     | 字节 |
| `juicefs_fuse_ops_durations_histogram_seconds` | 所有请求的延时分布   | 秒   |
| `juicefs_fuse_open_handlers`                   | 打开的文件和目录数量 |      |

## SDK {#sdk}

### 指标

| 名称                                          | 描述               | 单位 |
| ----                                          | -----------        | ---- |
| `juicefs_sdk_read_size_bytes`                 | 读请求的大小分布   | 字节 |
| `juicefs_sdk_written_size_bytes`              | 写请求的大小分布   | 字节 |
| `juicefs_sdk_ops_durations_histogram_seconds` | 所有请求的延时分布 | 秒   |

## 缓存 {#cache}

### 指标

| 名称                                      | 描述          | 单位 |
|-----------------------------------------|-------------|----|
| `juicefs_blockcache_blocks`             | 缓存块的总个数     |    |
| `juicefs_blockcache_bytes`              | 缓存块的总大小     | 字节 |
| `juicefs_blockcache_hits`               | 命中缓存块的总次数   |    |
| `juicefs_blockcache_miss`               | 没有命中缓存块的总次数 |    |
| `juicefs_blockcache_writes`             | 写入缓存块的总次数   |    |
| `juicefs_blockcache_drops`              | 丢弃缓存块的总次数   |    |
| `juicefs_blockcache_evicts`             | 淘汰缓存块的总次数   |    |
| `juicefs_blockcache_hit_bytes`          | 命中缓存块的总大小   | 字节 |
| `juicefs_blockcache_miss_bytes`         | 没有命中缓存块的总大小 | 字节 |
| `juicefs_blockcache_write_bytes`        | 写入缓存块的总大小   | 字节 |
| `juicefs_blockcache_read_hist_seconds`  | 读缓存块的延时分布   | 秒  |
| `juicefs_blockcache_write_hist_seconds` | 写缓存块的延时分布   | 秒  |
| `juicefs_staging_blocks`                | 暂存路径中的块数    |    |
| `juicefs_staging_block_bytes`           | 暂存路径中块的总字节数 | 秒  |
| `juicefs_staging_block_delay_seconds`   | 暂存块延迟的总秒数 | 秒  |

## 对象存储 {#object-storage}

### 标签

| 名称     | 描述                                              |
| ----     | -----------                                       |
| `method` | 请求对象存储的方法（例如 GET、PUT、HEAD、DELETE） |

### 指标

| 名称                                                 | 描述                     | 单位 |
| ----                                                 | -----------              | ---- |
| `juicefs_object_request_durations_histogram_seconds` | 请求对象存储的延时分布   | 秒   |
| `juicefs_object_request_errors`                      | 请求失败的总次数         |      |
| `juicefs_object_request_data_bytes`                  | 请求对象存储的总数据大小 | 字节 |

## 内部特性 {#internal}

### 指标

| 名称                                     | 描述               | 单位 |
|----------------------------------------| -----------        | ---- |
| `juicefs_compact_size_histogram_bytes` | 合并数据的大小分布 | 字节 |
| `juicefs_used_read_buffer_size_bytes`  | 当前用于读取的缓冲区的大小 |    |

## 数据同步 {#sync}

### 指标

| 名称 | 描述 | 单位 |
|-|-|-|
| `juicefs_sync_scanned` | 从源端扫描的所有对象数量 | |
| `juicefs_sync_handled` | 已经处理过的来自源端的对象数量 | |
| `juicefs_sync_pending` | 等待同步的对象数量 | |
| `juicefs_sync_copied` | 已经同步过的对象数量 | |
| `juicefs_sync_copied_bytes` | 已经同步过的数据总大小 | 字节 |
| `juicefs_sync_skipped` | 同步时被跳过的对象数量 | |
| `juicefs_sync_failed` | 同步时失败的对象数量 | |
| `juicefs_sync_deleted` | 同步时被删除的对象数量 | |
| `juicefs_sync_checked` | 同步时校验过 checksum 的对象数量 | |
| `juicefs_sync_checked_bytes` | 同步时校验过 checksum 的数据总大小 | 字节 |


================================================
FILE: docs/zh_cn/reference/posix_compatibility.md
================================================
---
title: POSIX 兼容性
sidebar_position: 6
slug: /posix_compatibility
---

JuiceFS 借助于 pjdfstest 和 LTP 来验证其对 POSIX 的兼容性。

## Pjdfstest

[Pjdfstest](https://github.com/pjd/pjdfstest) 是一个用来帮助验证 POSIX 系统调用的测试集，JuiceFS 通过了其最新的 8813 项测试：

```
All tests successful.

Test Summary Report
-------------------
/root/soft/pjdfstest/tests/chown/00.t          (Wstat: 0 Tests: 1323 Failed: 0)
  TODO passed:   693, 697, 708-709, 714-715, 729, 733
Files=235, Tests=8813, 233 wallclock secs ( 2.77 usr  0.38 sys +  2.57 cusr  3.93 csys =  9.65 CPU)
Result: PASS
```

:::note 注意
测试 pjdfstest 时，需要将 JuiceFS 的回收站关闭，因为 pjdfstest 测试的删除行为是直接删除而非进入回收站，而 JuiceFS 回收站是默认开启的。
关闭回收站命令：`juicefs config <meta-url> --trash-days 0`
:::

此外，JuiceFS 还提供：

- 关闭再打开（close-to-open）一致性。一旦一个文件写入完成并关闭，之后的打开和读操作保证可以访问之前写入的数据。如果是在同一个挂载点，所有写入的数据都可以立即读。
- 重命名以及所有其他元数据操作都是原子的，由元数据引擎的事务机制保证。
- 当文件被删除后，同一个挂载点上如果已经打开了，文件还可以继续访问。
- 支持 mmap
- 支持 fallocate 以及空洞
- 支持扩展属性
- 支持 BSD 锁（flock）
- 支持传统 POSIX 记录锁（fcntl）

:::note 注意
POSIX 记录锁分为**传统锁**和 **OFD 锁**（Open file description locks）两类，它们的加锁操作命令分别为 `F_SETLK` 和 `F_OFD_SETLK`。受限于 FUSE 内核模块的实现，目前 JuiceFS 只支持传统类型的记录锁。更多细节可参见：[https://man7.org/linux/man-pages/man2/fcntl.2.html](https://man7.org/linux/man-pages/man2/fcntl.2.html)。
:::

## LTP

[LTP](https://github.com/linux-test-project/ltp)（Linux Test Project）是一个由 IBM，Cisco 等多家公司联合开发维护的项目，旨在为开源社区提供一个验证 Linux 可靠性和稳定性的测试集。LTP 中包含了各种工具来检验 Linux 内核和相关特性；JuiceFS 通过了其中与文件系统相关的大部分测试例。

### 测试环境

- 测试主机：Amazon EC2: c5d.xlarge (4C 8G)
- 操作系统：Ubuntu 20.04.1 LTS (Kernel `5.4.0-1029-aws`)
- 对象存储：Amazon S3
- JuiceFS 版本：0.17-dev (2021-09-16 292f2b65)

### 测试步骤

1. 在 GitHub 下载 LTP [源码包](https://github.com/linux-test-project/ltp/releases/download/20210524/ltp-full-20210524.tar.bz2)
2. 解压后编译安装：

   ```bash
   tar -jvxf ltp-full-20210524.tar.bz2
   cd ltp-full-20210524
   ./configure
   make all
   make install
   ```

3. 测试工具安装在 `/opt/ltp`，需先切换到此目录：

   ```bash
   cd /opt/ltp
   ```

   测试配置文件在 `runtest` 目录下；为方便测试，删去了 `fs` 和 `syscalls` 中部分压力测试和与文件系统不相关的条目（参见[附录](#附录)，修改后保存到文件 `fs-jfs` 和 `syscalls-jfs`），然后执行命令：

   ```bash
   ./runltp -d /mnt/jfs -f fs_bind,fs_perms_simple,fsx,io,smoketest,fs-jfs,syscalls-jfs
   ```

### 测试结果

```bash
Testcase                                           Result     Exit Value
--------                                           ------     ----------
fcntl17                                            FAIL       7
fcntl17_64                                         FAIL       7
getxattr05                                         CONF       32
ioctl_loop05                                       FAIL       4
ioctl_ns07                                         FAIL       1
lseek11                                            CONF       32
open14                                             CONF       32
openat03                                           CONF       32
setxattr03                                         FAIL       6

-----------------------------------------------
Total Tests: 1270
Total Skipped Tests: 4
Total Failures: 5
Kernel Version: 5.4.0-1029-aws
Machine Architecture: x86_64
```

其中跳过和失败的测试例原因如下：

- fcntl17，fcntl17_64：在 POSIX locks 加锁时需要文件系统自动检测死锁，目前 JuiceFS 尚不支持
- getxattr05：需要设置文件扩展权限 ACL，目前 JuiceFS 尚不支持
- ioctl_loop05，ioctl_ns07，setxattr03：需要调用 `ioctl`，目前 JuiceFS 尚不支持
- lseek11：需要 `lseek` 处理 SEEK_DATA 和 SEEK_HOLE 标记位，目前 JuiceFS 用的是内核通用实现，尚不支持这两个 flags
- open14，openat03：需要 `open` 处理 O_TMPFILE 标记位，由于 FUSE 不支持，JuiceFS 也无法实现

### 附录

在 `fs` 和 `syscalls` 文件中删去的测试例：

```bash
# fs --> fs-jfs
gf01 growfiles -W gf01 -b -e 1 -u -i 0 -L 20 -w -C 1 -l -I r -T 10 -f glseek20 -S 2 -d $TMPDIR
gf02 growfiles -W gf02 -b -e 1 -L 10 -i 100 -I p -S 2 -u -f gf03_ -d $TMPDIR
gf03 growfiles -W gf03 -b -e 1 -g 1 -i 1 -S 150 -u -f gf05_ -d $TMPDIR
gf04 growfiles -W gf04 -b -e 1 -g 4090 -i 500 -t 39000 -u -f gf06_ -d $TMPDIR
gf05 growfiles -W gf05 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -u -f gf07_ -d $TMPDIR
gf06 growfiles -W gf06 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -f g_rand10 -S 2 -d $TMPDIR
gf07 growfiles -W gf07 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -I p -f g_rand13 -S 2 -d $TMPDIR
gf08 growfiles -W gf08 -b -e 1 -u -r 1-5000 -R 0--2 -i 0 -L 30 -C 1 -f g_rand11 -S 2 -d $TMPDIR
gf09 growfiles -W gf09 -b -e 1 -u -r 1-5000 -R 0--1 -i 0 -L 30 -C 1 -I p -f g_rand12 -S 2 -d $TMPDIR
gf10 growfiles -W gf10 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I l -f g_lio14 -S 2 -d $TMPDIR
gf11 growfiles -W gf11 -b -e 1 -u -r 1-5000 -i 0 -L 30 -C 1 -I L -f g_lio15 -S 2 -d $TMPDIR
gf12 mkfifo $TMPDIR/gffifo17; growfiles -b -W gf12 -e 1 -u -i 0 -L 30 $TMPDIR/gffifo17
gf13 mkfifo $TMPDIR/gffifo18; growfiles -b -W gf13 -e 1 -u -i 0 -L 30 -I r -r 1-4096 $TMPDIR/gffifo18
gf14 growfiles -W gf14 -b -e 1 -u -i 0 -L 20 -w -l -C 1 -T 10 -f glseek19 -S 2 -d $TMPDIR
gf15 growfiles -W gf15 -b -e 1 -u -r 1-49600 -I r -u -i 0 -L 120 -f Lgfile1 -d $TMPDIR
gf16 growfiles -W gf16 -b -e 1 -i 0 -L 120 -u -g 4090 -T 101 -t 408990 -l -C 10 -c 1000 -S 10 -f Lgf02_ -d $TMPDIR
gf17 growfiles -W gf17 -b -e 1 -i 0 -L 120 -u -g 5000 -T 101 -t 499990 -l -C 10 -c 1000 -S 10 -f Lgf03_ -d $TMPDIR
gf18 growfiles -W gf18 -b -e 1 -i 0 -L 120 -w -u -r 10-5000 -I r -l -S 2 -f Lgf04_ -d $TMPDIR
gf19 growfiles -W gf19 -b -e 1 -g 5000 -i 500 -t 49900 -T10 -c9 -I p -o O_RDWR,O_CREAT,O_TRUNC -u -f gf08i_ -d $TMPDIR
gf20 growfiles -W gf20 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1-256000:512 -R 512-256000 -T 4 -f gfbigio-$$ -d $TMPDIR
gf21 growfiles -W gf21 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bld-$$ -d $TMPDIR
gf22 growfiles -W gf22 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -T 10 -t 20480 -f gf-bldf-$$ -d $TMPDIR
gf23 growfiles -W gf23 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 512-64000:1024 -R 1-384000 -T 4 -f gf-inf-$$ -d $TMPDIR
gf24 growfiles -W gf24 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -g 20480 -f gf-jbld-$$ -d $TMPDIR
gf25 growfiles -W gf25 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 1024000-2048000:2048 -R 4095-2048000 -T 1 -f gf-large-gs-$$ -d $TMPDIR
gf26 growfiles -W gf26 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -r 128-32768:128 -R 512-64000 -T 4 -f gfsmallio-$$ -d $TMPDIR
gf27 growfiles -W gf27 -b -D 0 -w -g 8b -C 1 -b -i 1000 -u -f gfsparse-1-$$ -d $TMPDIR
gf28 growfiles -W gf28 -b -D 0 -w -g 16b -C 1 -b -i 1000 -u -f gfsparse-2-$$ -d $TMPDIR
gf29 growfiles -W gf29 -b -D 0 -r 1-4096 -R 0-33554432 -i 0 -L 60 -C 1 -u -f gfsparse-3-$$ -d $TMPDIR
gf30 growfiles -W gf30 -D 0 -b -i 0 -L 60 -u -B 1000b -e 1 -o O_RDWR,O_CREAT,O_SYNC -g 20480 -T 10 -t 20480 -f gf-sync-$$ -d $TMPDIR
rwtest01 export LTPROOT; rwtest -N rwtest01 -c -q -i 60s  -f sync 10%25000:$TMPDIR/rw-sync-$$
rwtest02 export LTPROOT; rwtest -N rwtest02 -c -q -i 60s  -f buffered 10%25000:$TMPDIR/rw-buffered-$$
rwtest03 export LTPROOT; rwtest -N rwtest03 -c -q -i 60s -n 2  -f buffered -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-buff-$$
rwtest04 export LTPROOT; rwtest -N rwtest04 -c -q -i 60s -n 2  -f sync -s mmread,mmwrite -m random -Dv 10%25000:$TMPDIR/mm-sync-$$
rwtest05 export LTPROOT; rwtest -N rwtest05 -c -q -i 50 -T 64b 500b:$TMPDIR/rwtest01%f
iogen01 export LTPROOT; rwtest -N iogen01 -i 120s -s read,write -Da -Dv -n 2 500b:$TMPDIR/doio.f1.$$ 1000b:$TMPDIR/doio.f2.$$
quota_remount_test01 quota_remount_test01.sh
isofs isofs.sh

# syscalls --> syscalls-jfs
bpf_prog05 bpf_prog05
cacheflush01 cacheflush01
chown01_16 chown01_16
chown02_16 chown02_16
chown03_16 chown03_16
chown04_16 chown04_16
chown05_16 chown05_16
clock_nanosleep03 clock_nanosleep03
clock_gettime03 clock_gettime03
leapsec01 leapsec01
close_range01 close_range01
close_range02 close_range02
fallocate06 fallocate06
fchown01_16 fchown01_16
fchown02_16 fchown02_16
fchown03_16 fchown03_16
fchown04_16 fchown04_16
fchown05_16 fchown05_16
fcntl06 fcntl06
fcntl06_64 fcntl06_64
getegid01_16 getegid01_16
getegid02_16 getegid02_16
geteuid01_16 geteuid01_16
geteuid02_16 geteuid02_16
getgid01_16 getgid01_16
getgid03_16 getgid03_16
getgroups01_16 getgroups01_16
getgroups03_16 getgroups03_16
getresgid01_16 getresgid01_16
getresgid02_16 getresgid02_16
getresgid03_16 getresgid03_16
getresuid01_16 getresuid01_16
getresuid02_16 getresuid02_16
getresuid03_16 getresuid03_16
getrusage04 getrusage04
getuid01_16 getuid01_16
getuid03_16 getuid03_16
ioctl_sg01 ioctl_sg01
fanotify16 fanotify16
fanotify18 fanotify18
fanotify19 fanotify19
lchown01_16 lchown01_16
lchown02_16 lchown02_16
lchown03_16 lchown03_16
mbind02 mbind02
mbind03 mbind03
mbind04 mbind04
migrate_pages02 migrate_pages02
migrate_pages03 migrate_pages03
modify_ldt01 modify_ldt01
modify_ldt02 modify_ldt02
modify_ldt03 modify_ldt03
move_pages01 move_pages01
move_pages02 move_pages02
move_pages03 move_pages03
move_pages04 move_pages04
move_pages05 move_pages05
move_pages06 move_pages06
move_pages07 move_pages07
move_pages09 move_pages09
move_pages10 move_pages10
move_pages11 move_pages11
move_pages12 move_pages12
msgctl05 msgctl05
msgstress04 msgstress04
openat201 openat201
openat202 openat202
openat203 openat203
madvise06 madvise06
madvise09 madvise09
ptrace04 ptrace04
quotactl01 quotactl01
quotactl04 quotactl04
quotactl06 quotactl06
readdir21 readdir21
recvmsg03 recvmsg03
sbrk03 sbrk03
semctl08 semctl08
semctl09 semctl09
set_mempolicy01 set_mempolicy01
set_mempolicy02 set_mempolicy02
set_mempolicy03 set_mempolicy03
set_mempolicy04 set_mempolicy04
set_thread_area01 set_thread_area01
setfsgid01_16 setfsgid01_16
setfsgid02_16 setfsgid02_16
setfsgid03_16 setfsgid03_16
setfsuid01_16 setfsuid01_16
setfsuid02_16 setfsuid02_16
setfsuid03_16 setfsuid03_16
setfsuid04_16 setfsuid04_16
setgid01_16 setgid01_16
setgid02_16 setgid02_16
setgid03_16 setgid03_16
sgetmask01 sgetmask01
setgroups01_16 setgroups01_16
setgroups02_16 setgroups02_16
setgroups03_16 setgroups03_16
setgroups04_16 setgroups04_16
setregid01_16 setregid01_16
setregid02_16 setregid02_16
setregid03_16 setregid03_16
setregid04_16 setregid04_16
setresgid01_16 setresgid01_16
setresgid02_16 setresgid02_16
setresgid03_16 setresgid03_16
setresgid04_16 setresgid04_16
setresuid01_16 setresuid01_16
setresuid02_16 setresuid02_16
setresuid03_16 setresuid03_16
setresuid04_16 setresuid04_16
setresuid05_16 setresuid05_16
setreuid01_16 setreuid01_16
setreuid02_16 setreuid02_16
setreuid03_16 setreuid03_16
setreuid04_16 setreuid04_16
setreuid05_16 setreuid05_16
setreuid06_16 setreuid06_16
setreuid07_16 setreuid07_16
setuid01_16 setuid01_16
setuid03_16 setuid03_16
setuid04_16 setuid04_16
shmctl06 shmctl06
socketcall01 socketcall01
socketcall02 socketcall02
socketcall03 socketcall03
ssetmask01 ssetmask01
swapoff01 swapoff01
swapoff02 swapoff02
swapon01 swapon01
swapon02 swapon02
swapon03 swapon03
switch01 endian_switch01
sysinfo03 sysinfo03
timerfd04 timerfd04
perf_event_open02 perf_event_open02
statx07 statx07
io_uring02 io_uring02
```


================================================
FILE: docs/zh_cn/reference/spec-limits.md
================================================
---
sidebar_position: 7
---

# 规格限制

## 文件系统限制

以下为 JuiceFS 文件系统的理论限制，实际性能和文件系统规模会受到所采用的的元数据引擎以及对象存储的限制。

* 目录层级：无限制
* 文件名长度：255 字节
* 符号链接长度：4096 字节
* 硬链接个数：2^31
* 单目录的文件数：2^31
* 单个文件系统文件数：无限制
* 单文件长度：2^(26+31)
* 数据量：4EiB


================================================
FILE: docs/zh_cn/release_notes.md
================================================
# 版本更新

:::tip 提示
所有历史版本请查看 [GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面
:::

## 版本号 {#version-number}

JuiceFS 社区版采用[语义化版本号](https://semver.org/lang/zh-CN)标记方式，每个版本号都由三个数字组成 `x.y.z`，分别是主版本号（x）、次版本号（y）和修订号（z）。

1. **主版本号（x）**：主版本号大于等于 `1` 时，表示该版本已经适用于生产环境。当主版本号发生变化时，表明这个版本可能增加了不能向后兼容的重大功能、架构变化或数据格式变化。例如，`v0.8.3` → `v1.0.0` 代表生产就绪，`v1.0.0` → `v2.0.0` 代表架构或功能变化。
2. **次版本号（y）**：次版本号表示该版本增加了一些能够向后兼容的新功能、性能优化和 bug 修复等。例如，`v1.0.0` → `v1.1.0`。
3. **修订号（z）**：修订号表示软件的小更新或者 bug 修复，只是对现有功能的一些小的改动或者修复，不会影响软件兼容性。例如，`v1.0.3` → `v1.0.4`。

## 版本升级 {#changes}

JuiceFS 的客户端只有一个二进制文件，一般情况下升级时只需要用新版本软件替换旧版即可。

### JuiceFS v1.1

:::tip 提示
若您正在使用的版本小于 v1.0，请先[升级到 v1.0 版本](#juicefs-v10)。
:::

JuiceFS 在 v1.1（具体而言，是 v1.1.0-beta2）版本中新增了[**目录用量统计**](https://juicefs.com/docs/zh/community/guide/dir-stats)和[**目录配额**](https://juicefs.com/docs/zh/community/guide/quota#directory-quota)两个功能，且目录配额依赖于用量统计。这两项功能在旧版本客户端中没有，当它们被开启的情况下使用旧客户端写入会导致统计数值出现较大偏差。在升级到 v1.1 时，若您不打算启用这两项新功能，可以直接使用新版本客户端替换升级，无需额外操作。若您打算使用，则建议您在升级前了解以下内容。

#### 默认配置

目前这两项功能的默认配置为：

- 新创建的文件系统，会自动启用

- 已有的文件系统，默认均不启用
  - 目录用量统计可以通过 `juicefs config` 命令单独开启
  - 设置目录配额时，用量统计会自动开启

#### 推荐升级步骤

1. 升级所有客户端软件到 v1.1 版本
2. 拒绝 v1.1 之前的版本再次连接：`juicefs config META-URL --min-client-version 1.1.0-A`
3. 在合适的时间重启服务（重新挂载，重启 gateway 等）
4. 确保所有在线客户端版本都在 v1.1 或以上：`juicefs status META-URL | grep -w Version`
5. 启用新特性，具体参见[目录用量统计](https://juicefs.com/docs/zh/community/guide/dir-stats)和[目录配额](https://juicefs.com/docs/zh/community/guide/quota#directory-quota)

### JuiceFS v1.0

JuiceFS 在 v1.0（具体而言，是 v1.0.0-beta3）版本中有两项兼容性修改。若您原来使用的客户端版本较低，建议您在升级前先了解以下内容。

#### 调整 SQL 表结构以支持非 UTF-8 字符

JuiceFS v1.0 改进了 SQL 元数据引擎对非 UTF-8 字符集的支持。对于已有的文件系统，需要手动调整表结构才能支持非 UTF-8 字符集，建议在升级完所有客户端后再选择访问压力比较低的时候进行操作。

:::note 注意
调整 SQL 表结构时数据库性能可能会下降，影响正在运行的服务。
:::

##### MySQL/MariaDB

```sql
alter table jfs_edge
    modify name varbinary(255) not null;
alter table jfs_symlink
    modify target varbinary(4096) not null;
```

##### PostgreSQL

```sql
alter table jfs_edge
    alter column name type bytea using name::bytea;
alter table jfs_symlink
    alter column target type bytea using target::bytea;
```

##### SQLite

由于 SQLite 不支持修改字段，可以通过 dump 和 load 命令进行迁移，详情参考：[JuiceFS 元数据备份和恢复](administration/metadata_dump_load.md)。

#### 会话管理格式变更

JuiceFS v1.0 使用了新的会话管理格式，历史版本客户端通过 `juicefs status` 或者 `juicefs destroy` 将无法看到 v1.0 客户端产生的会话，新版客户端可以看到所有会话。


================================================
FILE: docs/zh_cn/security/encryption.md
================================================
---
sidebar_position: 1
---
# 数据加密

在数据安全方面，JuiceFS 提供两个方面的数据加密保护：

1. 传输加密
2. 静态数据加密

## 传输加密 {#in-transit}

JuiceFS 的架构决定了它的运行通常涉及与数据库和对象存储之间的网络连接，只要这些服务支持加密连接，JuiceFS 就可以通过其提供的加密通道进行访问。

### 通过 HTTPS 与对象存储连接

公有云对象存储一般会同时支持 HTTP 和 HTTPS，在创建文件系统时如果没有指定协议头，JuiceFS 会默认使用 HTTPS 协议头。例如：

```shell {2}
juicefs format --storage s3 \
  --bucket myjfs.s3.ap-southeast-1.amazonaws.com \
  ...
```

以上命令，客户端会默认将 bucket 识别为 `https://myjfs.s3.ap-southeast-1.amazonaws.com`。

对于服务器和对象存储运行在相同 VPC 网络的情况，如果不需要加密连接，可以明确指定要使用的协议头，例如：`--bucket http://myjfs.s3.ap-southeast-1.amazonaws.com`。

### 通过 TLS/SSL 加密连接到数据库

对于所有[支持的元数据引擎](../reference/how_to_set_up_metadata_engine.md)，只要数据库本身支持并配置了 TLS/SSL 等加密链接，JuiceFS 即可通过其加密通道进行连接。例如，配置了 TLS 加密的 Redis 数据库可以使用 `rediss://` 协议头进行链接：

```shell {3}
juicefs format --storage s3 \
  --bucket myjfs.s3.ap-southeast-1.amazonaws.com \
  "rediss://myredis.ap-southeast-1.amazonaws.com:6379/1" myjfs
```

## 静态数据加密 {#at-rest}

JuiceFS 提供静态数据加密支持，即先加密，再上传。所有存入 JuiceFS 的文件都会在本地完成加密后再上传到对象存储，这可以在对象存储本身被破坏时有效地防止数据泄露。

JuiceFS 的静态数据加密采用混合加密架构：对称加密负责数据加密，非对称加密负责密钥保护。只需在创建文件系统时提供一个私钥即可启用数据加密功能，通过 `JFS_RSA_PASSPHRASE` 环境变量提供私钥密码。在使用上，挂载点对应用程序完全透明，即加密和解密过程对文件系统的访问不会产生影响。

### 加密原理

#### 加密架构设计

JuiceFS 采用**混合加密架构**，包含两个加密层次：

1. **数据加密层**（对称加密 - AES-256-GCM 或 ChaCha20-Poly1305 或 SM4-GCM）
   - **作用**：实际加密用户数据内容
   - **机制**：每个 block 生成唯一的对称密钥 `S` + 随机种子 `N`（均使用 256 位密钥）
   - **优势**：AES-256-GCM 和 ChaCha20-Poly1305 都提供高速加密和完整性验证（AEAD）
   - **标准**：256 位密钥强度符合 NIST 安全标准，ChaCha20-Poly1305 是 RFC 8439 标准算法

2. **密钥保护层**（非对称加密）
   - **作用**：保护对称密钥的安全分发和存储
   - **机制**：使用私钥 `M` 加密每个数据块的对称密钥 `S`
   - **优势**：解决密钥分发难题，避免密钥重用风险
   - **方案**：支持PKCS#1、PKCS#8格式的私钥

需要用户预先为文件系统创建一个全局私钥 `M`。在对象存储中保存的每个对象都将有自己的随机对称密钥 `S`。

符号说明：

- `M` 代表用户自行创建的私钥
- `S` 代表 JuiceFS 客户端为每个文件对象生成的 256 位对称密钥
- `N` 代表 JuiceFS 客户端为每个文件对象生成的随机种子
- `K` 代表 `M` 加密 `S` 得到的密文

![Encryption At-rest](../images/encryption.png)

#### 数据加密过程

- 在写入对象存储之前，数据块会使用 LZ4 或 Zstandard 进行压缩。
- 为每个数据块生成一个随机的 256 位对称密钥 `S` 和一个随机种子 `N`。
- 基于 AES-256-GCM 或 ChaCha20-Poly1305 或 SM4-GCM 使用 `S` 和 `N` 对每个数据块进行加密得到 `encrypted_data`。
- 为了避免对称密钥 `S` 在网络上明文传输，使用 RSA 私钥 `M` 对对称密钥 `S` 进行加密得到密文 `K` 。
- 将加密后的数据 `encrypted_data`、密文 `K` 和随机种子 `N` 组合成对象，然后写入对象存储。

#### 数据解密过程

- 读取整个加密对象（它可能比 4MB 大一点）。
- 解析对象数据得到密文 `K`、随机种子 `N` 和被加密的数据 `encrypted_data`。
- 用私钥解密 `K`，得到对称密钥 `S`。
- 基于 AES-256-GCM 或 ChaCha20-Poly1305 或 SM4-GCM 使用 `S` 和 `N` 解密数据 `encrypted_data` 得到数据块明文。
- 对数据块解压缩。

### 启用静态加密

:::note 注意
静态数据加密功能必须在创建文件系统时启用，已创建的文件系统无法再启用数据加密。
:::

启用静态加密功能的步骤为：

1. 创建私钥
2. 使用私钥创建加密的文件系统
3. 挂载文件系统

#### 第一步 创建私钥

私钥是静态数据加密的关键，一般使用 OpenSSL 手动生成。以下命令将使用 aes256 算法在当前目录生成长度为 2048 位，文件名为 `my-priv-key.pem` 的 RSA 私钥：

```shell
openssl genrsa -out my-priv-key.pem -aes256 2048
```

由于使用了 `aes256` 加密算法，命令行会要求必须为该私钥提供一个至少 4 位的 `Passphrase`，可以简单地把它理解为一个用于加密 RSA 私钥文件本身的密码，它也是 RSA 私钥文件的最后一道安全保障。

:::caution 特别注意
私钥的安全极其重要，需要特别注意以下几点：

- **Passphrase 泄露风险**：如果私钥的 Passphrase 泄露，攻击者可能解密存储在元数据引擎中的私钥，从而危及所有加密数据的安全
- **私钥文件泄露**：如果加密的私钥文件本身泄露，同时 Passphrase 也被获取，将导致严重的安全风险
- **数据不可恢复性**：如果无法提供正确的 Passphrase 来访问存储在元数据引擎中的私钥，**所有的加密数据将永久丢失且无法恢复**

建议专注于保护 Passphrase 的安全，并通过环境变量方式传递，避免在命令行历史中泄露。
:::

#### 第二步 创建加密的文件系统

创建加密的文件系统需要使用 `--encrypt-rsa-key` 选项指定私钥，提供的私钥内容将写入元数据引擎。需要用环境变量 `JFS_RSA_PASSPHRASE` 来指定私钥的 Passphrase。

JuiceFS 支持三种加密算法组合，可以通过 `--encrypt-algo` 选项指定：

- `aes256gcm-rsa`（默认）：使用 AES-256-GCM + RSA（或其他私钥）
- `chacha20-rsa`：使用 ChaCha20-Poly1305 + RSA（或其他私钥）
- `sm4gcm`: 使用 SM4-GCM + SM2（或其他私钥）

1. 用环境变量设置 Passphrase

   ```shell
   export JFS_RSA_PASSPHRASE=the-passwd-for-rsa
   ```

2. 创建文件系统（使用默认的 AES-256-GCM 加密）

   ```shell {2}
   juicefs format --storage s3 \
     --encrypt-rsa-key my-priv-key.pem \
     ...
   ```

   或者明确指定使用 ChaCha20-Poly1305 加密：

   ```shell {2,3}
   juicefs format --storage s3 \
     --encrypt-rsa-key my-priv-key.pem \
     --encrypt-algo chacha20-rsa \
     ...
   ```

3. （可选）删除本地私钥文件

   JuiceFS 在格式化文件系统时会将私钥的内容安全地存储在元数据引擎中。因此，在完成文件系统创建后（除非有特殊的合规性要求），建议您删除本地的私钥文件：

   ```shell
   rm my-priv-key.pem
   ```

   这样只需确保 `JFS_RSA_PASSPHRASE` 环境变量的安全，后续的文件系统挂载和访问只需要提供正确的 Passphrase 即可。

   如果由于合规性要求或其他原因需要保留私钥文件，请务必将私钥文件存储在安全位置，设置严格的访问权限，并确保私钥文件和 Passphrase 分开保管。

#### 第三步 挂载文件系统

挂载加密的文件系统无需指定额外的选项，但在挂载之前需要通先过环境变量设置私钥的 Passphrase。

1. 用环境变量设置 Passphrase

   ```shell
   export JFS_RSA_PASSPHRASE=the-passwd-for-rsa
   ```

2. 挂载文件系统

   ```shell
   juicefs mount redis://127.0.0.1:6379/1 /mnt/myjfs
   ```

### 性能考量

启用加密功能确实会带来一定的性能开销，但现代硬件技术已经让这种影响变得相当可控。具体的性能影响取决于工作负载类型、硬件配置（特别是 CPU 的加密指令集支持）和数据访问模式。

现代 CPU 中 TLS、HTTPS 和 AES-256 这些加密技术都有专门的硬件优化。特别是 Intel 和 AMD 的现代处理器都内置了 AES-NI 指令集，能够以接近原生的速度执行 AES 加密操作，这让数据加密的性能损耗大大降低。

#### 加密算法选择建议

**AES-256-GCM**（默认选择）：

- 在支持 AES-NI 指令集的现代 CPU 上性能优异
- 广泛的行业标准支持和验证
- 适合大多数生产环境

**ChaCha20-Poly1305**：

- 在不支持 AES-NI 的 CPU 上可能提供更好的性能
- 适合 ARM 架构或较旧的 x86 处理器
- 对抗时序攻击具有更好的抗性
- Google 等公司在移动设备和某些服务器环境中的首选算法

在选择加密密钥时，推荐使用 RSA-2048 密钥，它在安全强度和性能表现之间有较好的平衡。RSA-4096 提供更高的安全性，但其解密操作会更慢，在高并发读取场景下可能影响性能。

值得一提的是，加密后的数据会比原始数据稍大一些，主要是因为 AES-256-GCM 和 ChaCha20-Poly1305 加密算法需要添加认证标签（16 字节）和其他加密元数据。

### 安全实践指南

加密方案的安全性不仅取决于算法本身，更在于如何正确地管理和使用加密密钥。以下是一些重要的安全实践建议：

**密钥管理是安全的核心**。私钥的密码应该足够强大——建议使用至少 16 个字符的组合，包含大小写字母、数字和特殊符号。建议通过环境变量传递密码，避免在命令行历史中泄露。

虽然定期更换私钥是个好习惯，但需要注意的是，更换私钥意味着需要重新格式化整个文件系统。因此，在规划私钥轮换策略时，要权衡安全需求和业务连续性。

**访问控制同样重要**。确保您的元数据引擎（无论是 Redis、MySQL 还是其他数据库）都配置了适当的认证和授权机制。对象存储的访问权限也应该遵循最小权限原则，只授予必要的操作权限。

在网络层面，尽量使用 VPC 或私有网络来隔离元数据引擎和对象存储之间的通信流量，减少被中间人攻击的风险。

**监控和审计**能帮助您及时发现异常情况。建议记录所有与加密相关的操作日志，定期检查密钥的使用模式，建立异常访问的检测机制。这样即使发生安全事件，您也能快速响应并采取应对措施。

### 重要注意事项

在使用 JuiceFS 加密功能时，有几个重要的技术限制需要了解：

首先，客户端本地缓存的数据是**不加密的**。虽然只有 root 用户或文件所有者能够访问这些缓存数据，但如果您的使用场景要求端到端的完全加密，就需要考虑额外的保护措施，比如将缓存目录放在加密的文件系统或块存储上。

其次，加密功能有一些固有的限制。文件元数据（如文件名、大小、权限等信息）是不加密的，解密后的数据在内存中也是明文状态。最重要的是，一旦为文件系统启用了加密，就无法再关闭这个功能——加密是不可逆的操作。

在部署规划时，请考虑到加密会带来额外的 CPU 和内存开销。为了确保最佳的兼容性和稳定性，建议所有访问加密文件系统的客户端使用相同或兼容版本的 JuiceFS。

### 适用场景分析

JuiceFS 的加密功能特别适合这些场景：保护云端对象存储中的敏感数据、满足 GDPR、HIPAA 等合规性要求、长期安全存储重要业务数据，以及在多租户环境中实现数据隔离。

不过，如果您需要客户端本地缓存也加密，或者想要为现有的文件系统后期添加加密功能，这个方案可能就不太适合。同样，对于性能要求极其苛刻的应用，或者需要频繁更换密钥但又不能接受重新格式化的场景，也需要慎重考虑。


================================================
FILE: docs/zh_cn/security/posix_acl.md
================================================
---
title: POSIX ACL
description: 本文介绍了 JuiceFS 支持的 POSIX ACL 功能，以及如何启用和使用 ACL 权限。
sidebar_position: 3
---

POSIX ACL（Portable Operating System Interface for Unix - Access Control List）是 Unix-like 操作系统中的一种访问控制机制，可以对文件和目录的访问权限进行更细粒度的控制。

## 版本及兼容性要求

- JuiceFS 从 v1.2 版本开始支持 POSIX ACL；
- 所有版本客户端都可以挂载没有开启 ACL 的卷，不论这些卷是由新版本客户端创建的还是由旧版本客户端创建的；
- ACL 开启后暂不支持取消，因此 `--enable-acl` 选项是关联到卷的。

:::caution 提示
如果计划使用 ACL 功能，建议将所有客户端升级的最新版，避免旧版本客户端影响 ACL 的正确性。
:::

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114691951041052&bvid=BV136MqzFEDD&cid=30526082573&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 启用 ACL

如前所述，可以用新版客户端在创建新卷时开启 ACL，也可以用新版客户端在已创建的卷上开启 ACL。

### 创建新卷并开启 ACL

```shell
juicefs format --enable-acl sqlite3://myjfs.db myjfs
```

### 在已有卷上开启 ACL

使用 `config` 命令为一个已创建的卷开启 ACL 功能：

```shell
juicefs config --enable-acl sqlite3://myjfs.db
```

## 使用方法

为一个文件或目录设置 ACL 权限，可以使用 `setfacl` 命令，例如：

```shell
setfacl -m u:alice:rw- /mnt/jfs/file
```

更多关于 POSIX ACL 的详细规则，请参考：

- [POSIX Access Control Lists on Linux](https://www.usenix.org/legacy/publications/library/proceedings/usenix03/tech/freenix03/full_papers/gruenbacher/gruenbacher_html/main.html)
- [setfacl](https://linux.die.net/man/1/setfacl)
- [JuiceFS ACL 功能全解析，更精细的权限控制](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v12-beta-1-acl)

## 注意事项

- ACL 权限检测需要 [Linux kernel 4.9](https://lkml.iu.edu/hypermail/linux/kernel/1610.0/01531.html) 及以上版本；
- 启用 ACL 会有额外的性能影响。但因为有内存缓存优化，大部分使用场景性能损耗都较低，可参考[压测结果](https://juicefs.com/zh-cn/blog/release-notes/juicefs-v12-beta-1-acl#03-%E6%80%A7%E8%83%BD)。


================================================
FILE: docs/zh_cn/security/trash.md
================================================
---
sidebar_position: 2
---

# 回收站

:::note 注意
此特性需要使用 1.0.0 及以上版本的 JuiceFS。旧版本 JuiceFS 欲使用回收站，需要在升级所有挂载点后通过 `config` 命令手动设置回收站，详见下方示范。
:::

JuiceFS 默认开启回收站功能，你删除的文件会被保存在文件系统根目录下的 `.trash` 目录内，保留指定时间后才将数据真正清理。在清理到来之前，通过 `df -h` 命令看到的文件系统使用量并不会减少，对象存储中的对象也会依然存在。

不论你正在用 `format` 命令初始化文件系统，还是用 `config` 命令调整已有的文件系统，都可以用 [`--trash-days`](../reference/command_reference.mdx#format) 参数来指定回收站保留时长：

```shell
# 初始化新的文件系统
juicefs format META-URL myjfs --trash-days=7

# 修改已有文件系统
juicefs config META-URL --trash-days=7

# 设置为 0 以禁用回收站
juicefs config META-URL --trash-days=0
```

另外，回收站自动清理依赖 JuiceFS 客户端的后台任务，为了保证后台任务能够正常执行，需要至少 1 个在线的挂载点，并且在挂载文件系统时不可以使用 [`--no-bgjob`](../reference/command_reference.mdx#mount-metadata-options) 参数。

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114737031550670&bvid=BV1cFKGzeEjk&cid=30669867418&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 恢复文件 {#recover}

文件被删除时，会根据删除时间，被保存在格式为 `.trash/YYYY-MM-DD-HH/[parent inode]-[file inode]-[file name]` 的目录，其中 `YYYY-MM-DD-HH` 就是删除操作的 UTC 时间。因此只需要确定文件的删除时间，就能在对应的目录中找到他们，来进行恢复操作。

如果已经顺利找到想要恢复的文件，只需将其 `mv` 出来即可：

```shells
mv .trash/2022-11-30-10/[parent inode]-[file inode]-[file name] .
```

被删除的文件会完全丢失其目录结构，在回收站中“平铺”存储，但会在文件名保留父目录的 inode，如果你确实忘记了被误删的文件名，可以使用 [`juicefs info`](../reference/command_reference.mdx#info) 命令先找出父目录的 inode，然后顺藤摸瓜地定位到误删文件。

假设挂载点为 `/jfs`，你误删了 `/jfs/data/config.json`，但无法直接通过 `config.json` 文件名来操作恢复文件（因为你忘了），可以用下方流程反查父目录 inode，然后在回收站中定位文件：

```shell
# 用 info 命令确定父目录 inode
juicefs info /jfs/data

# 在上方的输出中，关注 inode 字段，假设 /jfs/data 这个目录的 inode 为 3
# 使用 find 命令，就能找出该目录下所有被删除的文件
find /jfs/.trash -name '3-*'

# 将该目录下所有文件进行恢复
mv /jfs/.trash/2022-11-30-10/3-* /jfs/data
```

需要注意，只有 root 用户具有回收站目录的写权限，因此只能使用 root 用户能用 `mv` 进行上述恢复操作。普通用户如果有这些文件的读权限，也可以用 `cp` 的方式读取文件，再写到新文件，虽然产生了存储空间浪费，但也能实现恢复文件的效果。

如果误删了结构复杂的目录，用 `mv` 命令手动恢复原样会非常艰难，比方说：

```shell
$ tree data
data
├── app1
│   └── config
│       └── config.json
└── app2
    └── config
        └── config.json

# 删除上方的复杂目录
$ juicefs rmr data

# 文件会在回收站内平铺存储，丢失目录结构
$ tree .trash/2023-08-14-05
.trash/2023-08-14-05
├── 1-12-data
├── 12-13-app1
├── 12-15-app2
├── 13-14-config
├── 14-17-config.json
├── 15-16-config
└── 16-18-config.json
```

正因如此，JuiceFS v1.1 提供了 [`restore`](../reference/command_reference.mdx#restore) 子命令来快速恢复大量误删的文件，以上方目录结构为例，恢复操作如下：

```shell
# 先运行 restore 命令，在回收站内重建目录结构
$ juicefs restore $META_URL 2023-08-14-05

# 预览恢复完毕的目录结构，确定需要恢复的范畴
# 既可以直接用下方命令完整恢复整个目录，也可以单独用 mv 命令恢复某一部分
$ tree .trash/2023-08-14-05
.trash/2023-08-14-05
└── 1-12-data
    ├── app1
    │   └── config
    │       └── config.json
    └── app2
        └── config
            └── config.json

# 增加 --put-back 参数将文件恢复至原位
juicefs restore $META_URL 2023-08-14-05 --put-back
```

## 彻底删除文件 {#purge}

当回收站中的文件到了过期时间，会被自动清理。需要注意的是，文件清理由 JuiceFS 客户端的后台任务（background job，也称 bgjob）执行，默认每小时清理一次，因此面对大量文件过期时，对象存储的清理速度未必和你期望的一样快，可能需要一些时间才能看到存储容量变化。

如果你希望在过期时间到来之前彻底删除文件，需要使用 root 用户身份，用 [`juicefs rmr`](../reference/command_reference.mdx#rmr) 或系统自带的 `rm` 命令来删除回收站目录 `.trash` 中的文件，这样就能立刻释放存储空间。

例如，彻底删除回收站中某个目录：

```shell
juicefs rmr .trash/2022-11-30-10/
```

如果希望更快速删除过期文件，可以挂载多个挂载点来突破单个客户端的删除速度上限。

## 选择性跳过回收站 {#skip}

开启回收站功能后，可以通过 chattr 命令为文件或目录设置's'属性，带有's'属性的文件或目录在被删除时不会进入回收站，而是直接从文件系统中移除。如果父目录设置了's'属性，则该目录下新创建的文件和子目录都会继承该属性，但是已存在的和之后转移过来的文件或目录不会继承这个属性。

需要在挂载时启用`--enable-ioctl`选项，才能使用 chattr 命令修改文件属性。

## 回收站和文件碎片 {#gc}

在回收站里，除了因用户操作而产生的文件，还存在另一类对用户不可见的数据——覆写产生的文件碎片。关于文件碎片是怎么产生的，可以详细阅读[「JuiceFS 如何存储文件」](../introduction/architecture.md#how-juicefs-store-files)。总而言之，如果应用经常删除文件或者频繁覆盖写文件，会导致对象存储使用量远大于文件系统用量。

虽然失效的文件碎片不能直接浏览、操作，但你可以通过 [`juicefs status`](../reference/command_reference.mdx#status) 命令来简单观测其规模：

```shell
# 下方 Trash Slices 就是失效的文件碎片统计
$ juicefs status META-URL --more
...
           Trash Files: 0                     0.0/s
           Trash Files: 0.0 b   (0 Bytes)     0.0 b/s
 Pending Deleted Files: 0                     0.0/s
 Pending Deleted Files: 0.0 b   (0 Bytes)     0.0 b/s
          Trash Slices: 27                    26322.2/s
          Trash Slices: 783.0 b (783 Bytes)   753.1 KiB/s
Pending Deleted Slices: 0                     0.0/s
Pending Deleted Slices: 0.0 b   (0 Bytes)     0.0 b/s
...
```

文件碎片也按照回收站设置的时间进行保留，这对数据安全同样具有重要意义：如果你不小心对文件进行了错误修改，或者覆盖写，一样可以通过元数据备份，把数据找回来（当然，前提是误操作之前已经设置好了元数据备份）。如果确实需要对误修改的文件进行恢复，则需要找回旧版元数据，挂载后手动将文件拷贝出来进行恢复，详见[备份与恢复](../administration/metadata_dump_load.md)。

由于对用户不可见，这些失效的文件碎片无法轻易删除。如果规模巨大，确实需要主动清理它们，可以用以下操作手动处理：

```shell
# 临时禁用回收站
juicefs config META-URL --trash-days 0

# 如果有需要，可以手动触发再次运行碎片合并
juicefs gc --compact

# 运行 gc 命令删除泄露对象
juicefs gc --delete

# 操作完成后，记得重新开启回收站
```

## 访问权限 {#permission}

所有用户均有权限浏览回收站，可以看到所有被删除的文件。然而 `.trash` 目录只有 root 具备写权限，但就算文件被移入回收站，也会保留原先的文件权限，因此在操作回收站内的文件时，注意权限问题并根据情况调整操作用户。

关于回收站的权限问题，还需要注意：

* 当 JuiceFS 客户端由非 root 用户启动时，需要在 mount 时指定 `-o allow_root` 参数，允许 root 用户访问文件系统，否则将无法正常清空回收站。
* `.trash` 目录只能通过文件系统根目录访问，子目录挂载点无法访问。
* 回收站内不允许用户自行创建新的文件，只有 root 才能删除或移动其中的文件。


================================================
FILE: docs/zh_cn/tutorials/aliyun.md
================================================
---
title: 在阿里云使用 JuiceFS
sidebar_position: 7
slug: /clouds/aliyun
---

如下图所示，JuiceFS 存储由数据库和对象存储共同驱动。存入 JuiceFS 的文件会按照一定的规则被拆分成固定大小的数据块存储在对象存储中，数据对应的元数据则会存储在数据库中。

元数据完全独立存储，对文件的检索和处理并不会直接操作对象存储中的数据，而是先在数据库中操作元数据，只有当数据发生变化的时候，才会与对象存储交互。

这样的设计可以有效缩减对象存储在请求数量上的费用，同时也能让我们显著感受到 JuiceFS 带来的性能提升。

![JuiceFS-aliyun](../images/juicefs-aliyun.png)

## 准备

通过前面的架构描述，可以知道 JuiceFS 需要搭配数据库和对象存储一起使用。这里我们直接使用阿里云的 ECS 云服务器，结合云数据库和 OSS 对象存储。

在创建云计算资源时，尽量选择在相同的区域，这样可以让资源之间通过内网线路相互访问，避免使用公网线路产生额外的流量费用。

### 一、云服务器 ECS

JuiceFS 对服务器硬件没有特殊要求，一般来说，云平台上最低配的云服务器也能稳定使用 JuiceFS，通常你只需要选择能够满足自身业务的配置即可。

需要特别说明的是，你不需要为使用 JuiceFS 重新购买服务器或是重装系统，JuiceFS 没有业务入侵性，不会对你现有的系统和程序造成任何的干扰，你完全可以在正在运行的服务器上安装和使用 JuiceFS。

JuiceFS 默认会占用不超过 1GB 的硬盘空间作为缓存，可以根据需要调整缓存空间的大小。该缓存是客户端与对象存储之间的一个数据缓冲层，选择性能更好的云盘，可以获得更好的性能表现。

在操作系统方面，阿里云 ECS 提供的所有操作系统都可以安装 JuiceFS。

**本文使用的 ECS 配置如下：**

| **实例规格**     | ecs.t5-lc1m1.small       |
| ---------------- | ------------------------ |
| **CPU**          | 1 核                     |
| **内存**         | 1 GB                     |
| **存储**         | 40 GB                    |
| **操作系统**     | Ubuntu Server 20.04 64 位 |
| **地域及可用区** | 华东 2（上海）           |

### 二、云数据库

JuiceFS 会将数据对应的元数据全部存储在独立的数据库中，目前已开放支持的数据库有 Redis、MySQL、PostgreSQL、SQLite，以及 OceanBase。

根据数据库类型的不同，带来的元数据性能和可靠性表现也各不相同。比如 Redis 是完全运行在内存上的，它能提供极致的性能，但运维难度较高，可靠性相对低。而 MySQL、PostgreSQL 是关系型数据库，性能不如 Redis，但运维难度不高，可靠性也有一定的保障。SQLite 是单机单文件关系型数据库，性能较低，也不适合用于大规模数据存储，但它免配置，适合单机少量数据存储的场景。相比之下，OceanBase 是一款分布式关系型数据库，能够在提供高性能的同时，确保数据的一致性和高可靠性（RTO < 8s）。它特别适合金融、零售、电信等对事务一致性和分布式能力要求较高的场景，使 JuiceFS 在处理海量元数据时可以实现更高效率、更低延迟和更强稳定性，从而满足现代分布式存储系统对底层数据库的苛刻要求。

如果只是为了评估 JuiceFS 的功能，你可以在 ECS 云服务器手动搭建数据库使用。当你要在生产环境使用 JuiceFS 时，如果没有专业的数据库运维团队，阿里云的云数据库服务通常是更好的选择。

当然，如果你愿意，也可以使用其他云平台上提供的云数据库服务。但在这种情况下，你只能通过公网访问云数据库，也就是说，你必须向公网暴露数据库的端口，这存在极大的安全风险，最好不要这样使用。

如果必须通过公网访问数据库，可以通过云数据库控台提供的白名单功能，严格限制允许访问数据库的 IP 地址，从而提升数据的安全性。从另一个角度说，如果你通过公网无法成功连接云数据库，那么可以检查数据库的白名单，检查是不是该设置限制了你的访问。

|    数据库    |          Redis           |     MySQL、PostgreSQL      |         SQLite         |        OceanBase        |
| :----------: | :----------------------: | :------------------------: | :--------------------: | :---------------------: |
|   **性能**   |            强            |            适中            |           弱           |           强            |
| **运维门槛** |            高            |            适中            |           低           |           适中          |
|  **可靠性**  |            低            |            适中            |           低           |           高            |
| **应用场景** | 海量数据、分布式高频读写 | 海量数据、分布式中低频读写 | 少量数据单机中低频读写 | 分布式场景、强事务一致性、高可靠性要求 |

**本文使用了[云数据 Redis 版](https://www.aliyun.com/product/kvstore)，以下连接地址只是为了演示目的编制的伪地址：**

| Redis 版本   | 5.0 社区版                             |
|--------------|----------------------------------------|
| **实例规格** | 256M 标准版 - 单副本                   |
| **连接地址** | `herald-sh-abc.redis.rds.aliyuncs.com` |
| **可用区**   | 上海                                   |

### 三、对象存储 OSS

JuiceFS 会将所有的数据都存储到对象存储中，它支持几乎所有的对象存储服务。但为了获得最佳的性能，当使用阿里云 ECS 时，搭配阿里云 OSS 对象存储通常是最优选择。不过请注意，将 ECS 和 OSS Bucket 选择在相同的地区，这样才能通过阿里云的内网线路进行访问，不但延时低，而且不需要额外的流量费用。

当然，如果你愿意，也可以使用其他云平台提供的对象存储服务，但不推荐这样做。首先，通过阿里云 ECS 访问其他云平台的对象存储要走公网线路，对象存储会产生流量费用，而且这样的访问延时相比也会更高，可能会影响 JuiceFS 的性能发挥。

阿里云 OSS 有不同的存储级别，由于 JuiceFS 需要与对象存储频繁交互，建议使用标准存储。你可以搭配 OSS 资源包使用，降低对象存储的使用成本。

### API 访问秘钥

阿里云 OSS 需要通过 API 进行访问，你需要准备访问秘钥，包括  `Access Key ID` 和 `Access Key Secret` ，[点此查看](https://help.aliyun.com/document_detail/38738.html)获取方式。

> **安全建议**：显式使用 API 访问秘钥可能导致密钥泄露，推荐为云服务器分配 [RAM 服务角色](https://help.aliyun.com/document_detail/93689.htm)。当一台 ECS 被授予 OSS 操作权限以后，无需使用 API 访问秘钥即可访问 OSS。

## 安装

当前使用的是 Ubuntu Server 20.04 64 位系统，依次执行以下命令可以安装最新版本客户端。

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本。

执行命令，看到返回 `juicefs` 的命令帮助信息，代表客户端安装成功。

```shell
$ juicefs
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.15.2 (2021-07-07T05:51:36Z 4c16847)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

JuiceFS 具有良好的跨平台兼容性，同时支持在 Linux、Windows 和 macOS 上使用。本文着重介绍 JuiceFS 在 Linux 系统上的安装和使用，如果你需要了解其他系统上的安装方法，请[查阅文档](../getting-started/installation.md)。

## 创建 JuiceFS 存储

JuiceFS 客户端安装好以后，现在就可以使用前面准备好的 Redis 数据库和 OSS 对象存储来创建 JuiceFS 存储了。

严格意义上说，这一步操作应该叫做“Format a volume”，即格式化一个卷。但考虑到有很多用户可能不了解或者不关心文件系统的标准术语，所以简单起见，我们就直白的把这个过程叫做“创建 JuiceFS 存储”。

以下命令使用 JuiceFS 客户端提供的 `format` 子命令创建了一个名为 `mystor` 的存储，即文件系统：

```shell
$ juicefs format \
    --storage oss \
    --bucket https://<your-bucket-name> \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    mystor
```

**选项说明：**

- `--storage`：指定对象存储类型，[点此查看](../reference/how_to_set_up_object_storage.md#supported-object-storage) JuiceFS 支持的对象存储。
- `--bucket`：对象存储的 Bucket 域名。当使用阿里云 OSS 时，只需填写 bucket 名称即可，无需填写完整的域名，JuiceFS 会自动识别并补全地址。
- `--access-key` 和 `--secret-key`：访问对象存储 API 的秘钥对，[点此查看](https://help.aliyun.com/document_detail/38738.html)获取方式。

> Redis 6.0 身份认证需要用户名和密码两个参数，地址格式为 `redis://username:password@redis-server-url:6379/1`。目前阿里云数据库 Redis 版只提供 Reids 4.0 和 5.0 两个版本，认证身份只需要密码，在设置 Redis 服务器地址时只需留空用户名即可，例如：`redis://:password@redis-server-url:6379/1`

使用 RAM 角色绑定 ECS 时，创建 JuiceFS 存储只需指定 `--storage` 和  `--bucket` 两个选项，无需提供 API 访问秘钥。命令可以改写成：

```shell
$ juicefs format \
    --storage oss \
    --bucket https://mytest.oss-cn-shanghai.aliyuncs.com \
    redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    mystor
```

看到类似下面的输出，代表文件系统创建成功了。

```shell
2021/07/13 16:37:14.264445 juicefs[22290] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
2021/07/13 16:37:14.277632 juicefs[22290] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:37:14.281432 juicefs[22290] <INFO>: Ping redis: 3.609453ms
2021/07/13 16:37:14.527879 juicefs[22290] <INFO>: Data uses oss://mytest/mystor/
2021/07/13 16:37:14.593450 juicefs[22290] <INFO>: Volume is formatted as {Name:mystor UUID:4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b Storage:oss Bucket:https://mytest340 AccessKey:LTAI4G4v6ioGzQXy56m3XDkG SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

## 挂载 JuiceFS 存储

文件系统创建完成，对象存储相关的信息会被存入数据库，挂载时无需再输入对象存储的 Bucket 和秘钥等信息。

使用 `mount` 子命令，将文件系统挂载到 `/mnt/jfs` 目录：

```shell
sudo juicefs mount -d redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1 /mnt/jfs
```

> **注意**：挂载文件系统时，只需填写 Redis 数据库地址，不需要文件系统名称。默认的缓存路径为 `/var/jfsCache`，请确保当前用户有足够的读写权限。

看到类似下面的输出，代表文件系统挂载成功。

```shell
2021/07/13 16:40:37.088847 juicefs[22307] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com/1
2021/07/13 16:40:37.101279 juicefs[22307] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:40:37.104870 juicefs[22307] <INFO>: Ping redis: 3.408807ms
2021/07/13 16:40:37.384977 juicefs[22307] <INFO>: Data use oss://mytest/mystor/
2021/07/13 16:40:37.387412 juicefs[22307] <INFO>: Disk cache (/var/jfsCache/4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b/): capacity (1024 MB), free ratio (10%), max pending pages (15)
.2021/07/13 16:40:38.410742 juicefs[22307] <INFO>: OK, mystor is ready at /mnt/jfs
```

使用 `df` 命令，可以看到文件系统的挂载情况：

```shell
$ df -Th
文件系统           类型          容量   已用  可用   已用% 挂载点
JuiceFS:mystor   fuse.juicefs  1.0P   64K  1.0P    1% /mnt/jfs
```

文件系统挂载成功以后，现在就可以像使用本地硬盘那样，在 `/mnt/jfs` 目录中存储数据了。

> **多主机共享**：JuiceFS 存储支持被多台云服务器同时挂载使用，你可以在其他 ECS 上安装 JuiceFS 客户端，然后使用 `redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1` 数据库地址挂载文件系统到每一台主机上。

## 查看文件系统状态

使用 JuiceFS 客户端的 `status` 子命令可以查看一个文件系统的基本信息和连接状态。

```shell
$ juicefs status redis://:<your-redis-password>@herald-sh-abc.redis.rds.aliyuncs.com:6379/1

2021/07/13 16:56:17.143503 juicefs[22415] <INFO>: Meta address: redis://@herald-sh-abc.redis.rds.aliyuncs.com:6379/1
2021/07/13 16:56:17.157972 juicefs[22415] <WARNING>: maxmemory_policy is "volatile-lru", please set it to 'noeviction'.
2021/07/13 16:56:17.161533 juicefs[22415] <INFO>: Ping redis: 3.392906ms
{
  "Setting": {
    "Name": "mystor",
    "UUID": "4ad0bb86-6ef5-4861-9ce2-a16ac5dea81b",
    "Storage": "oss",
    "Bucket": "https://mytest",
    "AccessKey": "<your-access-key-id>",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 3,
      "Heartbeat": "2021-07-13T16:55:38+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "demo-test-sh",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 22330
    }
  ]
}
```

## 卸载 JuiceFS 存储

使用 JuiceFS 客户端提供的 `umount` 命令即可卸载文件系统，比如：

```shell
sudo juicefs umount /mnt/jfs
```

> **注意**：强制卸载使用中的文件系统可能导致数据损坏或丢失，请务必谨慎操作。

## 开机自动挂载

请参考[「启动时自动挂载 JuiceFS」](../administration/mount_at_boot.md)


================================================
FILE: docs/zh_cn/tutorials/aws.md
================================================
---
title: 在 AWS 上使用 JuiceFS
sidebar_position: 4
slug: /clouds/aws
---

亚马逊云（AWS）是全球领先的云计算平台，提供几乎所有类型的云计算服务。AWS 丰富的产品线，为创建和使用 JuiceFS 文件系统提供了灵活的选择。

## 可以在哪里使用 JuiceFS {#where-can-juicefs-be-used}

JuiceFS 具有丰富的 API 接口，对 AWS 而言，通常可以在以下产品中使用：

- **Amazon EC2**：通过挂载 JuiceFS 文件系统来使用
- **Amazon Elastic Kubernetes Service（EKS）**：通过 JuiceFS CSI 驱动使用
- **Amazon EMR**：通过 JuiceFS Hadoop Java SDK 使用

## 准备 {#preparation}

一个 JuiceFS 文件系统由两部分组成：

1. **对象存储**：用于数据存储
2. **元数据引擎**：用于元数据存储的数据库

可以根据具体需求，选择在 AWS 上使用全托管的数据库和 S3 对象存储，或者在 EC2、EKS 上自行部署。

:::tip
本文着重介绍使用 AWS 全托管的服务创建 JuiceFS 文件系统的方法，对于自托管的情况，请查阅[「JuiceFS 支持的元数据引擎」](../reference/how_to_set_up_metadata_engine.md)和[「JuiceFS 支持的对象存储」](../reference/how_to_set_up_object_storage.md)以及相应程序文档。
:::

### 对象存储 {#object-storage}

S3 是 AWS 提供的对象存储服务，可以根据需要在相应地区创建 bucket，也可以通过 [IAM 角色授权](../reference/how_to_set_up_object_storage.md#aksk)让 JuiceFS 客户端自动创建 bucket。

Amazon S3 提供多种[存储类](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/storage-class-intro.html)，例如：

- **S3 Standard**：标准存储，适用于频繁访问数据的通用型存储，实时访问，无取回费用。
- **S3 Standard-IA**：低频存储，适用于长期需要但访问频率不太高的数据，实时访问，有取回费用。
- **S3 Glacier**：归档存储，适用于长期存档几乎不访问的数据，访问前需解冻。

你可以在创建或者挂载 JuiceFS 文件系统时设置存储类，具体请参考[文档](../reference/how_to_set_up_object_storage.md#storage-class)。建议优先选择标准的存储类，其他的存储类虽然有更低的单位存储价格，但会涉及最低存储时长要求和检索（取回）费用。

另外，访问对象存储服务需要通过 Access Key（也叫 access key ID）和 Secret Key（也叫 secret access key）验证用户身份，可以参照文档[「管理 IAM 用户的访问密钥」](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_credentials_access-keys.html)进行创建。当通过 EC2 云服务器访问 S3 时，还可以为 EC2 分配 [IAM 角色](https://docs.aws.amazon.com/zh_cn/IAM/latest/UserGuide/id_roles.html)，实现在 EC2 上免密钥调用 S3 API。

### 数据库 {#database}

AWS 提供了多种基于网络的全托管数据库，可以用于构建 JuiceFS 的元数据引擎，主要有：

- **Amazon MemoryDB for Redis**（以下简称 MemoryDB）：持久的 Redis 内存数据库服务，可提供超快的性能。
- **Amazon RDS**：全托管的 MariaDB、MySQL、PostgreSQL 等数据库。

:::note 注意
虽然 Amazon ElastiCache for Redis（以下简称 ElastiCache）也提供兼容 Redis 协议的服务，但是相比 MemoryDB 来说，ElastiCache 无法提供「强一致性保证」，因此更推荐使用 MemoryDB。
:::

## 在 EC2 上使用 JuiceFS {#using-juicefs-on-ec2}

### 安装 JuiceFS 客户端 {#installing-the-juicefs-client}

请根据 EC2 所使用的操作系统，参考[安装](../getting-started/installation.md)文档安装最新的 JuiceFS 社区版客户端。

这里以 Linux 系统为例，使用一键安装脚本自动安装客户端：

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

### 创建文件系统 {#creating-a-file-system}

#### 准备对象存储 {#preparing-object-storage}

可以通过创建一个拥有 [AmazonS3FullAccess](https://docs.aws.amazon.com/zh_cn/AmazonS3/latest/userguide/security-iam-awsmanpol.html#security-iam-awsmanpol-amazons3fullaccess) 权限的 IAM 角色分配给 EC2，从而无需使用 Access Key 和 Secret Key 即可直接在 EC2 上创建和使用 S3 Bucket。

#### 准备数据库 {#preparing-the-database}

这里以 MemoryDB 为例，请参考[「Redis 最佳实践」](../administration/metadata/redis_best_practices.md)及 AWS 文档创建数据库。

为了让 EC2 能够访问 Redis 集群，需要将它们创建在相同的 VPC，或者为 Redis 集群的安全组添加规则允许 EC2 实例访问。

:::note 注意
如果创建的是 Redis 7.0 版本集群，需要安装 JuiceFS v1.1 及以上版本客户端。
:::

#### 格式化文件系统 {#formatting-file-system}

```shell
juicefs format --storage s3 \
  --bucket https://s3.ap-east-1.amazonaws.com/myjfs \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1 \
  myjfs
```

### 挂载文件系统 {#mounting-file-system}

```shell
sudo juicefs mount -d \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1 \
  /mnt/myjfs
```

对于通过 IAM 角色授权 S3 访问创建的文件系统，如果需要在 AWS 外部挂载使用，需要使用 `juicefs config` 为文件系统添加 Access Key 和 Secret Key：

```shell
juicefs config \
  --access-key=<your-access-key> \
  --secret-key=<your-secret-key> \
  rediss://clustercfg.myredis.hc79sw.memorydb.ap-east-1.amazonaws.com:6379/1
```

### 开机自动挂载 {#mounting-at-boot}

请参考文档[启动时自动挂载 JuiceFS](../administration/mount_at_boot.md)。

## 在 Amazon EKS 上使用 JuiceFS {#using-juicefs-on-amazon-eks}

Amazon EKS 支持[三种节点类型](https://docs.aws.amazon.com/zh_cn/eks/latest/userguide/eks-compute.html)：

- **EKS 托管节点组**：使用 Amazon EC2 作为计算节点
- **自行管理的节点**：使用 Amazon EC2 作为计算节点
- **Fargate**：一个无服务器的计算引擎

Fargate 类型节点暂不支持安装 JuiceFS CSI 驱动，请使用「EKS 托管节点组」或者「自行管理的节点」类型。

Amazon EKS 是标准的 Kubernetes 集群，可以使用 `eksctl`、`kubectl`、`helm` 等工具进行管理，请查阅 [JuiceFS CSI 驱动文档](/docs/zh/csi/introduction)了解如何安装和使用。

## 在 Amazon EMR 上使用 JuiceFS {#using-juicefs-on-amazon-emr}

请参考文档[「在 Hadoop 生态使用 JuiceFS」](../deployment/hadoop_java_sdk.md)。


================================================
FILE: docs/zh_cn/tutorials/digitalocean.md
================================================
---
title: 在 DigitalOcean 使用 JuiceFS
sidebar_position: 6
slug: /clouds/digitalocean
---

JuiceFS 是面向云设计的，使用云平台开箱即用的存储和数据库服务，最快几分钟就能完成配置投入使用，本文以 DigitalOcean 平台为例，介绍如何在云计算平台上快速简单的安装和使用 JuiceFS。

## 准备工作

JuiceFS 由存储和数据库组合驱动，因此你需要准备的东西应该包括：

### 1. 云服务器

DigitalOcean 上的云服务器被称为 Droplet。你不需要为使用 JuiceFS 而单独购买新的 Droplet，哪个云服务器上需要使用 JuiceFS 存储，就在它上面安装 JuiceFS 客户端即可。

#### 硬件配置

JuiceFS 对硬件配置没有特殊的要求，任何规格的 Droplet 都能稳定的使用。但建议选择性能更好的 SSD 并预留至少 1GB 的容量提供给 JuiceFS 作为本地缓存使用。

#### 操作系统

JuiceFS 支持 Linux、BSD、macOS 和 Windows，在本文中，我们会以 Ubuntu Server 20.04 为例进行介绍。

### 2. 对象存储

JuiceFS 使用对象存储来存储所有的数据，在 DigitalOcean 上使用 Spaces 是最简便的方案。Spaces 是一个 S3 兼容的对象存储服务，开箱即用。在创建时建议选择与 Droplet 相同的区域，这样可以获得最佳的访问速度，同时也能避免额外的流量开销。

当然，你也可以使用其他平台的对象存储服务，或是在 Droplet 上使用 Ceph 或 MinIO 手动搭建。总之，你可以自由选择要使用的对象存储，只要确保 JuiceFS 客户端能够访问到对象存储的 API 就可以。

这里，我们创建了一个名为 `juicefs` 的 Spaces 存储桶，区域为新加坡 `sgp1`，它的访问地址为：

- `https://juicefs.sgp1.digitaloceanspaces.com`

另外，还需要在 API 菜单创建 `Spaces access keys`，JuiceFS 需要用它访问 Spaces 的 API。

### 3. 数据库

与一般的文件系统不同，JuiceFS 将数据所对应的所有元数据都存储在独立的数据库，存储的数据规模越大性能越出色。目前，JuiceFS 支持 Redis、TiKV、MySQL/MariaDB、PostgreSQL、SQLite 等常见数据库，同时也在持续开发对其他数据库的支持。如果你需要的数据库暂未支持，请提交 [Issue](https://github.com/juicedata/juicefs/issues) 反馈。

在性能、规模和可靠性等方面，每种数据库都有各自的优缺点，你应该根据实际的场景需要进行选择。

在数据库的选择方面请不要有顾虑，JuiceFS 客户端提供了元数据迁移功能，你可以将元数据从一种数据库中轻松的导出并迁移到其他的数据库中。

本文我们使用 DigitalOcean 的 Redis 6 数据库托管服务，区域选择 `新加坡`，选择与已存在的 Droplet 相同的 VPC 私有网络。创建 Redis 大概需要 5 分钟左右的时间，我们跟随设置向导对数据库进行初始化设置。

![DigitalOcean-Redis-guide](../images/digitalocean-redis-guide.png)

默认情况下 Redis 允许所有入站连接，出于安全考虑，应该在设置向导的安全设置环节，在 `Add trusted sources` 中选中有权访问 Redis 的 Droplet，即仅允许选中的主机访问 Redis。

在数据回收策略的设置环节，建议选择 `noeviction`，即当内存耗尽时，仅报告错误，不回收任何数据。

> **注意**：为了确保元数据的安全和完整，回收策略请不要选择 `allkeys-lru` 和 `allkey-random`。

Redis 的访问地址可以从控制台的 `Connection Details` 中找到，如果所有计算资源都在 DigitalOcean，则建议优先使用 VPC 私有网络进行连接，这样能最大程度的提升安全性。

![DigitalOcean-Redis-url](../images/digitalocean-redis-url.png)

## 安装和使用

### 1. 安装 JuiceFS 客户端

我们当前使用的是 Ubuntu Server 20.04，执行以下命令即可安装最新版本客户端。

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

执行命令，看到返回 `juicefs` 的命令帮助信息，代表客户端安装成功。

```shell
$ juicefs

NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.16.2 (2021-08-25T04:01:15Z 29d6fee)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   stats    show runtime stats
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

另外，你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本进行手动安装。

### 2. 创建文件系统

创建文件系统使用 `format` 子命令，格式为：

```shell
juicefs format [command options] META-URL NAME
```

以下命令创建了一个名为 `mystor` 的文件系统：

```shell
$ juicefs format \
    --storage space \
    --bucket https://juicefs.sgp1.digitaloceanspaces.com \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 \
    mystor
```

**参数说明：**

- `--storage`：指定数据存储引擎，这里使用的是 `space`，点此查看所有[支持的存储](../reference/how_to_set_up_object_storage.md)。
- `--bucket`：指定存储桶访问地址。
- `--access-key` 和 `--secret-key`：指定访问对象存储 API 的秘钥。
- DigitalOcean 托管的 Redis 需要使用 TLS/SSL 加密访问，因此需要使用 `rediss://` 协议头，链接最后添加的 `/1` 代表使用 Redis 的 1 号数据库。

看到类似下面的输出，代表文件系统创建成功。

```shell
2021/08/23 16:36:28.450686 juicefs[2869028] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:36:28.481251 juicefs[2869028] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:36:28.481763 juicefs[2869028] <INFO>: Ping redis: 331.706µs
2021/08/23 16:36:28.482266 juicefs[2869028] <INFO>: Data uses space://juicefs/mystor/
2021/08/23 16:36:28.534677 juicefs[2869028] <INFO>: Volume is formatted as {Name:mystor UUID:6b0452fc-0502-404c-b163-c9ab577ec766 Storage:space Bucket:https://juicefs.sgp1.digitaloceanspaces.com AccessKey:7G7WQBY2QUCBQC5H2DGK SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

### 3. 挂载文件系统

挂载文件系统使用 `mount` 子命令，使用 `-d` 参数以守护进程的形式挂载。以下命令将刚刚创建的文件系统挂载到当前目录下的 `mnt` 目录：

```shell
$ sudo juicefs mount -d \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
```

使用 sudo 执行挂载操作的目的是为了让 JuiceFS 能够有权限在 `/var/` 下创建缓存目录。值得注意的是，在挂载文件系统时，只需要指定`数据库地址`和`挂载点`，并不需要指定文件系统的名称。

看到类似下面的输出，代表文件系统挂载成功。

```shell
2021/08/23 16:39:14.202151 juicefs[2869081] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:39:14.234925 juicefs[2869081] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:39:14.235536 juicefs[2869081] <INFO>: Ping redis: 446.247µs
2021/08/23 16:39:14.236231 juicefs[2869081] <INFO>: Data use space://juicefs/mystor/
2021/08/23 16:39:14.236540 juicefs[2869081] <INFO>: Disk cache (/var/jfsCache/6b0452fc-0502-404c-b163-c9ab577ec766/): capacity (1024 MB), free ratio (10%), max pending pages (15)
2021/08/23 16:39:14.738416 juicefs[2869081] <INFO>: OK, mystor is ready at mnt
```

使用 `df` 命令，可以看到文件系统的挂载情况：

```shell
$ df -Th
文件系统           类型          容量   已用  可用   已用% 挂载点
JuiceFS:mystor fuse.juicefs  1.0P   64K  1.0P   1% /home/herald/mnt
```

从挂载命令的输出信息中可以看到，JuiceFS 默认设置了 1024 MB 的作为本地缓存。设置更大的缓存，可以让 JuiceFS 有更好的性能表现，可以在挂载文件系统时通过 `--cache-size` 选项设置缓存（单位 MiB），例如，设置 20GB 的本地缓存：

```shell
$ sudo juicefs mount -d --cache-size 20000 \
    rediss://default:your-password@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1 mnt
```

文件系统挂载成功以后，就可以像使用本地硬盘那样，在 `~/mnt` 目录中存储数据了。

### 4. 查看文件系统

使用 `status` 子命令可以查看一个文件系统的基本信息和连接状态，只需指定数据库访问地址即可。

```shell
$ juicefs status rediss://default:bn8l7ui2cun4iaji@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:48:48.567046 juicefs[2869156] <INFO>: Meta address: rediss://default@private-db-redis-sgp1-03138-do-user-2500071-0.b.db.ondigitalocean.com:25061/1
2021/08/23 16:48:48.597513 juicefs[2869156] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/08/23 16:48:48.598193 juicefs[2869156] <INFO>: Ping redis: 491.003µs
{
  "Setting": {
    "Name": "mystor",
    "UUID": "6b0452fc-0502-404c-b163-c9ab577ec766",
    "Storage": "space",
    "Bucket": "https://juicefs.sgp1.digitaloceanspaces.com",
    "AccessKey": "7G7WQBY2QUCBQC5H2DGK",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 1,
      "Heartbeat": "2021-08-23T16:46:14+08:00",
      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869091
    },
    {
      "Sid": 2,
      "Heartbeat": "2021-08-23T16:47:59+08:00",
      "Version": "0.16.2 (2021-08-25T04:01:15Z 29d6fee)",
      "Hostname": "ubuntu-s-1vcpu-1gb-sgp1-01",
      "MountPoint": "/home/herald/mnt",
      "ProcessID": 2869146
    }
  ]
}
```

### 5. 卸载文件系统

使用 `umount` 子命令卸载文件系统，比如：

```shell
sudo juicefs umount ~/mnt
```

> **注意**：强制卸载使用中的文件系统可能导致数据损坏或丢失，请务必谨慎操作。

### 6. 开机自动挂载

请参考[「启动时自动挂载 JuiceFS」](../administration/mount_at_boot.md)

### 7. 多主机共享挂载

JuiceFS 文件系统支持被多台云服务器同时挂载，而且对云服务器的地理位置没有要求，可以很容的实现同平台之间、跨云平台之间、公有云和私有云之间服务器的数据实时共享。

不单如此，JuiceFS 的共享挂载功能还能提供数据的强一致性保证，在多台服务器挂载了同一个文件系统时，文件系统上确认的写入会在所有主机上实时可见。

使用共享挂载功能，务必要确保组成文件系统的数据库和对象存储服务，能够被每一台要挂载它的主机正常访问。在本文的演示环境中，Spaces 对象存储是对整个互联网开放访问的，只要使用正确的秘钥就能够通过 API 进行读写。但对于平台托管的 Redis 数据库，你需要合理的配置访问策略，确保平台外的主机有访问权限。

在使用多主机共享挂载功能时，首先在任何一台主机上创建文件系统，然后在其他主机上安装 JuiceFS 客户端，使用同一个数据库地址通过 `mount` 命令挂载即可。特别注意，文件系统只需创建一次，不应该也不需要在其他主机上重复执行文件系统创建操作。


================================================
FILE: docs/zh_cn/tutorials/juicefs_on_colab.md
================================================
---
title: 在 Colab 上通过 Google CloudSQL 和 GCS 使用 JuiceFS
sidebar_position: 5
slug: /juicefs_on_colab
---

[Colaboratory](https://colab.research.google.com), 或者简称“Colab”, 是 Google Research 的产品，它允许任何人通过浏览器编写和执行 Python 代码，特别适合机器学习、数据分析和教育。
Colab 支持从 Google Drive 将文件上传到 Colab 实例或从 Colab 实例下载文件。然而在某些情况下，Google Drive 可能不太方便与 Colab 一起使用，在这种情况下，JuiceFS 是一个很有用的工具，因为他允许在 Colab 实例之间，或在 Colab 实例与本地或本地机器之间轻松的同步文件。[这里是一个使用了 JuiceFS 的 Colab 笔记本示例](https://colab.research.google.com/drive/1wA8vRwqiihXkI6ViDU8Ud868UeYtmCo5)

说明下在 Colab 环境中使用 JuiceFS 的必要步骤。我们使用 Google CloudSQL 作为 JuiceFS 的元数据引擎，使用 Google Cloud Storage (GCS) 作为 JuiceFS 的对象存储。其他类型的元数据引擎与对象存储可以参考 [如何设置元数据引擎](../reference/how_to_set_up_metadata_engine.md) 和 [如何设置对象存储](../reference/how_to_set_up_object_storage.md)。

下面将要提到的很多步骤你可以也参考 [快速上手指南](../getting-started/for_distributed.md)。

## 步骤

1. 在任何一个可以访问 Google Cloud 资源的机器或者实例上格式化一个 JuiceFS 文件系统
2. 挂载 JuiceFS 文件系统到 Colab Notebook 上
3. 愉快的跨平台跨机器分享存储的文件

## 先决条件

在这个示例中，我们使用了 Google Cloud 平台的 CloudSQL 和 Google Cloud Storage (GCS) 来创建一个高性能的 JuiceFS 文件系统。因此它需要你有一个 Google Cloud 平台的账户才能按照文档操作下去。
或者如果你有其他云平台的资源（比如 AWS 的 RDBS 和 S3），您也可以根据本指南和其他参考文档，以实现类似的解决方案。

您可能还希望 Colab 实例位于同一区域或靠近部署 CloudSQL 和 GCS 的区域使 JuiceFS 达到最佳性能。该教程适用于随机托管的 Colab 实例，所以您或许注意到了由于 Colab 实例和 CloudSQL/GCS 区域之间的延迟而导致 JuiceFS 性能缓慢。如果想要实例在特定地区去启动 Colab，可以参考[通过 GCP Marketplace 在 Colab 上启动 GCE 虚拟机](https://research.google.com/colaboratory/marketplace.html)

按照本指南操作前，您需要准备好以下资源：

* 谷歌云平台账户需要准备就绪，还要创建了一个 *project* 。就这个示例而言，我们将创建 `juicefs-learning` GCP 项目作为演示项目
* 准备使用的 CloudSQL（Postgres）。在本演示中使用实例 `juicefs-learning:europe-west1:juicefs-sql-example-1` 作为元数据服务
* 创建的 GCS 桶作为对象存储服务。在这个演示中，我们将使用`gs://juicefs-bucket-example-1`作为存储文件的桶。
* 对 Postgres 服务器和 GCS 存储桶具有写入访问权限的服务账户或授权用户帐户

## 详细步骤

### 步骤 1 - 创建并挂载一个 JuiceFS 文件系统

这个步骤只需要操作一次，你可以在任何可以访问你的 Google Cloud 资源的机器或者实例上执行。
在这里例子中，我将在我的本地机器上操作，首先你可以使用 `gcloud auth application-default login` 获取本地的凭证，或者使用 `GOOGLE_APPLICATION_CREDENTIALS` 设置 JSON 凭证文件。
然后你可以使用 [Cloud SQL 代理功能](https://cloud.google.com/sql/docs/mysql/connect-admin-proxy) 将你的 Postgres 云服务暴露在你本地机器上的一个端口上（这里是 5432）。

```shell
gcloud auth application-default login

# 或者设置 JSON 凭证文件 GOOGLE_APPLICATION_CREDENTIALS=/path/to/key

cloud_sql_proxy -instances=juicefs-learning:europe-west1:juicefs-sql-example-1=tcp:0.0.0.0:5432
```

然后使用 `juicefs format` 命令创建一个名为“myvolume”的新文件系统。之后将此文件系统挂载到您可以访问云资源的任何其他机器/实例中。
你可以在[这里](https://github.com/juicedata/juicefs/releases)下载 JuiceFS。

```shell
juicefs format \
    --storage gs \
    --bucket gs://juicefs-bucket-example-1 \
    "postgres://postgres:mushroom1@localhost:5432/juicefs?sslmode=disable" \
    myvolume
```

再次提醒：这个步骤只需要被执行一次。

### 步骤 2 - 挂载 JuiceFS 到 Colab

完成上述步骤 1 后，这意味着您已经有一个 JuiceFS 文件系统（此案例中为“myvolume”）并准备就绪可以使用了。
因此，在这里，我们打开一个 Colab 页面并运行这些命令，将我们的文件系统挂载到一个名为“mnt”的文件夹中。
首先我们下载 JuiceFS 二进制然后按照步骤一操作获取 GCP 的凭证和打开 Cloud SQL 代理。
请注意，以下命令在 Colab 环境中运行，一个 `!` 在开头意味着开始运行 shell 命令。

1. 下载 `JuiceFS`到 Colab 实例上

   ```shell
   ! curl -sSL https://d.juicefs.com/install | sh -
   ```

2. 设置 Google Cloud 凭证

   ```shell
   ! gcloud auth application-default login
   ```

3. 打开 cloud_sql 代理

   ```shell
   ! wget https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 -O cloud_sql_proxy
   ! chmod +x cloud_sql_proxy
   ! GOOGLE_APPLICATION_CREDENTIALS=/content/.config/application_default_credentials.json nohup ./cloud_sql_proxy -instances=juicefs-learning:europe-west1:juicefs-sql-example-1=tcp:0.0.0.0:5432 >> cloud_sql_proxy.log &
   ```

4. 挂载 JuiceFS file system `myvolumn` 到 `mnt` 目录上。

   ```shell
   ! GOOGLE_APPLICATION_CREDENTIALS=/content/.config/application_default_credentials.json nohup juicefs mount  "postgres://postgres:mushroom1@localhost:5432/juicefs?sslmode=disable" mnt > juicefs.log &
   ```

现在你应该可以像使用本地文件系统一样使用 `mnt` 目录了。

### 步骤 3 - 在任意时间从其他实例加载数据

现在，由于您在 JuiceFS 文件系统中的第 2 步中存储了数据，因此您可以随时在任何其他机器中重复第 2 步中提到的所有操作，以便再次访问之前存储的数据或存储更多数据。

恭喜！现在您已经学会了如何使用 JuiceFS，特别是如何将其与 Google Colab 一起以分布式的方式共享和存储数据文件。
[一个使用了 JuiceFS 的 Colab 笔记本示例](https://colab.research.google.com/drive/1wA8vRwqiihXkI6ViDU8Ud868UeYtmCo5)

愉快的编码吧 :）


================================================
FILE: docs/zh_cn/tutorials/juicefs_on_k3s.md
================================================
---
title: 在 K3s 上使用 JuiceFS
sidebar_position: 2
slug: /juicefs_on_k3s
---

[K3s](https://k3s.io) 是一个经过功能优化的 Kubernetes 发行版，它与 Kubernetes 完全兼容，即几乎所有在 Kubernetes 的操作都可以在 K3s 上执行。K3s 将整个容器编排系统打包进了一个容量不足 100MB 的二进制程序，减少了部署 Kubernetes 生产集群的环境依赖，大大降低了安装难度，对系统硬件的性能要求也更低。

在本文中，我们会建立一个包含两个节点的 K3s 集群，为集群安装并配置使用 [JuiceFS CSI Driver](https://github.com/juicedata/juicefs-csi-driver)，最后会创建一个 NGINX 容器进行验证。

## 部署 K3s 集群

K3s 对硬件的**最低要求**很低：

- **内存**：512MB+（建议 1GB+）
- **CPU**：1 核

在部署生产集群时，通常可以将 4 核 CPU 和 8G 内存作为一个节点的硬件配置起点，详情查看[硬件需求](https://rancher.com/docs/k3s/latest/en/installation/installation-requirements/#hardware)。

### K3s server 节点

运行 server 节点的服务器 IP 地址为：`192.168.1.35`

使用 K3s 官方提供的脚本，即可将常规的 Linux 发行版自动部署成为 server 节点。

```shell
curl -sfL https://get.k3s.io | sh -
```

部署成功后，K3s 服务会自动启动，kubectl 等工具也会一并安装。

执行命令查看节点状态：

```shell
sudo kubectl get nodes
```

```output
NAME     STATUS   ROLES                  AGE   VERSION
k3s-s1   Ready    control-plane,master   28h   v1.21.4+k3s1
```

获取 `node-token`：

```shell
sudo -u root cat /var/lib/rancher/k3s/server/node-token
```

### K3s worker 节点

运行 worker 节点的服务器 IP 地址为：`192.168.1.36`

执行以下命令，将其中 `K3S_URL` 的值改成 server 节点的 IP 或域名，默认端口 `6443`。将 `K3S_TOKEN` 的值替换成从 server 节点获取的 `node-token`。

```shell
curl -sfL https://get.k3s.io | K3S_URL=http://192.168.1.35:6443 K3S_TOKEN=K1041f7c4fabcdefghijklmnopqrste2ec338b7300674f::server:3d0ab12800000000000000006328bbd80 sh -
```

部署成功以后，回到 server 节点查看节点状态：

```shell
sudo kubectl get nodes
```

```output
NAME     STATUS   ROLES                  AGE   VERSION
k3s-s1   Ready    control-plane,master   28h   v1.21.4+k3s1
k3s-n1   Ready    <none>                 28h   v1.21.4+k3s1
```

## 安装 CSI Driver

与在 [Kubernetes 上安装 JuiceFS CSI Driver](../deployment/how_to_use_on_kubernetes.md) 的方法一致，你可以通过 Helm 安装，也可以通过 kubectl 安装。

这里我们用 kubectl 安装，执行以下命令安装 JuiceFS CSI Driver：

```shell
kubectl apply -f https://raw.githubusercontent.com/juicedata/juicefs-csi-driver/master/deploy/k8s.yaml
```

### 创建存储类

复制并修改以下代码创建一个配置文件，例如：`juicefs-sc.yaml`

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: juicefs-sc-secret
  namespace: kube-system
type: Opaque
stringData:
  name: "test"
  metaurl: "redis://juicefs.afyq4z.0001.use1.cache.amazonaws.com/3"
  storage: "s3"
  bucket: "https://juicefs-test.s3.us-east-1.amazonaws.com"
  access-key: "<your-access-key-id>"
  secret-key: "<your-access-key-secret>"
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: juicefs-sc
provisioner: csi.juicefs.com
reclaimPolicy: Retain
volumeBindingMode: Immediate
parameters:
  csi.storage.k8s.io/node-publish-secret-name: juicefs-sc-secret
  csi.storage.k8s.io/node-publish-secret-namespace: kube-system
  csi.storage.k8s.io/provisioner-secret-name: juicefs-sc-secret
  csi.storage.k8s.io/provisioner-secret-namespace: kube-system
```

配置文件中 `stringData` 部分用来设置 JuiceFS 文件系统相关的信息，系统会根据你指定的信息创建文件系统。当需要在存储类中使用已经预先创建好的文件系统时，则只需要填写 `name` 和 `metaurl` 两项即可，其他项可以删除或将值留空。

执行命令，部署存储类：

```shell
kubectl apply -f juicefs-sc.yaml
```

查看存储类状态：

```shell
sudo kubectl get sc
```

```output
NAME                   PROVISIONER             RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE
local-path (default)   rancher.io/local-path   Delete          WaitForFirstConsumer   false                  28h
juicefs-sc             csi.juicefs.com         Retain          Immediate              false                  28h
```

> **注意**：一个存储类与一个 JuiceFS 文件系统相关联，你可以根据需要创建任意数量的存储类。但需要注意修改配置文件中的存储类名称，避免同名冲突。

## 使用 JuiceFS 持久化 NGINX 数据

接下来部署一个 NGINX Pod，使用 JuiceFS 存储类声明的持久化存储。

### Deployment

创建一个配置文件，例如：`depolyment.yaml`

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: web-pvc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Pi
  storageClassName: juicefs-sc
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-run
  labels:
    app: nginx
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: linuxserver/nginx
          ports:
            - containerPort: 80
          volumeMounts:
            - mountPath: /config
              name: web-data
      volumes:
        - name: web-data
          persistentVolumeClaim:
            claimName: web-pvc
```

执行部署：

```
sudo kubectl apply -f depolyment.yaml
```

### Service

创建一个配置文件，例如：`service.yaml`

```yaml
apiVersion: v1
kind: Service
metadata:
  name: nginx-run-service
spec:
  selector:
    app: nginx
  ports:
    - name: http
      port: 80
```

执行部署：

```shell
sudo kubectl apply -f service.yaml
```

### Ingress

K3s 默认预置了 traefik-ingress，通过以下配置为 NGINX 创建一个 ingress。例如：`ingress.yaml`

```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: nginx-run-ingress
  annotations:
    traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
  rules:
    - http:
        paths:
          - pathType: Prefix
            path: "/web"
            backend:
              service:
                name: nginx-run-service
                port:
                  number: 80
```

执行部署：

```shell
sudo kubectl apply -f ingress.yaml
```

### 访问

部署完成以后，使用相同局域网的主机访问任何一个集群节点，即可看到 NGINX 的欢迎页面。

![K3s-NGINX-welcome](../images/k3s-nginx-welcome.png)

接下来查看一下容器是否成功挂载了 JuiceFS，执行命令查看 Pod 状态：

```shell
sudo kubectl get pods
```

```output
NAME                         READY   STATUS    RESTARTS   AGE
nginx-run-7d6fb7d6df-qhr2m   1/1     Running   0          28h
nginx-run-7d6fb7d6df-5hpv7   1/1     Running   0          24h
```

执行命令，查看任何一个 Pod 的文件系统挂载情况：

```shell
$ sudo kubectl exec nginx-run-7d6fb7d6df-qhr2m -- df -Th
Filesystem     Type          Size  Used Avail Use% Mounted on
overlay        overlay        20G  3.2G   17G  17% /
tmpfs          tmpfs          64M     0   64M   0% /dev
tmpfs          tmpfs         2.0G     0  2.0G   0% /sys/fs/cgroup
JuiceFS:jfs    fuse.juicefs  1.0P  174M  1.0P   1% /config
/dev/sda1      ext4           20G  3.2G   17G  17% /etc/hosts
shm            tmpfs          64M     0   64M   0% /dev/shm
tmpfs          tmpfs         2.0G   12K  2.0G   1% /run/secrets/kubernetes.io/serviceaccount
tmpfs          tmpfs         2.0G     0  2.0G   0% /proc/acpi
tmpfs          tmpfs         2.0G     0  2.0G   0% /proc/scsi
tmpfs          tmpfs         2.0G     0  2.0G   0% /sys/firmware
```

可以看到，名为 `jfs` 的文件系统已经挂载到了容器的 `/config` 目录，已使用空间为 174M。

这就表明集群中的 Pod 已经成功配置并使用 JuiceFS 持久化数据了。


================================================
FILE: docs/zh_cn/tutorials/juicefs_on_kubesphere.md
================================================
---
title: 在 KubeSphere 上使用 JuiceFS
sidebar_position: 3
slug: /juicefs_on_kubesphere
---

[KubeSphere](https://kubesphere.com.cn) 是在 Kubernetes 之上构建的以应用为中心的多租户容器平台，提供全栈的 IT 自动化运维的能力，简化企业的 DevOps 工作流。

KubeSphere 提供了运维友好的向导式操作界面，即便是 Kubernetes 经验并不丰富的用户，也能相对轻松的上手开始管理和使用。它提供了基于 Helm 的应用市场，可以在图形化界面下非常轻松地安装各种 Kubernetes 应用。

本文将介绍如何在 KubeSphere 中一键部署 JuiceFS CSI Driver，为集群上的各种应用提供数据持久化。

## 前提条件

1. 安装 KubeSphere

   安装 KubeSphere 有两种方法。一是在 Linux 上直接安装，可以参考文档：[在 Linux 安装 KubeSphere](https://kubesphere.com.cn/docs/quick-start/all-in-one-on-linux) ；
二是在已有 Kubernetes 中安装，可以参考文档：[在 Kubernetes 安装 KubeSphere](https://kubesphere.com.cn/docs/quick-start/minimal-kubesphere-on-k8s) 。

2. 在 KubeSphere 中启用应用商店

   在 KubeSphere 中启用应用商店可以参考文档：[KubeSphere 应用商店](https://kubesphere.com.cn/docs/pluggable-components/app-store) 。

## 安装 JuiceFS CSI Driver

如果 KubeSphere 的版本为 v3.2.0 及以上，可以直接在应用商店中安装 CSI Driver，跳过「配置应用模板/应用仓库」步骤，直接进入「安装」步骤；如果 KubeSphere 版本低于 v3.2.0，按照以下步骤配置应用模板/应用仓库。

### 配置应用模板/应用仓库

安装 JuiceFS CSI Driver 首先需要创建应用模板，这里有两种方法。

#### 方法一：应用仓库

在企业空间中点击进去应用管理，选择「应用仓库」，点击创建按钮添加 JuiceFS CSI 仓库，填写：

- 仓库名称：`juicefs-csi-driver`
- Index URL：`https://juicedata.github.io/charts/`

![kubesphere_app_shop](../images/kubesphere_app_shop.png)

#### 方法二：应用模板

先在 JuiceFS CSI Driver 仓库下载 chart 压缩包：[https://github.com/juicedata/juicefs-csi-driver/releases](https://github.com/juicedata/juicefs-csi-driver/releases)。

在「企业空间」中点击进入「应用管理」，选择「应用模板」，点击「创建」，上传 chart 压缩包：

![kubesphere_app_template](../images/kubesphere_app_template.png)

### 安装

在「企业空间」中选择您所需部署的「项目」（KubeSphere 中的项目即为 K8s 中的 namespace），选择「应用负载」，点击「部署新应用」按钮，选择「来自应用商店」，然后选择 `juicefs`：

![kubesphere_shop_juicefs](../images/kubesphere_shop_juicefs.jpg)

若 KubeSphere 版本低于 v3.2.0，根据上一步配置好的应用模板，选择部署应用「来自应用模板」：

![kubesphere_install_csi](../images/kubesphere_install_csi.png)

进入配置修改页面后一致，修改以下两个地方：

- namespace：改成对应的项目名
- storageClass.backend：
  `backend` 部分用来定义文件系统后端的数据库和对象存储，可以查阅[创建文件系统](../getting-started/standalone.md#juicefs-format)了解相关内容。

您也可以通过 KubeSphere 的应用商店快速创建数据库（如 Redis）和对象存储（如 MinIO）。
比如在 KubeSphere 平台搭建 Redis：在当前所在项目中选择「应用负载」，点击「部署新应用」按钮，选择「来自应用商店」，选择「Redis」，然后快速部署即可。Redis 的访问 URL 可以通过部署好的应用的服务名，如下：

![kubesphere_redis](../images/kubesphere_redis.png)

在 KubeSphere 平台搭建 MinIO 也是类似的流程，不过在部署 MinIO 之前可以修改 MinIO 的 accessKey 和 secretKey，并且需要记住配置的值。如下图：

![kubesphere_create_minio](../images/kubesphere_create_minio.png)

> 注：如果部署 MinIO 出现权限问题，可以将配置中的 `securityContext.enables` 设置为 false。

MinIO 的访问 URL 可以通过部署好的应用的服务名，如下：

![kubesphere_minio](../images/kubesphere_minio.png)

Redis 和 MinIO 都搭建好之后，就可以填写 JuiceFS CSI Driver 的 `backend` 值了。其中：

1. `metaurl` 为刚才创建的 Redis 的数据库地址，Redis 的访问地址可用 Redis 应用对应的服务名，如 `redis://redis-rzxoz6:6379/1`
2. `storage` 为对象存储的类型，如 `minio`
3. `bucket` 为刚才创建的 MinIO 的可用 bucket（JuiceFS 会自动创建，不需要手动创建），MinIO 的访问地址可用 MinIO 应用对应的服务名，如 `http://minio-qkp9my:9000/minio/test`
4. `accessKey` 和 `secretKey` 用刚才创建的 MinIO 的 accessKey 和 secretKey

![kubesphere_update_csi](../images/kubesphere_update_csi.png)

配置修改完毕后，点击安装即可。

## 使用

### 部署应用

按照上述方法安装好的 JuiceFS CSI Driver 已经创建好一个 `StorageClass`，名为上述 `storageClass` 的 `name`，比如上述创建的 `StorageClass` 为 `juicefs-sc`，可以直接使用。

然后需要创建一个 PVC，指定使用 `juicefs-sc` 这个 `StorageClass`。在「项目」中，选择「存储管理」，再选择「存储卷」，点击「创建」按钮创建 PVC，其中「存储类型」选择 `juicefs-sc`，如下：

![kubesphere_pvc](../images/kubesphere_pvc.png)

PVC 创建好之后，再在「项目」的「应用负载」中，选择「工作负载」，点击「创建」按钮部署工作负载，其中「基本信息」页填写自己喜欢的名字；「容器镜像」页可以填写镜像 `centos` ；
启动命令 `sh,-c,while true; do echo $(date -u) >> /data/out.txt; sleep 5; done` ；「存储卷来源」选择「已有存储卷」，再选择上一步创建的 PVC，容器内路径填写 `/data` 如下：

![kubesphere_deployment](../images/kubesphere_deployment.png)

![kubesphere_workload](../images/kubesphere_workload.png)

部署完成后可以看到运行中的容器组：

![kubesphere_pod](../images/kubesphere_pod.png)

### 新建 StorageClass

若安装 JuiceFS CSI Driver 的时候没有创建 `StorageClass`，或者需要另外新建，可以遵循以下步骤：

准备好元数据服务和对象存储服务后，新建一个 `Secret`。在「平台管理」页面选择「配置中心」，选择「密钥」，点击「创建」按钮新建：

![kubesphere_create_secret](../images/kubesphere_create_secret.png)

「密钥设置」中填入准备好的元数据服务和对象存储信息，如下：

![kubesphere_update_secret](../images/kubesphere_update_secret.png)

`Secret` 新建好之后，创建 `StorageClass`，在「平台管理」页面选择「存储管理」，选择「存储类型」，点击「创建」按钮新建，其中「存储系统」选择「自定义」：

![kubesphere_sc_create](../images/kubesphere_sc_create.png)

设置页面信息如下，其中「存储系统」填写 `csi.juicefs.com`，另外再设置 4 个参数：

- `csi.storage.k8s.io/provisioner-secret-name`: 刚刚创建好的 secret name
- `csi.storage.k8s.io/provisioner-secret-namespace`: secret 对应的项目名
- `csi.storage.k8s.io/node-publish-secret-name`: 刚刚创建好的 secret name
- `csi.storage.k8s.io/node-publish-secret-namespace`: secret 对应的项目名

![kubesphere_sc_update](../images/kubesphere_sc_update.png)

点击「创建」按钮之后，`StorageClass` 就创建好了。


================================================
FILE: docs/zh_cn/tutorials/juicefs_on_rancher.md
================================================
---
title: 在 Rancher 上使用 JuiceFS
sidebar_position: 2
slug: /juicefs_on_rancher
---

简单来说，[Rancher](https://rancher.com) 是一个企业级的 Kubernetes 集群管理工具，使用它可以非常轻松的在各种云计算平台上快速的完成 Kubernetes 集群的部署。

Rancher 提供了基于浏览器的管理界面，即便是 Kubernetes 经验并不丰富的用户，也能相对轻松的上手开始管理和使用。它默认预置了基于 Helm 的应用市场，可以在图形化界面下非常轻松的安装各种 Kubernetes 应用。

本文将介绍如何在 Linux 系统上部署 Rancher，并在上面创建 Kubernetes 集群，然后通过其内置的应用市场，一键部署 JuiceFS CSI Driver，为集群上的各种应用提供数据持久化。

## 安装 Rancher

几乎所有主流的现代 Linux 发行版都可以安装 Rancher，它既可以直接安装在操作系统上，也可以安装在 Docker、Kubernetes、K3s 或 RKE 上，不论在哪种环境上安装都是“Product-Ready”的。

这里我们选择将 Rancher 安装在 Docker 上，配置上需要满足以下要求：

- **操作系统**：x86-64 架构的 Linux 系统
- **内存**：4GB 以上
- **Docker**：19.03+

执行以下命令安装 Rancher：

```shell
sudo docker run --privileged -d --restart=unless-stopped -p 80:80 -p 443:443 rancher/rancher
```

容器创建完成以后，通过浏览器访问主机的 IP 地址就能打开 Rancher 的管理界面。

![Rancher-welcome](../images/rancher-welcome.jpeg)

## 创建 Kubernetes 集群

Rancher 安装成功以后，可以看到它已经在当前容器中部署了一个 K3s 集群，Rancher 相关资源都运行在这个内部的 K3s 集群中，无需理会这个集群。

接下来开始创建 Kubernetes 集群，在欢迎页面的 Cluster 部分点击 `Create` 创建集群。Rancher 支持在各大主流云计算平台创建 Kubernetes 集群，这里我们要在 Rancher 的宿主机上直接选择集群，因此选择 `Custom`。然后根据向导填写集群名称，选择 Kubernetes 版本即可。

![Rancher-cluster-create](../images/rancher-cluster-create.jpg)

在 `Cluster Options` 页面中，选择要创建的节点角色，然后复制生成命令，在目标主机上执行即可。

![Rancher-cluster-options](../images/rancher-cluster-options.jpg)

集群创建完成后，Rancher 的集群列表中会有状态显示。

![Rancher-clusters](../images/rancher-clusters.jpg)

## 一键安装 JuiceFS CSI Driver

在集群列表中点击进入创建的 Kubernetes 集群，左侧导航菜单点击展开 `应用市场` → `Chart 仓库`，点击 `创建` 按钮添加 JuiceFS CSI 仓库，填写：

- **仓库名称**：`juicefs`
- **Index URL**：`https://juicedata.github.io/charts/`

![Rancher-new-repo](../images/rancher-new-repo.jpg)

创建以后，在仓库列表中可以看到刚刚添加的 JuiceFS CSI 仓库。

![Rancher-repos](../images/rancher-repos.jpg)

紧接着通过左侧菜单点击打开 `应用市场` → `Charts`，搜索栏中输入 `juicefs`，然后点击打开检索出的 `juicefs-csi-driver`。

![Rancher-chart-search](../images/rancher-chart-search.jpg)

在应用详情页面点击“安装”按钮，默认会安装最新版本，也可以点选切换到历史版本进行安装。

![Rancher-chart-info](../images/rancher-chart-info.jpg)

安装向导共有两步：

### 第一步：设置应用的 `Namespace`

JuiceFS CSI Driver 默认为 `kube-system`，这一步无需设置。

### 第二步：调整配置参数

这个页面提供了 YAML 编辑器，你可以根据需要调整 JuiceFS 相关的信息，通常只需要修改 `storageClasses` 部分，其中 `backend` 部分用来定义文件系统后端的数据库和对象存储。如果你使用的是已经预先创建的文件系统，那么只需填写 `metaurl` 和 `name` 两项即可，例如：

```yaml
...
storageClasses:
  - backend:
      accessKey: ''
      bucket: ''
      metaurl: 'redis://:mypasswd@efgh123.redis.rds.aliyuncs.com/1'
      name: myjfs
      secretKey: ''
      storage: ''
    enabled: true
    name: juicefs-sc
    reclaimPolicy: Retain
...
```

> **提示**：如果你有多个 JuiceFS 文件系统，分别需要关联到 Kubernetes 集群不同的 storageClass，可以在 `storageClasses` 数组后面再加 storageClass 配置项，注意修改存储类的名称，避免冲突。

点击「安装」，等待应用安装完成。

![Rancher-chart-installed](../images/rancher-chart-installed.jpg)

## 使用 JuiceFS 持久化数据

部署应用时，在存储配置中指定 `juicefs-sc` 即可。

![Rancher-PVC](../images/rancher-pvc.jpg)


================================================
FILE: docs/zh_cn/tutorials/juicefs_on_wsl.md
================================================
---
title: 在 WSL 中使用 JuiceFS
sidebar_position: 9
---

WSL 全称 Windows Subsystem for Linux，即适用于 Linux 的 Windows 子系统。它可以让你在 Windows 系统环境下运行大多数 GNU/Linux 原生命令、工具和程序，且不必像用虚拟机或双系统那样产生额外的硬件开销。

## 安装 WSL

使用 WSL 要求必须是 Windows 10 2004 以上或 Windows 11。

查看当前系统的版本，可以通过组合键 <kbd>Win</kbd> + <kbd>R</kbd> 唤出运行程序，输入并运行 `winver`。

![WSL/winver](../images/wsl/winver.png)

确认 Windows 版本以后，以管理员身份打开 PowerShell 或 Windows 命令提示符，运行安装命令：

```powershell
wsl --install
```

该命令会下载最新的 Linux 内核，安装并将 WSL 2 作为默认版本，并安装 Linux 发行版（默认为 Ubuntu）。

也可以直接指定要安装的发行版：

```powershell
wsl --install -d ubuntu
```

:::tip 提示
`wsl --list --online`  命令可以查看所有可选的发行版。
:::

## 设置 Linux 用户和密码

WSL 安装完成以后，即可在开始菜单找到新安装的 Linux 发行版。

![WSL/startmenu](../images/wsl/startmenu.png)

点击 Ubuntu 子系统的快捷方式，WSL 会打开 Linux 子系统的终端。初次运行会要求设置管理 Linux 子系统的用户和密码，根据提示设置即可。

![WSL/init](../images/wsl/init.png)

这里设置的用户名和密码有以下几点需要注意：

- 此用户专用于该 Linux 子系统的管理，与 Windows 系统中的用户无关；
- 此用户将作为 Linux 子系统的默认用户，并在启动时自动登录；
- 此用户将被视为 Linux 子系统的管理员，允许执行 `sudo` 命令；
- WSL 中允许同时运行多个 Linux 子系统，且每个子系统都需要设置一个管理用户。

## 在 WSL 中使用 JuiceFS

在 WSL 中使用 JuiceFS，即是在 Linux 系统中使用 JuiceFS，这里以社区版为例进行介绍。

### 安装客户端

执行命令，在 Linux 子系统中安装 JuiceFS 客户端：

   ```shell
   curl -sSL https://d.juicefs.com/install | sh -
   ```

### 创建文件系统

JuiceFS 是数据与元数据分离的分布式文件系统，通常用对象存储作为数据存储，用 Redis、PostgreSQL 或 MySQL 作为元数据存储。这里假设已经准备了如下材料：

#### 对象存储

查看「[JuiceFS 支持的数据存储](../reference/how_to_set_up_object_storage.md)」

- **Bucket Endpoint**：`https://myjfs.oss-cn-shanghai.aliyuncs.com`
- **Access Key ID**：`ABCDEFGHIJKLMNopqXYZ`
- **Access Key Secret**：`ZYXwvutsrqpoNMLkJiHgfeDCBA`

#### 数据库

查看「[JuiceFS 支持的元数据引擎](../reference/how_to_set_up_metadata_engine.md)」

- **数据库地址**：`myjfs-sh-abc.redis.rds.aliyuncs.com:6379`
- **数据库密码**：`mypassword`

将私密信息写入环境变量：

```shell
export ACCESS_KEY=ABCDEFGHIJKLMNopqXYZ
export SECRET_KEY=ZYXwvutsrqpoNMLkJiHgfeDCBA
export REDIS_PASSWORD=mypassword
```

创建名为 `myjfs` 的文件系统：

```shell
juicefs format \
    --storage oss \
    --bucket https://myjfs.oss-cn-shanghai.aliyuncs.com \
    redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 \
    myjfs
```

### 挂载和使用

把数据库密码写入环境变量：

```shell
export REDIS_PASSWORD=mypassword
```

:::note 注意
对象存储的 API 密钥信息仅在创建文件系统时需要设置，一旦文件系统创建成功，相应的密钥信息会被写入数据库，JuiceFS 客户端会在挂载文件系统时自动从数据库中读取，无需重复设置。
:::

挂载文件系统到用户家目录下的 `mnt`：

```shell
sudo juicefs mount -d redis://myjfs-sh-abc.redis.rds.aliyuncs.com:6379/1 $HOME/mnt
```

如果需要从 Windows 系统访问 Linux 子系统中挂载的 JuiceFS 文件系统，在资源管理器左侧列表中找到 Linux 子系统，然后找到并打开挂载点路径即可。

![WSL/access-jfs-from-win](../images/wsl/access-jfs-from-win.png)

有关 JuiceFS 使用方面的更多内容请查阅官方文档。

## WSL 文件存储性能问题

WSL 打通了 Windows 与 Linux 子系统，允许二者相互访问彼此系统中存储的文件。

![WSL/Windows-to-Linux](../images/wsl/windows-to-linux.png)

但需要注意，从 Windows 访问 Linux 子系统或从 Linux 子系统访问 Windows 势必会因系统之间的转换而产生一定的性能开销。因此，推荐的做法是根据程序所在的系统来决定文件存储的位置，对于 Linux 子系统中的程序，它要处理的文件也应该存储在 Linux 子系统中性能才更理想。

在 Linux 子系统中，WSL 将 Windows 的各个盘符挂载到了 `/mnt`，比如 C: 盘在 Linux 子系统中的挂载点是 `/mnt/c`。

![WSL/mount-point](../images/wsl/mount-point.png)

为了保证性能最优，在 WSL 中使用 JuiceFS 时，不论存储还是缓存路径都应设置在 Linux 子系统中。换言之，应该避免把存储或缓存设置在 `/mnt/c` 类似的 Windows 分区挂载点上。

通过使用 JuiceFS 自带的 `bench` 基准测试工具，结果显示，将文件系统挂载到 Windows（如 `/mnt/c`）的性能要比挂载到 Linux 子系统内部（如 `$HOME/mnt`）低 30% 左右。

## 已知问题

当通过 Windows 资源管理器拷贝文件到 Linux 子系统时，WSL 会自动为每个文件附加一个带有 `Zone.Identifier` 标识的同名文件。这是 NTFS 文件系统的一种安全防护机制，意在对外部文件的来源进行跟踪，但对于 WSL 来说，这个功能应该属于 bug 且已经有人在 GitHub 上向微软开发团队反馈 [#7456](https://github.com/microsoft/WSL/issues/7456)。

受此问题影响，通过 Windows 资源管理器向 Linux 子系统中挂载的 JuiceFS 文件系统存入文件时也会出现同样的问题。但在 Linux 子系统内部读写 JuiceFS 文件系统不受该 bug 的干扰。

![WSL/zone-identifier](../images/wsl/zone-identifier.png)


================================================
FILE: docs/zh_cn/tutorials/qcloud.md
================================================
---
title: 在腾讯云使用 JuiceFS
sidebar_position: 8
slug: /clouds/qcloud
---

如下图所示，JuiceFS 存储由数据库和对象存储共同驱动。存入 JuiceFS 的文件会按照一定的规则被拆分成固定大小的数据块存储在对象存储中，数据对应的元数据则会存储在数据库中。

元数据完全独立存储，对文件的检索和处理并不会直接操作对象存储中的数据，而是先在数据库中操作元数据，只有当数据发生变化的时候，才会与对象存储交互。

这样的设计可以有效缩减对象存储在请求数量上的费用，同时也能让我们显著感受到 JuiceFS 带来的性能提升。

![JuiceFS-qcloud](../images/juicefs-qcloud.png)

## 准备

通过前面的架构描述，可以知道 JuiceFS 需要搭配数据库和对象存储一起使用。这里我们直接使用腾讯云的 CVM 云服务器，结合云数据库和 COS 对象存储。

在创建云计算资源时，尽量选择在相同的区域，这样可以让资源之间通过内网线路相互访问，避免使用公网线路产生额外的流量费用。

### 一、云服务器 CVM

JuiceFS 对服务器硬件没有特殊要求，一般来说，云平台上最低配的云服务器也能稳定使用 JuiceFS，通常你只需要选择能够满足自身业务的配置即可。

需要特别说明的是，你不需要为使用 JuiceFS 重新购买服务器或是重装系统，JuiceFS 没有业务入侵性，不会对你现有的系统和程序造成任何的干扰，你完全可以在正在运行的服务器上安装和使用 JuiceFS。

JuiceFS 默认会占用不超过 1GB 的硬盘空间作为缓存，可以根据需要调整缓存空间的大小。该缓存是客户端与对象存储之间的一个数据缓冲层，选择性能更好的云盘，可以获得更好的性能表现。

在操作系统方面，腾讯云 CVM 提供的所有操作系统都可以安装 JuiceFS。

**本文使用的 CVM 配置如下：**

| 服务器配置   |                          |
| ------------ | ------------------------ |
| **CPU**      | 1 核                     |
| **内存**     | 2 GB                     |
| **存储**     | 50 GB                    |
| **操作系统** | Ubuntu Server 20.04 64 位 |
| **地域**     | 上海五区                 |

### 二、云数据库

JuiceFS 会将数据对应的元数据全部存储在独立的数据库中，目前已开放支持的数据库有 Redis、MySQL、PostgreSQL、TiKV 和 SQLite。

根据数据库类型的不同，带来的元数据性能和可靠性表现也各不相同。比如 Redis 是完全运行在内存上的，它能提供极致的性能，但运维难度较高，可靠性相对低。而 MySQL、PostgreSQL 是关系型数据库，性能不如 Redis，但运维难度不高，可靠性也有一定的保障。SQLite 是单机单文件关系型数据库，性能较低，也不适合用于大规模数据存储，但它免配置，适合单机少量数据存储的场景。

如果只是为了评估 JuiceFS 的功能，你可以在 CVM 云服务器手动搭建数据库使用。当你要在生产环境使用 JuiceFS 时，如果没有专业的数据库运维团队，腾讯云的云数据库服务通常是更好的选择。

当然，如果你愿意，也可以使用其他云平台上提供的云数据库服务。但在这种情况下，你只能通过公网访问云数据库，也就是说，你必须向公网暴露数据库的端口，这存在极大的安全风险，最好不要这样使用。

如果必须通过公网访问数据库，可以通过云数据库控台提供的白名单功能，严格限制允许访问数据库的 IP 地址，从而提升数据的安全性。从另一个角度说，如果你通过公网无法成功连接云数据库，那么可以检查数据库的白名单，检查是不是该设置限制了你的访问。

|    数据库    |          Redis           |     MySQL、PostgreSQL      |         SQLite         |
| :----------: | :----------------------: | :------------------------: | :--------------------: |
|   **性能**   |            强            |            适中            |           弱           |
| **运维门槛** |            高            |            适中            |           低           |
|  **可靠性**  |            低            |            适中            |           低           |
| **应用场景** | 海量数据、分布式高频读写 | 海量数据、分布式中低频读写 | 少量数据单机中低频读写 |

**本文使用了云数据库 TencentDB Redis，通过 VPC 私有网络与 CVM 云服务器交互访问：**

| Redis 版本   | 5.0 社区版             |
| ------------ | ----------------       |
| **实例规格** | 1GB 内存版（标准架构） |
| **连接地址** | 192.168.5.5:6379       |
| **可用区**   | 上海五区               |

注意，数据库的连接地址取决于你创建的 VPC 网络设置，创建 Redis 实例时会自动在你定义的网段中获取地址。

![qcloud-Redis-network](../images/qcloud-redis-network.png)

### 三、对象存储 COS

JuiceFS 会将所有的数据都存储到对象存储中，它支持几乎所有的对象存储服务。但为了获得最佳的性能，当使用腾讯云 CVM 时，搭配腾讯云 COS 对象存储通常是最优选择。不过请注意，将 CVM 和 COS Bucket 选择在相同的地区，这样才能通过腾讯云的内网线路进行访问，不但延时低，而且不需要额外的流量费用。

> **提示**：腾讯云对象存储 COS 提供的唯一访问地址同时支持内网和外网访问，当通过内网访问时，COS 会自动解析到内网 IP，此时产生的流量均为内网流量，不会产生流量费用。

当然，如果你愿意，也可以使用其他云平台提供的对象存储服务，但不推荐这样做。首先，通过腾讯云 CVM 访问其他云平台的对象存储要走公网线路，对象存储会产生流量费用，而且这样的访问延时相比也会更高，可能会影响 JuiceFS 的性能发挥。

腾讯云 COS 有不同的存储级别，由于 JuiceFS 需要与对象存储频繁交互，建议使用标准存储。你可以搭配 COS 资源包使用，降低对象存储的使用成本。

### API 访问秘钥

腾讯云 COS 需要通过 API 进行访问，你需要准备访问秘钥，包括  `Access Key ID` 和 `Access Key Secret` ，[点此查看](https://cloud.tencent.com/document/product/598/37140)获取方式。

> **安全建议**：显式使用 API 访问秘钥可能导致密钥泄露，推荐为云服务器分配 [CAM 服务角色](https://cloud.tencent.com/document/product/598/19420)。当一台 CVM 被授予 COS 操作权限以后，无需使用 API 访问秘钥即可访问 COS。

## 安装

我当前使用的是 Ubuntu Server 20.04 64 位系统，执行以下命令可以安装最新版本客户端。

```shell
curl -sSL https://d.juicefs.com/install | sh -
```

你也可以访问 [JuiceFS GitHub Releases](https://github.com/juicedata/juicefs/releases) 页面选择其他版本。

执行命令，看到返回 `juicefs` 的命令帮助信息，代表客户端安装成功。

```shell
$ juicefs
NAME:
   juicefs - A POSIX file system built on Redis and object storage.

USAGE:
   juicefs [global options] command [command options] [arguments...]

VERSION:
   0.15.2 (2021-07-07T05:51:36Z 4c16847)

COMMANDS:
   format   format a volume
   mount    mount a volume
   umount   unmount a volume
   gateway  S3-compatible gateway
   sync     sync between two storage
   rmr      remove directories recursively
   info     show internal information for paths or inodes
   bench    run benchmark to read/write/stat big/small files
   gc       collect any leaked objects
   fsck     Check consistency of file system
   profile  analyze access log
   status   show status of JuiceFS
   warmup   build cache for target directories/files
   dump     dump metadata into a JSON file
   load     load metadata from a previously dumped JSON file
   help, h  Shows a list of commands or help for one command

GLOBAL OPTIONS:
   --verbose, --debug, -v  enable debug log (default: false)
   --quiet, -q             only warning and errors (default: false)
   --trace                 enable trace log (default: false)
   --no-agent              disable pprof (:6060) agent (default: false)
   --help, -h              show help (default: false)
   --version, -V           print only the version (default: false)

COPYRIGHT:
   Apache License 2.0
```

JuiceFS 具有良好的跨平台兼容性，同时支持在 Linux、Windows 和 macOS 上使用。本文着重介绍 JuiceFS 在 Linux 系统上的安装和使用，如果你需要了解其他系统上的安装方法，请[查阅文档](../getting-started/installation.md)。

## 创建 JuiceFS 存储

JuiceFS 客户端安装好以后，现在就可以使用前面准备好的 Redis 数据库和 COS 对象存储来创建 JuiceFS 存储了。

严格意义上说，这一步操作应该叫做“Format a volume”，即格式化一个卷。但考虑到有很多用户可能不了解或者不关心文件系统的标准术语，所以简单起见，我们就直白的把这个过程叫做“创建 JuiceFS 存储”。

以下命令使用 JuiceFS 客户端提供的 `format` 子命令创建了一个名为 `mystor` 的存储，即文件系统：

```shell
$ juicefs format \
    --storage cos \
    --bucket https://<your-bucket-name> \
    --access-key <your-access-key-id> \
    --secret-key <your-access-key-secret> \
    redis://:<your-redis-password>@192.168.5.5:6379/1 \
    mystor
```

**选项说明：**

- `--storage`：指定对象存储类型，[点此查看](../reference/how_to_set_up_object_storage.md#supported-object-storage) JuiceFS 支持的对象存储。
- `--bucket`：对象存储的 Bucket 访问域名，可以在 COS 的管理控制台找到。
  ![cos-bucket-url](../images/cos-bucket-url.png)
- `--access-key` 和 `--secret-key`：访问对象存储 API 的秘钥对，[点此查看](https://cloud.tencent.com/document/product/598/37140)获取方式。

> Redis 6.0 身份认证需要用户名和密码两个参数，地址格式为 `redis://username:password@redis-server-url:6379/1`。目前腾讯云数据库 Redis 版只提供 Reids 4.0 和 5.0 两个版本，认证身份只需要密码，在设置 Redis 服务器地址时只需留空用户名即可，例如：`redis://:password@redis-server-url:6379/1`

看到类似下面的输出，代表文件系统创建成功了。

```shell
2021/07/30 11:44:31.904157 juicefs[44060] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:44:31.907083 juicefs[44060] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:44:31.907634 juicefs[44060] <INFO>: Ping redis: 474.98µs
2021/07/30 11:44:31.907850 juicefs[44060] <INFO>: Data uses cos://juice-0000000000/mystor/
2021/07/30 11:44:32.149692 juicefs[44060] <INFO>: Volume is formatted as {Name:mystor UUID:dbf05314-57af-4a2c-8ac1-19329d73170c Storage:cos Bucket:https://juice-0000000000.cos.ap-shanghai.myqcloud.com AccessKey:AKIDGLxxxxxxxxxxxxxxxxxxZ8QRBdpkOkp SecretKey:removed BlockSize:4096 Compression:none Shards:0 Partitions:0 Capacity:0 Inodes:0 EncryptKey:}
```

## 挂载 JuiceFS 存储

文件系统创建完成，对象存储相关的信息会被存入数据库，挂载时无需再输入对象存储的 Bucket 和秘钥等信息。

使用 `mount` 子命令，将文件系统挂载到 `/mnt/jfs` 目录：

```shell
sudo juicefs mount -d redis://:<your-redis-password>@192.168.5.5:6379/1 /mnt/jfs
```

> **注意**：挂载文件系统时，只需填写 Redis 数据库地址，不需要文件系统名称。默认的缓存路径为 `/var/jfsCache`，请确保当前用户有足够的读写权限。

看到类似下面的输出，代表文件系统挂载成功。

```shell
2021/07/30 11:49:56.842211 juicefs[44175] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:49:56.845100 juicefs[44175] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:49:56.845562 juicefs[44175] <INFO>: Ping redis: 383.157µs
2021/07/30 11:49:56.846164 juicefs[44175] <INFO>: Data use cos://juice-0000000000/mystor/
2021/07/30 11:49:56.846731 juicefs[44175] <INFO>: Disk cache (/var/jfsCache/dbf05314-57af-4a2c-8ac1-19329d73170c/): capacity (1024 MB), free ratio (10%), max pending pages (15)
2021/07/30 11:49:57.354763 juicefs[44175] <INFO>: OK, mystor is ready at /mnt/jfs
```

使用 `df` 命令，可以看到文件系统的挂载情况：

```shell
$ df -Th
文件系统           类型          容量   已用  可用   已用% 挂载点
JuiceFS:mystor   fuse.juicefs  1.0P   64K  1.0P    1% /mnt/jfs
```

文件系统挂载成功以后，现在就可以像使用本地硬盘那样，在 `/mnt/jfs` 目录中存储数据了。

> **多主机共享**：JuiceFS 存储支持被多台云服务器同时挂载使用，你可以在其他 CVM 上安装 JuiceFS 客户端，然后使用 `redis://:<your-redis-password>@192.168.5.5:6379/1` 数据库地址挂载文件系统到每一台主机上。

## 查看文件系统状态

使用 JuiceFS 客户端的 `status` 子命令可以查看一个文件系统的基本信息和连接状态。

```shell
$ juicefs status redis://:<your-redis-password>@192.168.5.5:6379/1

2021/07/30 11:51:17.864767 juicefs[44196] <INFO>: Meta address: redis://@192.168.5.5:6379/1
2021/07/30 11:51:17.866619 juicefs[44196] <WARNING>: AOF is not enabled, you may lose data if Redis is not shutdown properly.
2021/07/30 11:51:17.867092 juicefs[44196] <INFO>: Ping redis: 379.391µs
{
  "Setting": {
    "Name": "mystor",
    "UUID": "dbf05314-57af-4a2c-8ac1-19329d73170c",
    "Storage": "cos",
    "Bucket": "https://juice-0000000000.cos.ap-shanghai.myqcloud.com",
    "AccessKey": "AKIDGLxxxxxxxxxxxxxxxxx8QRBdpkOkp",
    "BlockSize": 4096,
    "Compression": "none",
    "Shards": 0,
    "Partitions": 0,
    "Capacity": 0,
    "Inodes": 0
  },
  "Sessions": [
    {
      "Sid": 1,
      "Heartbeat": "2021-07-30T11:49:56+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "VM-5-6-ubuntu",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 44175
    },
    {
      "Sid": 3,
      "Heartbeat": "2021-07-30T11:50:56+08:00",
      "Version": "0.15.2 (2021-07-07T05:51:36Z 4c16847)",
      "Hostname": "VM-5-6-ubuntu",
      "MountPoint": "/mnt/jfs",
      "ProcessID": 44185
    }
  ]
}
```

## 卸载 JuiceFS 存储

使用 JuiceFS 客户端提供的 `umount` 命令即可卸载文件系统，比如：

```shell
sudo juicefs umount /mnt/jfs
```

> **注意**：强制卸载使用中的文件系统可能导致数据损坏或丢失，请务必谨慎操作。

## 开机自动挂载

请参考[「启动时自动挂载 JuiceFS」](../administration/mount_at_boot.md)


================================================
FILE: docs/zh_cn/tutorials/windows.md
================================================
---
title: 在 Windows 上使用 JuiceFS
sidebar_position: 1
---

## 快速上手视频

<div className="video-container">
  <iframe
    src="//player.bilibili.com/player.html?isOutside=true&aid=114499784808051&bvid=BV1jtEczZEvq&cid=29939011077&p=1&autoplay=false"
    width="100%"
    height="360"
    scrolling="no"
    frameBorder="0"
    allowFullScreen
  ></iframe>
</div>

## 安装 JuiceFS 客户端

:::tip 环境依赖
在 Windows 系统上，JuiceFS 依赖 WinFsp 实现文件系统的挂载。你可以在 [WinFsp 源码仓库](https://github.com/winfsp/winfsp) 下载最新版本，安装后建议重启计算机，以确保所有组件正常加载。
:::

[安装文档](../getting-started/installation.md#windows) 介绍了在 Windows 上安装 JuiceFS 客户端的多种方式，这里我们展开介绍手动安装方式。

### 第一步 下载 JuiceFS 客户端

在项目仓库的 [Release 页面](https://github.com/juicedata/juicefs/releases) 下载最新版本的 JuiceFS 客户端，例如 `juicefs-1.3.0-windows-amd64.tar.gz`。

### 第二步 创建程序目录

为了便于管理，建议在系统中创建一个专用的目录来存放 JuiceFS 客户端程序。例如，可以在 `C:\` 目录下创建一个名为 `juicefs` 的文件夹，将解压后的 `juicefs.exe` 客户端程序放入该目录。

### 第三步 配置环境变量

为了在命令行中方便地使用 `juicefs` 命令，需要将 JuiceFS 客户端所在的目录添加到系统的环境变量中。具体操作如下：

1. 右键点击“此电脑”或“计算机”，选择“属性”；
2. 点击“高级系统设置”；
3. 在“系统属性”窗口中，点击“环境变量”按钮；
4. 在“系统变量”部分，找到名为 `Path` 的变量，选中后点击“编辑”；
5. 在编辑窗口中，点击“新建”，然后输入 JuiceFS 客户端所在的目录路径，例如 `C:\juicefs`；
6. 点击“确定”保存更改。

![Windows 环境变量设置](https://static1.juicefs.com/docs/windows-path.png)

### 第四步 验证安装

安装完成后，可以通过命令行验证 JuiceFS 客户端是否安装成功。打开命令提示符（CMD）或 PowerShell，输入以下命令：

```bash
juicefs version
```

如果安装成功，你应该能看到类似以下的输出：

```
juicefs version 1.3.0+2025-07-03.30190ca1094d2
```

## 创建和挂载文件系统

创建和挂载 JuiceFS 文件系统的步骤与其他操作系统类似，但需要注意 Windows 上的命令行语法和路径格式。

### 创建文件系统

```shell
juicefs format --storage oss `
    --bucket https://your-bucket.oss-cn-region.aliyuncs.com `
    --access-key your-access-key `
    --secret-key your-secret-key `
    redis://your-redis-host:6379/0 `
    mywinfs
```

> 与 Linux 系统不同，Windows 上的命令行需要使用反引号（`）来换行。

### 挂载文件系统

在 Windows 上，挂载点需要指定一个未被占用的盘符（如 X、Y、Z 等）。这与 Linux 和 macOS 上的挂载方式不同，因为这些系统是将文件系统挂载到目录中。

```shell
juicefs mount -d redis://your-redis-host:6379/0 X:
```

## 环境变量配置

从安全性的角度出发，为了避免明文输入密码，可以通过设置环境变量来存储敏感信息。这样在挂载文件系统或启用 S3 Gateway 时无需填写密码，客户端会自动从环境变量中读取。

以下是在 Windows 上使用 JuiceFS 时常用的环境变量：

| 环境变量名            | 说明                   |
|----------------------|------------------------|
| `META_PASSWORD`      | 元数据引擎密码         |
| `MINIO_ROOT_USER`    | S3 网关 Access Key     |
| `MINIO_ROOT_PASSWORD`| S3 网关 Secret Key     |

可以直接在命令行设置这些环境变量：

```cmd
set META_PASSWORD=your_password
set MINIO_ROOT_USER=your_access_key
set MINIO_ROOT_PASSWORD=your_secret_key
```

但这样的设置方式仅在当前命令行会话中有效，关闭窗口后环境变量失效，需重新设置。

### 持久化环境变量

如果希望在每次启动 Windows 时都能自动加载这些环境变量，可以通过系统环境变量设置来实现。

1. **打开系统环境变量设置**
   - 按下 `Win + S`，搜索并打开“编辑系统环境变量”。
   - 点击“环境变量”按钮。

   ![系统环境变量设置](https://static1.juicefs.com/docs/win_env_01.png)

2. **新建系统级环境变量**
   - 在“系统变量”区域点击“新建”。
   - **变量名**：例如 `META_PASSWORD`
   - **变量值**：填写密码或秘钥
   - 点击“确定”保存。

   ![添加环境变量](https://static1.juicefs.com/docs/win_env_02.png)

   ![添加环境变量](https://static1.juicefs.com/docs/win_env_03.png)

3. **验证环境变量**

    重新打开终端，尝试不带密码挂载文件系统。如果能够成功挂载，则说明环境变量已生效。

## 开机自启动挂载

通过 Windows 计划任务实现开机自动挂载有多种方式，这里介绍通过“任务计划程序”设置的方法。

1. 打开“任务计划程序”，点击“创建任务”。

   ![任务计划程序](https://static1.juicefs.com/docs/task_00.png)

2. 在“常规”选项卡中，设置任务名称（如 `JuiceFS_AutoMount`），并勾选“使用最高权限运行”。

   ![常规设置](https://static1.juicefs.com/docs/task_01.png)

3. 切换到“触发器”选项卡，点击“新建”，选择“系统启动时”作为触发条件。

   ![触发器设置](https://static1.juicefs.com/docs/task_02.png)

4. 切换到“操作”选项卡，点击“新建”，填写以下信息：

   - **程序或脚本**：浏览选择 JuiceFS 客户端路径（如 `C:\juicefs\juicefs.exe`）。
   - **参数**：填写挂载命令参数。建议将元数据引擎密码通过系统环境变量进行设置，这样可以避免在此处明文输入密码。

   ![触发器设置](https://static1.juicefs.com/docs/task_03.png)

5. 在“条件”选项卡中，勾选“仅当网络连接可用时”，以确保挂载操作在网络可用时执行。

   ![触发器设置](https://static1.juicefs.com/docs/task_04.png)

6. 点击“确定”保存任务。

**注意事项：**

- 确保挂载命令参数正确，无需在命令中包含密码（环境变量已存储）。
- 卸载文件系统：右键点击挂载盘符，选择“断开连接”。


================================================
FILE: go.mod
================================================
module github.com/juicedata/juicefs

go 1.23.0

require (
	cloud.google.com/go/compute/metadata v0.5.2
	cloud.google.com/go/storage v1.48.0
	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0
	github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0
	github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.1
	github.com/DataDog/zstd v1.5.6
	github.com/IBM/ibm-cos-sdk-go v1.12.1
	github.com/agiledragon/gomonkey/v2 v2.6.0
	github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.2.1
	github.com/aliyun/credentials-go v1.4.5
	github.com/apple/foundationdb/bindings/go v0.0.0-20211207225159-47b9a81d1c10
	github.com/aws/aws-sdk-go-v2 v1.36.1
	github.com/aws/aws-sdk-go-v2/config v1.29.6
	github.com/aws/aws-sdk-go-v2/credentials v1.17.59
	github.com/aws/aws-sdk-go-v2/service/s3 v1.72.3
	github.com/aws/smithy-go v1.22.2
	github.com/baidubce/bce-sdk-go v0.9.221
	github.com/bytedance/mockey v1.2.14
	github.com/ceph/go-ceph v0.18.0
	github.com/charlievieth/fastwalk v1.0.14
	github.com/cloudsoda/go-smb2 v0.0.0-20250228001242-d4c70e6251cc
	github.com/colinmarc/hdfs/v2 v2.4.0
	github.com/davies/groupcache v0.0.0-20230821031435-e4e8362f58e1
	github.com/dgraph-io/badger/v4 v4.5.1
	github.com/dustin/go-humanize v1.0.1
	github.com/emmansun/gmsm v0.34.1
	github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377
	github.com/go-http-utils/headers v0.0.0-20181008091004-fed159eddc2a
	github.com/go-sql-driver/mysql v1.9.1
	github.com/goccy/go-json v0.10.5
	github.com/gofrs/flock v0.8.1
	github.com/golang/snappy v0.0.4
	github.com/google/btree v1.1.2
	github.com/google/uuid v1.6.0
	github.com/grafana/pyroscope-go v1.2.1
	github.com/grafana/pyroscope-go/godeltaprof v0.1.8
	github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8
	github.com/hashicorp/consul/api v1.29.2
	github.com/hashicorp/go-hclog v1.6.3
	github.com/hashicorp/golang-lru/v2 v2.0.7
	github.com/huaweicloud/huaweicloud-sdk-go-obs v3.21.12+incompatible
	github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099
	github.com/jackc/pgx/v5 v5.7.3
	github.com/jcmturner/gokrb5/v8 v8.4.4
	github.com/json-iterator/go v1.1.12
	github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d
	github.com/juicedata/gogfapi v0.0.0-20241204082332-ecd102647f80
	github.com/juju/ratelimit v1.0.2
	github.com/ks3sdklib/aws-sdk-go v1.6.0
	github.com/l0wl3vel/bunny-storage-go-sdk v0.0.10
	github.com/mattn/go-isatty v0.0.20
	github.com/mattn/go-sqlite3 v1.14.24
	github.com/minio/cli v1.24.2
	github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c
	github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78
	github.com/ncw/swift/v2 v2.0.3
	github.com/oliverisaac/shellescape v0.0.0-20220131224704-1b6c6b87b668
	github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3
	github.com/pkg/errors v0.9.1
	github.com/pkg/sftp v1.13.5
	github.com/pkg/xattr v0.4.9
	github.com/prometheus/client_golang v1.21.1
	github.com/prometheus/client_model v0.6.1
	github.com/prometheus/common v0.62.0
	github.com/prometheus/prometheus v0.54.1
	github.com/qingstor/qingstor-sdk-go/v4 v4.4.0
	github.com/qiniu/go-sdk/v7 v7.25.2
	github.com/redis/go-redis/v9 v9.16.0
	github.com/sirupsen/logrus v1.9.3
	github.com/smartystreets/goconvey v1.7.2
	github.com/spf13/cast v1.7.1
	github.com/stretchr/testify v1.10.0
	github.com/studio-b12/gowebdav v0.10.0
	github.com/tencentyun/cos-go-sdk-v5 v0.7.63
	github.com/tikv/client-go/v2 v2.0.7
	github.com/tikv/pd/client v0.0.0-20230329114254-1948c247c2b1
	github.com/twmb/murmur3 v1.1.8
	github.com/urfave/cli/v2 v2.19.3
	github.com/vbauerster/mpb/v7 v7.0.3
	github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8
	github.com/vimeo/go-util v1.4.1
	github.com/vmware/go-nfs-client v0.0.0-20190605212624-d43b92724c1b
	github.com/volcengine/ve-tos-golang-sdk/v2 v2.7.8
	github.com/winfsp/cgofuse v1.6.0
	go.etcd.io/etcd v3.3.27+incompatible
	go.etcd.io/etcd/client/v3 v3.5.9
	go.uber.org/automaxprocs v1.6.0
	go.uber.org/zap v1.24.0
	golang.org/x/crypto v0.41.0
	golang.org/x/net v0.42.0
	golang.org/x/oauth2 v0.24.0
	golang.org/x/sync v0.16.0
	golang.org/x/sys v0.35.0
	golang.org/x/term v0.34.0
	golang.org/x/text v0.28.0
	google.golang.org/api v0.210.0
	google.golang.org/protobuf v1.36.3
	gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216
	pgregory.net/rapid v0.5.3
	xorm.io/xorm v1.0.7
)

require (
	cel.dev/expr v0.16.1 // indirect
	cloud.google.com/go v0.116.0 // indirect
	cloud.google.com/go/auth v0.11.0 // indirect
	cloud.google.com/go/auth/oauth2adapt v0.2.6 // indirect
	cloud.google.com/go/iam v1.2.2 // indirect
	cloud.google.com/go/monitoring v1.21.2 // indirect
	filippo.io/edwards25519 v1.1.0 // indirect
	git.apache.org/thrift.git v0.13.0 // indirect
	github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect
	github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c // indirect
	github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect
	github.com/BurntSushi/toml v1.3.2 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.24.1 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1 // indirect
	github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1 // indirect
	github.com/IBM/go-sdk-core/v5 v5.18.5 // indirect
	github.com/VividCortex/ewma v1.2.0 // indirect
	github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
	github.com/alecthomas/participle v0.2.1 // indirect
	github.com/alex-ant/gomath v0.0.0-20160516115720-89013a210a82 // indirect
	github.com/alibabacloud-go/debug v1.0.1 // indirect
	github.com/alibabacloud-go/tea v1.2.2 // indirect
	github.com/andybalholm/brotli v1.1.0 // indirect
	github.com/armon/go-metrics v0.4.1 // indirect
	github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect
	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 // indirect
	github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 // indirect
	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 // indirect
	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 // indirect
	github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.27 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.8 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 // indirect
	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.8 // indirect
	github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 // indirect
	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 // indirect
	github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 // indirect
	github.com/bcicen/jstream v1.0.1 // indirect
	github.com/beevik/ntp v0.3.0 // indirect
	github.com/benbjohnson/clock v1.3.0 // indirect
	github.com/beorn7/perks v1.0.1 // indirect
	github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
	github.com/cespare/xxhash/v2 v2.3.0 // indirect
	github.com/cheggaaa/pb v1.0.29 // indirect
	github.com/clbanning/mxj v1.8.4 // indirect
	github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc // indirect
	github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78 // indirect
	github.com/coredns/coredns v1.4.0 // indirect
	github.com/coreos/etcd v3.3.27+incompatible // indirect
	github.com/coreos/go-semver v0.3.0 // indirect
	github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
	github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb // indirect
	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
	github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
	github.com/dchest/siphash v1.2.1 // indirect
	github.com/dgraph-io/ristretto/v2 v2.1.0 // indirect
	github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 // indirect
	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
	github.com/djherbis/atime v1.0.0 // indirect
	github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d // indirect
	github.com/elastic/gosigar v0.14.2 // indirect
	github.com/envoyproxy/go-control-plane v0.13.0 // indirect
	github.com/envoyproxy/protoc-gen-validate v1.1.0 // indirect
	github.com/fatih/color v1.16.0 // indirect
	github.com/fatih/structs v1.1.0 // indirect
	github.com/felixge/httpsnoop v1.0.4 // indirect
	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
	github.com/gammazero/toposort v0.1.1 // indirect
	github.com/geoffgarside/ber v1.1.0 // indirect
	github.com/go-asn1-ber/asn1-ber v1.5.1 // indirect
	github.com/go-ldap/ldap/v3 v3.2.4 // indirect
	github.com/go-logr/logr v1.4.2 // indirect
	github.com/go-logr/stdr v1.2.2 // indirect
	github.com/go-ole/go-ole v1.2.6 // indirect
	github.com/go-openapi/errors v0.22.0 // indirect
	github.com/go-openapi/strfmt v0.23.0 // indirect
	github.com/go-playground/locales v0.14.1 // indirect
	github.com/go-playground/universal-translator v0.18.1 // indirect
	github.com/go-playground/validator/v10 v10.19.0 // indirect
	github.com/go-resty/resty/v2 v2.13.1 // indirect
	github.com/gogo/protobuf v1.3.2 // indirect
	github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
	github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
	github.com/golang/glog v1.2.2 // indirect
	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
	github.com/golang/protobuf v1.5.4 // indirect
	github.com/google/flatbuffers v24.12.23+incompatible // indirect
	github.com/google/go-querystring v1.1.0 // indirect
	github.com/google/readahead v0.0.0-20161222183148-eaceba169032 // indirect
	github.com/google/s2a-go v0.1.8 // indirect
	github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
	github.com/googleapis/gax-go/v2 v2.14.0 // indirect
	github.com/gopherjs/gopherjs v1.12.80 // indirect
	github.com/gorilla/handlers v1.5.1 // indirect
	github.com/gorilla/mux v1.8.1 // indirect
	github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
	github.com/grpc-ecosystem/go-grpc-middleware v1.1.0 // indirect
	github.com/hashicorp/errwrap v1.1.0 // indirect
	github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
	github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
	github.com/hashicorp/go-multierror v1.1.1 // indirect
	github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
	github.com/hashicorp/go-rootcerts v1.0.2 // indirect
	github.com/hashicorp/go-uuid v1.0.3 // indirect
	github.com/hashicorp/golang-lru v0.6.0 // indirect
	github.com/hashicorp/serf v0.10.1 // indirect
	github.com/jackc/pgpassfile v1.0.0 // indirect
	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
	github.com/jackc/puddle/v2 v2.2.2 // indirect
	github.com/jcmturner/aescts/v2 v2.0.0 // indirect
	github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect
	github.com/jcmturner/gofork v1.7.6 // indirect
	github.com/jcmturner/goidentity/v6 v6.0.1 // indirect
	github.com/jcmturner/rpc/v2 v2.0.3 // indirect
	github.com/jmespath/go-jmespath v0.4.0 // indirect
	github.com/jtolds/gls v4.20.0+incompatible // indirect
	github.com/klauspost/compress v1.17.11 // indirect
	github.com/klauspost/cpuid v1.3.1 // indirect
	github.com/klauspost/cpuid/v2 v2.2.3 // indirect
	github.com/klauspost/pgzip v1.2.5 // indirect
	github.com/klauspost/readahead v1.3.1 // indirect
	github.com/klauspost/reedsolomon v1.9.11 // indirect
	github.com/kr/fs v0.1.0 // indirect
	github.com/kylelemons/godebug v1.1.0 // indirect
	github.com/leodido/go-urn v1.4.0 // indirect
	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
	github.com/mattn/go-colorable v0.1.13 // indirect
	github.com/mattn/go-runewidth v0.0.13 // indirect
	github.com/miekg/dns v1.1.61 // indirect
	github.com/minio/highwayhash v1.0.2 // indirect
	github.com/minio/md5-simd v1.1.1 // indirect
	github.com/minio/selfupdate v0.3.1 // indirect
	github.com/minio/sha256-simd v1.0.1 // indirect
	github.com/minio/simdjson-go v0.2.1 // indirect
	github.com/minio/sio v0.2.1 // indirect
	github.com/mitchellh/go-homedir v1.1.0 // indirect
	github.com/mitchellh/mapstructure v1.5.0 // indirect
	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
	github.com/modern-go/reflect2 v1.0.2 // indirect
	github.com/montanaflynn/stats v0.7.0 // indirect
	github.com/mozillazg/go-httpheader v0.2.1 // indirect
	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
	github.com/ncw/directio v1.0.5 // indirect
	github.com/oklog/ulid v1.3.1 // indirect
	github.com/opentracing/opentracing-go v1.2.0 // indirect
	github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 // indirect
	github.com/philhofer/fwd v1.1.1 // indirect
	github.com/pierrec/lz4 v2.5.2+incompatible // indirect
	github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect
	github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c // indirect
	github.com/pingcap/kvproto v0.0.0-20230403051650-e166ae588106 // indirect
	github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
	github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
	github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7 // indirect
	github.com/prometheus/procfs v0.15.1 // indirect
	github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 // indirect
	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
	github.com/rivo/uniseg v0.2.0 // indirect
	github.com/rjeczalik/notify v0.9.3 // indirect
	github.com/rs/cors v1.7.0 // indirect
	github.com/rs/xid v1.2.1 // indirect
	github.com/russross/blackfriday/v2 v2.1.0 // indirect
	github.com/secure-io/sio-go v0.3.1 // indirect
	github.com/shirou/gopsutil/v3 v3.23.11 // indirect
	github.com/shoenig/go-m1cpu v0.1.6 // indirect
	github.com/smartystreets/assertions v1.2.0 // indirect
	github.com/spaolacci/murmur3 v1.1.0 // indirect
	github.com/syndtr/goleveldb v1.0.0 // indirect
	github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a // indirect
	github.com/tidwall/gjson v1.6.7 // indirect
	github.com/tidwall/match v1.0.3 // indirect
	github.com/tidwall/pretty v1.0.2 // indirect
	github.com/tidwall/sjson v1.0.4 // indirect
	github.com/tinylib/msgp v1.1.3 // indirect
	github.com/tklauser/go-sysconf v0.3.12 // indirect
	github.com/tklauser/numcpus v0.6.1 // indirect
	github.com/valyala/bytebufferpool v1.0.0 // indirect
	github.com/valyala/fasthttp v1.52.0 // indirect
	github.com/valyala/tcplisten v1.0.0 // indirect
	github.com/willf/bitset v1.1.11 // indirect
	github.com/willf/bloom v2.0.3+incompatible // indirect
	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
	github.com/yusufpapurcu/wmi v1.2.3 // indirect
	go.etcd.io/etcd/api/v3 v3.5.9 // indirect
	go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect
	go.mongodb.org/mongo-driver v1.14.0 // indirect
	go.opencensus.io v0.24.0 // indirect
	go.opentelemetry.io/contrib/detectors/gcp v1.29.0 // indirect
	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
	go.opentelemetry.io/otel v1.29.0 // indirect
	go.opentelemetry.io/otel/metric v1.29.0 // indirect
	go.opentelemetry.io/otel/sdk v1.29.0 // indirect
	go.opentelemetry.io/otel/sdk/metric v1.29.0 // indirect
	go.opentelemetry.io/otel/trace v1.29.0 // indirect
	go.uber.org/atomic v1.11.0 // indirect
	go.uber.org/multierr v1.11.0 // indirect
	golang.org/x/arch v0.11.0 // indirect
	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
	golang.org/x/mod v0.26.0 // indirect
	golang.org/x/time v0.8.0 // indirect
	golang.org/x/tools v0.35.0 // indirect
	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
	google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 // indirect
	google.golang.org/genproto/googleapis/api v0.0.0-20241113202542-65e8d215514f // indirect
	google.golang.org/genproto/googleapis/rpc v0.0.0-20241118233622-e639e219e697 // indirect
	google.golang.org/grpc v1.67.2 // indirect
	google.golang.org/grpc/stats/opentelemetry v0.0.0-20240907200651-3ffb98b2c93a // indirect
	gopkg.in/ini.v1 v1.67.0 // indirect
	gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
	gopkg.in/yaml.v2 v2.4.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
	modernc.org/fileutil v1.0.0 // indirect
	xorm.io/builder v0.3.7 // indirect
)

replace github.com/minio/minio v0.0.0-20210206053228-97fe57bba92c => github.com/juicedata/minio v0.0.0-20251120043259-079fa6a601db

replace github.com/hanwen/go-fuse/v2 v2.1.1-0.20210611132105-24a1dfe6b4f8 => github.com/juicedata/go-fuse/v2 v2.1.1-0.20250807045235-112198daa7df

replace github.com/dgrijalva/jwt-go v3.2.0+incompatible => github.com/golang-jwt/jwt v3.2.1+incompatible

replace github.com/vbauerster/mpb/v7 v7.0.3 => github.com/juicedata/mpb/v7 v7.0.4-0.20231024073412-2b8d31be510b

replace xorm.io/xorm v1.0.7 => gitea.com/davies/xorm v1.0.8-0.20220528043536-552d84d1b34a

replace github.com/huaweicloud/huaweicloud-sdk-go-obs v3.21.12+incompatible => github.com/juicedata/huaweicloud-sdk-go-obs v3.22.12-0.20230228031208-386e87b5c091+incompatible

replace github.com/urfave/cli/v2 v2.19.3 => github.com/juicedata/cli/v2 v2.19.4-0.20230605075551-9c9c5c0dce83

replace github.com/vmware/go-nfs-client v0.0.0-20190605212624-d43b92724c1b => github.com/juicedata/go-nfs-client v0.0.0-20250220101412-d3a8c1ca64a1

replace github.com/mattn/go-colorable v0.1.13 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/mattn/go-colorable v0.1.12 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/mattn/go-colorable v0.1.4 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/mattn/go-colorable v0.1.6 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/mattn/go-colorable v0.1.9 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/mattn/go-colorable v0.0.9 => github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db

replace github.com/cloudsoda/go-smb2 => github.com/juicedata/go-smb2 v0.0.0-20260310064141-58f27d06634e

replace github.com/hashicorp/golang-lru/v2 v2.0.7 => github.com/juicedata/golang-lru/v2 v2.0.8-0.20251126062551-1b321869f904


================================================
FILE: go.sum
================================================
cel.dev/expr v0.16.1 h1:NR0+oFYzR1CqLFhTAqg3ql59G9VfN8fKq1TCHJ6gq1g=
cel.dev/expr v0.16.1/go.mod h1:AsGA5zb3WruAEQeQng1RZdGEXmBj0jvMWh6l5SnNuC8=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE=
cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U=
cloud.google.com/go/auth v0.11.0 h1:Ic5SZz2lsvbYcWT5dfjNWgw6tTlGi2Wc8hyQSC9BstA=
cloud.google.com/go/auth v0.11.0/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI=
cloud.google.com/go/auth/oauth2adapt v0.2.6 h1:V6a6XDu2lTwPZWOawrAa9HUK+DB2zfJyTuciBG5hFkU=
cloud.google.com/go/auth/oauth2adapt v0.2.6/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8=
cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo=
cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k=
cloud.google.com/go/iam v1.2.2 h1:ozUSofHUGf/F4tCNy/mu9tHLTaxZFLOUiKzjcgWHGIA=
cloud.google.com/go/iam v1.2.2/go.mod h1:0Ys8ccaZHdI1dEUilwzqng/6ps2YB6vRsjIe00/+6JY=
cloud.google.com/go/logging v1.12.0 h1:ex1igYcGFd4S/RZWOCU51StlIEuey5bjqwH9ZYjHibk=
cloud.google.com/go/logging v1.12.0/go.mod h1:wwYBt5HlYP1InnrtYI0wtwttpVU1rifnMT7RejksUAM=
cloud.google.com/go/longrunning v0.6.2 h1:xjDfh1pQcWPEvnfjZmwjKQEcHnpz6lHjfy7Fo0MK+hc=
cloud.google.com/go/longrunning v0.6.2/go.mod h1:k/vIs83RN4bE3YCswdXC5PFfWVILjm3hpEUlSko4PiI=
cloud.google.com/go/monitoring v1.21.2 h1:FChwVtClH19E7pJ+e0xUhJPGksctZNVOk2UhMmblmdU=
cloud.google.com/go/monitoring v1.21.2/go.mod h1:hS3pXvaG8KgWTSz+dAdyzPrGUYmi2Q+WFX8g2hqVEZU=
cloud.google.com/go/storage v1.48.0 h1:FhBDHACbVtdPx7S/AbcKujPWiHvfO6F8OXGgCEbB2+o=
cloud.google.com/go/storage v1.48.0/go.mod h1:aFoDYNMAjv67lp+xcuZqjUKv/ctmplzQ3wJgodA7b+M=
cloud.google.com/go/trace v1.11.2 h1:4ZmaBdL8Ng/ajrgKqY5jfvzqMXbrDcBsUGXOT9aqTtI=
cloud.google.com/go/trace v1.11.2/go.mod h1:bn7OwXd4pd5rFuAnTrzBuoZ4ax2XQeG3qNgYmfCy0Io=
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
git.apache.org/thrift.git v0.13.0 h1:/3bz5WZ+sqYArk7MBBBbDufMxKKOA56/6JO6psDpUDY=
git.apache.org/thrift.git v0.13.0/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
gitea.com/davies/xorm v1.0.8-0.20220528043536-552d84d1b34a h1:awR9qREIs6qSnKr/cmSewVwDo74/kQ32x0CDEXUtiB8=
gitea.com/davies/xorm v1.0.8-0.20220528043536-552d84d1b34a/go.mod h1:uF9EtbhODq5kNWxMbnBEj8hRRZnlcNSz2t2N7HW/+A4=
gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a h1:lSA0F4e9A2NcQSqGqTOXqu2aRi/XEQxDCBwM8yJtE6s=
gitea.com/xorm/sqlfiddle v0.0.0-20180821085327-62ce714f951a/go.mod h1:EXuID2Zs0pAQhH8yz+DNjUbjppKQzKFAn28TMYPB6IU=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0 h1:GJHeeA2N7xrG3q30L2UXDyuWRzDM900/65j70wcM4Ww=
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0/go.mod h1:l38EPgmsp71HHLq9j7De57JcKOWPyhrsW1Awm1JS6K0=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc=
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY=
github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0 h1:AifHbc4mg0x9zW52WOpKbsHaDKuRhlI7TVl47thgQ70=
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.5.0/go.mod h1:T5RfihdXtBDxt1Ch2wobif3TvzTdumDy29kahv6AV9A=
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.1 h1:fXPMAmuh0gDuRDey0atC8cXBuKIlqCzCkL8sm1n9Ov0=
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.3.1/go.mod h1:SUZc9YRRHfx2+FAQKNDGrssXehqLpxmwRv2mC/5ntj4=
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c h1:/IBSNwUN8+eKzUzbJPqhK839ygXJ82sde8x3ogr6R28=
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU=
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.3.2 h1:o7IhLm0Msx3BaB+n3Ag7L8EVlByGnpq14C4YWiu/gL8=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/DataDog/zstd v1.5.6 h1:LbEglqepa/ipmmQJUDnSsfvA8e8IStVcGaFWDuxvGOY=
github.com/DataDog/zstd v1.5.6/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.24.1 h1:pB2F2JKCj1Znmp2rwxxt1J0Fg0wezTMgWYk5Mpbi1kg=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.24.1/go.mod h1:itPGVDKf9cC/ov4MdvJ2QZ0khw4bfoo9jzwTJlaxy2k=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1 h1:UQ0AhxogsIRZDkElkblfnwjc3IaltCm2HUMvezQaL7s=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1/go.mod h1:jyqM3eLpJ3IbIFDTKVz2rF9T/xWGW0rIriGwnz8l9Tk=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.48.1 h1:oTX4vsorBZo/Zdum6OKPA4o7544hm6smoRv1QjpTwGo=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.48.1/go.mod h1:0wEl7vrAD8mehJyohS9HZy+WyEOaQO2mJx86Cvh93kM=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1 h1:8nn+rsCvTq9axyEh382S0PFLBeaFwNsT43IrPWzctRU=
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1/go.mod h1:viRWSEhtMZqz1rhwmOVKkWl6SwmVowfL9O2YR5gI2PE=
github.com/IBM/go-sdk-core/v5 v5.18.5 h1:g0JRl3sYXJczB/yuDlrN6x22LJ6jIxhp0Sa4ARNW60c=
github.com/IBM/go-sdk-core/v5 v5.18.5/go.mod h1:KonTFRR+8ZSgw5cxBSYo6E4WZoY1+7n1kfHM82VcjFU=
github.com/IBM/ibm-cos-sdk-go v1.12.1 h1:pWs5c5/j9PNJE1lIQhYtzpdCxu2fpvCq9PHs6/nDjyI=
github.com/IBM/ibm-cos-sdk-go v1.12.1/go.mod h1:7vmUThyAq4+AD1eEyGZi90ir06Z9YhsEzLBsdGPfcqo=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/QcloudApi/qcloud_sign_golang v0.0.0-20141224014652-e4130a326409/go.mod h1:1pk82RBxDY/JZnPQrtqHlUFfCctgdorsd9M06fMynOM=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8=
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo=
github.com/agiledragon/gomonkey/v2 v2.6.0 h1:RzdlW1ibfVipfXKy9U4zYumdHTIY7RoZwyXY3tXLYd8=
github.com/agiledragon/gomonkey/v2 v2.6.0/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY=
github.com/alecthomas/participle v0.2.1 h1:4AVLj1viSGa4LG5HDXKXrm5xRx19SB/rS/skPQB1Grw=
github.com/alecthomas/participle v0.2.1/go.mod h1:SW6HZGeZgSIpcUWX3fXpfZhuaWHnmoD5KCVaqSaNTkk=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alex-ant/gomath v0.0.0-20160516115720-89013a210a82 h1:7dONQ3WNZ1zy960TmkxJPuwoolZwL7xKtpcM04MBnt4=
github.com/alex-ant/gomath v0.0.0-20160516115720-89013a210a82/go.mod h1:nLnM0KdK1CmygvjpDUO6m1TjSsiQtL61juhNsvV/JVI=
github.com/alibabacloud-go/debug v1.0.0/go.mod h1:8gfgZCCAC3+SCzjWtY053FrOcd4/qlH6IHTI4QyICOc=
github.com/alibabacloud-go/debug v1.0.1 h1:MsW9SmUtbb1Fnt3ieC6NNZi6aEwrXfDksD4QA6GSbPg=
github.com/alibabacloud-go/debug v1.0.1/go.mod h1:8gfgZCCAC3+SCzjWtY053FrOcd4/qlH6IHTI4QyICOc=
github.com/alibabacloud-go/tea v1.2.2 h1:aTsR6Rl3ANWPfqeQugPglfurloyBJY85eFy7Gc1+8oU=
github.com/alibabacloud-go/tea v1.2.2/go.mod h1:CF3vOzEMAG+bR4WOql8gc2G9H3EkH3ZLAQdpmpXMgwk=
github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.2.1 h1:sOhpJdR/+lbQniznp3cYSfwQlXbVkT0ccuiZScBrI6Y=
github.com/aliyun/alibabacloud-oss-go-sdk-v2 v1.2.1/go.mod h1:FTzydeQVmR24FI0D6XWUOMKckjXehM/jgMn1xC+DA9M=
github.com/aliyun/credentials-go v1.4.5 h1:O76WYKgdy1oQYYiJkERjlA2dxGuvLRrzuO2ScrtGWSk=
github.com/aliyun/credentials-go v1.4.5/go.mod h1:Jm6d+xIgwJVLVWT561vy67ZRP4lPTQxMbEYRuT2Ti1U=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/apple/foundationdb/bindings/go v0.0.0-20211207225159-47b9a81d1c10 h1:xU6bzJilZ630rLUhRsqWgJjSl2PCn5uLrehoG6ntwls=
github.com/apple/foundationdb/bindings/go v0.0.0-20211207225159-47b9a81d1c10/go.mod h1:w63jdZTFCtvdjsUj5yrdKgjxaAD5uXQX6hJ7EaiLFRs=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so=
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aws/aws-sdk-go-v2 v1.36.1 h1:iTDl5U6oAhkNPba0e1t1hrwAo02ZMqbrGq4k5JBWM5E=
github.com/aws/aws-sdk-go-v2 v1.36.1/go.mod h1:5PMILGVKiW32oDzjj6RU52yrNrDPUHcbZQYr1sM7qmM=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 h1:lL7IfaFzngfx0ZwUGOZdsFFnQ5uLvR0hWqqhyE7Q9M8=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7/go.mod h1:QraP0UcVlQJsmHfioCrveWOC1nbiWUl3ej08h4mXWoc=
github.com/aws/aws-sdk-go-v2/config v1.29.6 h1:fqgqEKK5HaZVWLQoLiC9Q+xDlSp+1LYidp6ybGE2OGg=
github.com/aws/aws-sdk-go-v2/config v1.29.6/go.mod h1:Ft+WLODzDQmCTHDvqAH1JfC2xxbZ0MxpZAcJqmE1LTQ=
github.com/aws/aws-sdk-go-v2/credentials v1.17.59 h1:9btwmrt//Q6JcSdgJOLI98sdr5p7tssS9yAsGe8aKP4=
github.com/aws/aws-sdk-go-v2/credentials v1.17.59/go.mod h1:NM8fM6ovI3zak23UISdWidyZuI1ghNe2xjzUZAyT+08=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 h1:KwsodFKVQTlI5EyhRSugALzsV6mG/SGrdjlMXSZSdso=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28/go.mod h1:EY3APf9MzygVhKuPXAc5H+MkGb8k/DOSQjWS0LgkKqI=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 h1:BjUcr3X3K0wZPGFg2bxOWW3VPN8rkE3/61zhP+IHviA=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32/go.mod h1:80+OGC/bgzzFFTUmcuwD0lb4YutwQeKLFpmt6hoWapU=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 h1:m1GeXHVMJsRsUAqG6HjZWx9dj7F5TR+cF1bjyfYyBd4=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32/go.mod h1:IitoQxGfaKdVLNg0hD8/DXmAqNy0H4K2H2Sf91ti8sI=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 h1:Pg9URiobXy85kgFev3og2CuOZ8JZUBENF+dcgWBaYNk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.27 h1:AmB5QxnD+fBFrg9LcqzkgF/CaYvMyU/BTlejG4t1S7Q=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.27/go.mod h1:Sai7P3xTiyv9ZUYO3IFxMnmiIP759/67iQbU4kdmkyU=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 h1:D4oz8/CzT9bAEYtVhSBmFj2dNOtaHOtMKc2vHBwYizA=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2/go.mod h1:Za3IHqTQ+yNcRHxu1OFucBh0ACZT4j4VQFF0BqpZcLY=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.8 h1:iwYS40JnrBeA9e9aI5S6KKN4EB2zR4iUVYN0nwVivz4=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.8/go.mod h1:Fm9Mi+ApqmFiknZtGpohVcBGvpTu542VC4XO9YudRi0=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 h1:SYVGSFQHlchIcy6e7x12bsrxClCXSP5et8cqVhL8cuw=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13/go.mod h1:kizuDaLX37bG5WZaoxGPQR/LNFXpxp0vsUnqfkWXfNE=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.8 h1:/Mn7gTedG86nbpjT4QEKsN1D/fThiYe1qvq7WsBGNHg=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.8/go.mod h1:Ae3va9LPmvjj231ukHB6UeT8nS7wTPfC3tMZSZMwNYg=
github.com/aws/aws-sdk-go-v2/service/s3 v1.72.3 h1:WZOmJfCDV+4tYacLxpiojoAdT5sxTfB3nTqQNtZu+J4=
github.com/aws/aws-sdk-go-v2/service/s3 v1.72.3/go.mod h1:xMekrnhmJ5aqmyxtmALs7mlvXw5xRh+eYjOjvrIIFJ4=
github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 h1:/eE3DogBjYlvlbhd2ssWyeuovWunHLxfgw3s/OJa4GQ=
github.com/aws/aws-sdk-go-v2/service/sso v1.24.15/go.mod h1:2PCJYpi7EKeA5SkStAmZlF6fi0uUABuhtF8ILHjGc3Y=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 h1:M/zwXiL2iXUrHputuXgmO94TVNmcenPHxgLXLutodKE=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14/go.mod h1:RVwIw3y/IqxC2YEXSIkAzRDdEU1iRabDPaYjpGCbCGQ=
github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 h1:TzeR06UCMUq+KA3bDkujxK1GVGy+G8qQN/QVYzGLkQE=
github.com/aws/aws-sdk-go-v2/service/sts v1.33.14/go.mod h1:dspXf/oYWGWo6DEvj98wpaTeqt5+DMidZD0A9BYTizc=
github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
github.com/baidubce/bce-sdk-go v0.9.221 h1:x5uTXND33m5TE3UBXYhlePuXcJi5rxNnBBt+bP7kPe0=
github.com/baidubce/bce-sdk-go v0.9.221/go.mod h1:zbYJMQwE4IZuyrJiFO8tO8NbtYiKTFTbwh4eIsqjVdg=
github.com/bcicen/jstream v1.0.1 h1:BXY7Cu4rdmc0rhyTVyT3UkxAiX3bnLpKLas9btbH5ck=
github.com/bcicen/jstream v1.0.1/go.mod h1:9ielPxqFry7Y4Tg3j4BfjPocfJ3TbsRtXOAYXYmRuAQ=
github.com/beevik/ntp v0.3.0 h1:xzVrPrE4ziasFXgBVBZJDP0Wg/KpMwk2KHJ4Ba8GrDw=
github.com/beevik/ntp v0.3.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A=
github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/bytedance/mockey v1.2.14 h1:KZaFgPdiUwW+jOWFieo3Lr7INM1P+6adO3hxZhDswY8=
github.com/bytedance/mockey v1.2.14/go.mod h1:1BPHF9sol5R1ud/+0VEHGQq/+i2lN+GTsr3O2Q9IENY=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g=
github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw=
github.com/ceph/go-ceph v0.18.0 h1:4WM6yAq/iqBDaeeADDiPKLqKiP0iZ4fffdgCr1lnOL4=
github.com/ceph/go-ceph v0.18.0/go.mod h1:cflETVTBNAQM6jdr7hpNHHFHKYiJiWWcAeRDrRx/1ng=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/charlievieth/fastwalk v1.0.14 h1:3Eh5uaFGwHZd8EGwTjJnSpBkfwfsak9h6ICgnWlhAyg=
github.com/charlievieth/fastwalk v1.0.14/go.mod h1:diVcUreiU1aQ4/Wu3NbxxH4/KYdKpLDojrQ1Bb2KgNY=
github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo=
github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I=
github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc h1:0xCWmFKBmarCqqqLeM7jFBSw/Or81UEElFqO8MY+GDs=
github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc/go.mod h1:uvR42Hb/t52HQd7x5/ZLzZEK8oihrFpgnodIJ1vte2E=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78 h1:QVw89YDxXxEe+l8gU8ETbOasdwEV+avkR75ZzsVV9WI=
github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
github.com/colinmarc/hdfs/v2 v2.4.0 h1:v6R8oBx/Wu9fHpdPoJJjpGSUxo8NhHIwrwsfhFvU9W0=
github.com/colinmarc/hdfs/v2 v2.4.0/go.mod h1:0NAO+/3knbMx6+5pCv+Hcbaz4xn/Zzbn9+WIib2rKVI=
github.com/coredns/coredns v1.4.0 h1:RubBkYmkByUqZWWkjRHvNLnUHgkRVqAWgSMmRFvpE1A=
github.com/coredns/coredns v1.4.0/go.mod h1:zASH/MVDgR6XZTbxvOnsZfffS+31vg6Ackf/wo1+AM0=
github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA=
github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb h1:GIzvVQ9UkUlOhSDlqmrQAAAUd6R3E+caIisNEyWXvNE=
github.com/coreos/pkg v0.0.0-20240122114842-bbd7aa9bf6fb/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 h1:iwZdTE0PVqJCos1vaoKsclOGD3ADKpshg3SRtYBbwso=
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM=
github.com/dave/jennifer v1.6.1/go.mod h1:nXbxhEmQfOZhWml3D1cDK5M1FLnMSozpbFN/m3RmGZc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davies/groupcache v0.0.0-20230821031435-e4e8362f58e1 h1:m8crlQg+91orxncf8Xt6utYTKi9N2PqbPvOnVmb2p24=
github.com/davies/groupcache v0.0.0-20230821031435-e4e8362f58e1/go.mod h1:rUkViuo3izQae5A7J4apO+ALkf5DqvVwKGzAbROmZUE=
github.com/dchest/siphash v1.2.1 h1:4cLinnzVJDKxTCl9B01807Yiy+W7ZzVHj/KIroQRvT4=
github.com/dchest/siphash v1.2.1/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4=
github.com/denisenkom/go-mssqldb v0.0.0-20200428022330-06a60b6afbbc/go.mod h1:xbL0rPBG9cCiLr28tMa8zpbdarY27NDyej4t/EjAShU=
github.com/dgraph-io/badger/v4 v4.5.1 h1:7DCIXrQjo1LKmM96YD+hLVJ2EEsyyoWxJfpdd56HLps=
github.com/dgraph-io/badger/v4 v4.5.1/go.mod h1:qn3Be0j3TfV4kPbVoK0arXCD1/nr1ftth6sbL5jxdoA=
github.com/dgraph-io/ristretto/v2 v2.1.0 h1:59LjpOJLNDULHh8MC4UaegN52lC4JnO2dITsie/Pa8I=
github.com/dgraph-io/ristretto/v2 v2.1.0/go.mod h1:uejeqfYXpUomfse0+lO+13ATz4TypQYLJZzBSAemuB4=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/djherbis/atime v1.0.0 h1:ySLvBAM0EvOGaX7TI4dAM5lWj+RdJUCKtGSEHN8SGBg=
github.com/djherbis/atime v1.0.0/go.mod h1:5W+KBIuTwVGcqjIfaTwt+KSYX1o6uep8dtevevQP/f8=
github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d h1:QK8IYltsNy+5QZcDFbVkyInrs98/wHy1tfUTGG91sps=
github.com/dswarbrick/smart v0.0.0-20190505152634-909a45200d6d/go.mod h1:apXo4PA/BgBPrt66j0N45O2stlBTRowdip2igwcUWVc=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4=
github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/emmansun/gmsm v0.34.1 h1:7eMyHjB0AeoSZ+sB3FZE9gZOJBZFbtY0tmWJdVFkfc0=
github.com/emmansun/gmsm v0.34.1/go.mod h1:NtH8X3s0ywBIICiOHD6Jj6P4brHHN6qUOI/nSK/x1jQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/go-control-plane v0.13.0 h1:HzkeUz1Knt+3bK+8LG1bxOO/jzWZmdxpwC51i202les=
github.com/envoyproxy/go-control-plane v0.13.0/go.mod h1:GRaKG3dwvFoTg4nj7aXdZnvMg4d7nvT/wl9WgVXn3Q8=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM=
github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4=
github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377 h1:gT+RM6gdTIAzMT7HUvmT5mL8SyG8Wx7iS3+L0V34Km4=
github.com/erikdubbelboer/gspt v0.0.0-20210805194459-ce36a5128377/go.mod h1:v6o7m/E9bfvm79dE1iFiF+3T7zLBnrjYjkWMa1J+Hv0=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo=
github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M=
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/gammazero/toposort v0.1.1 h1:OivGxsWxF3U3+U80VoLJ+f50HcPU1MIqE1JlKzoJ2Eg=
github.com/gammazero/toposort v0.1.1/go.mod h1:H2cozTnNpMw0hg2VHAYsAxmkHXBYroNangj2NTBQDvw=
github.com/geoffgarside/ber v1.1.0 h1:qTmFG4jJbwiSzSXoNJeHcOprVzZ8Ulde2Rrrifu5U9w=
github.com/geoffgarside/ber v1.1.0/go.mod h1:jVPKeCbj6MvQZhwLYsGwaGI52oUorHoHKNecGT85ZCc=
github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8=
github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-http-utils/headers v0.0.0-20181008091004-fed159eddc2a h1:v6zMvHuY9yue4+QkG/HQ/W67wvtQmWJ4SDo9aK/GIno=
github.com/go-http-utils/headers v0.0.0-20181008091004-fed159eddc2a/go.mod h1:I79BieaU4fxrw4LMXby6q5OS9XnoR9UIKLOzDFjUmuw=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-ldap/ldap/v3 v3.2.4 h1:PFavAq2xTgzo/loE8qNXcQaofAaqIpI4WgaLdv+1l3E=
github.com/go-ldap/ldap/v3 v3.2.4/go.mod h1:iYS1MdmrmceOJ1QOTnRXrIs7i3kloqtmGQjRvjKpyMg=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/errors v0.22.0 h1:c4xY/OLxUBSTiepAg3j/MHuAv5mJhnf53LLMWFB+u/w=
github.com/go-openapi/errors v0.22.0/go.mod h1:J3DmZScxCDufmIMsdOuDHxJbdOGC0xtUynjIx092vXE=
github.com/go-openapi/strfmt v0.23.0 h1:nlUS6BCqcnAk0pyhi9Y+kdDVZdZMHfEKQiS4HaMgO/c=
github.com/go-openapi/strfmt v0.23.0/go.mod h1:NrtIpfKtWIygRkKVsxh7XQMDQW5HKQl6S5ik2elW+K4=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.7.0/go.mod h1:xm76BBt941f7yWdGnI2DVPFFg1UK3YY04qifoXU3lOk=
github.com/go-playground/validator/v10 v10.19.0 h1:ol+5Fu+cSq9JD7SoSqe04GMI92cbn0+wvQ3bZ8b/AU4=
github.com/go-playground/validator/v10 v10.19.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g=
github.com/go-resty/resty/v2 v2.13.1/go.mod h1:GznXlLxkq6Nh4sU59rPmUw3VtgpO3aS96ORAI6Q7d+0=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-sql-driver/mysql v1.9.1 h1:FrjNGn/BsJQjVRuSa8CBrM5BWA9BWoXXat3KrtSb/iI=
github.com/go-sql-driver/mysql v1.9.1/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw=
github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
github.com/gofrs/uuid v4.3.0+incompatible h1:CaSVZxm5B+7o45rtab4jC2G37WGYX1zQfuU2i6DSvnc=
github.com/gofrs/uuid v4.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY=
github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU=
github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/flatbuffers v24.12.23+incompatible h1:ubBKR94NR4pXUCY/MUsRVzd9umNW7ht7EG9hHfS9FX8=
github.com/google/flatbuffers v24.12.23+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc=
github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0=
github.com/google/readahead v0.0.0-20161222183148-eaceba169032 h1:6Be3nkuJFyRfCgr6qTIzmRp8y9QwDIbqy/nYr9WDPos=
github.com/google/readahead v0.0.0-20161222183148-eaceba169032/go.mod h1:qYysrqQXuV4tzsizt4oOQ6mrBZQ0xnQXP3ylXX8Jk5Y=
github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM=
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw=
github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA=
github.com/googleapis/gax-go/v2 v2.14.0 h1:f+jMrjBPl+DL9nI4IQzLUxMq7XrAqFYB7hBPqMNIe8o=
github.com/googleapis/gax-go/v2 v2.14.0/go.mod h1:lhBCnjdLrWRaPvLWhmc8IS24m9mr07qSYnHncrgo+zk=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/gopherjs/gopherjs v1.12.80 h1:aC68NT6VK715WeUapxcPSFq/a3gZdS32HdtghdOIgAo=
github.com/gopherjs/gopherjs v1.12.80/go.mod h1:d55Q4EjGQHeJVms+9LGtXul6ykz5Xzx1E1gaXQXdimY=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/grafana/pyroscope-go v1.2.1 h1:ewi38pE6XMnoHlZYhGxS3uH5TGKA7vDhkT1T3RVkjq0=
github.com/grafana/pyroscope-go v1.2.1/go.mod h1:zzT9QXQAp2Iz2ZdS216UiV8y9uXJYQiGE1q8v1FyhqU=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8 h1:iwOtYXeeVSAeYefJNaxDytgjKtUuKQbJqgAIjlnicKg=
github.com/grafana/pyroscope-go/godeltaprof v0.1.8/go.mod h1:2+l7K7twW49Ct4wFluZD3tZ6e0SjanjcUUBPVD/UuGU=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
github.com/grpc-ecosystem/go-grpc-middleware v1.1.0 h1:THDBEeQ9xZ8JEaCLyLQqXMMdRqNr0QAUJTIkQAUtFjg=
github.com/grpc-ecosystem/go-grpc-middleware v1.1.0/go.mod h1:f5nM7jw/oeRSadq3xCzHAvxcr8HZnzsqU6ILg/0NiiE=
github.com/hashicorp/consul/api v1.29.2 h1:aYyRn8EdE2mSfG14S1+L9Qkjtz8RzmaWh6AcNGRNwPw=
github.com/hashicorp/consul/api v1.29.2/go.mod h1:0YObcaLNDSbtlgzIRtmRXI1ZkeuK0trCBxwZQ4MYnIk=
github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0=
github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg=
github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg=
github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc=
github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI=
github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
github.com/hashicorp/go-sockaddr v1.0.2 h1:ztczhD1jLxIRjVejw8gFomI1BQZOe2WoVOu0SyteCQc=
github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A=
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-version v1.2.1 h1:zEfKbn2+PDgroKdiOzqiE8rsmLqU2uwi5PB5pBJ3TkI=
github.com/hashicorp/go-version v1.2.1/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4=
github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc=
github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM=
github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0=
github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY=
github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099 h1:heHZCso/ytvpYr+hp2cDxlZfA/jTw46aHSvT9kZnJ7o=
github.com/hungys/go-lz4 v0.0.0-20170805124057-19ff7f07f099/go.mod h1:h44tqw4M3GN0Woo9KBStxJxm8huNi+9+tOHoeqSvhaY=
github.com/iancoleman/strcase v0.3.0/go.mod h1:iwCmte+B7n89clKwxIoIXy/HfoL7AsD47ZCWhYzw7ho=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.7.3 h1:PO1wNKj/bTAwxSJnO1Z4Ai8j4magtqg2SLNjEDzcXQo=
github.com/jackc/pgx/v5 v5.7.3/go.mod h1:ncY89UGWxg82EykZUwSpUKEfccBGGYq1xjrOpsbsfGQ=
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM=
github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg=
github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo=
github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o=
github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg=
github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8=
github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs=
github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY=
github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/juicedata/cli/v2 v2.19.4-0.20230605075551-9c9c5c0dce83 h1:RyHTka3jCnTaUqfRYjlwcQlr53aasmkvHEbYLXthqr8=
github.com/juicedata/cli/v2 v2.19.4-0.20230605075551-9c9c5c0dce83/go.mod h1:1CNUng3PtjQMtRzJO4FMXBQvkGtuYRxxiR9xMa7jMwI=
github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db h1:esc0bVXkjEuyPLn7JXFhKBDztpM0dT0GYQn7CqaBB6w=
github.com/juicedata/go-colorable v0.0.0-20250208072043-a97a0c2023db/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/juicedata/go-fuse/v2 v2.1.1-0.20250807045235-112198daa7df h1:H3/AM/YZGPitgptMKBn3WrvWj7UrlhJSMHx4BrjuXMo=
github.com/juicedata/go-fuse/v2 v2.1.1-0.20250807045235-112198daa7df/go.mod h1:xKwi1cF7nXAOBCXujD5ie0ZKsxc8GGSA1rlMJc+8IJs=
github.com/juicedata/go-nfs-client v0.0.0-20250220101412-d3a8c1ca64a1 h1:GgH2ZG9inMYSme7zZb79z3QeOW70YusbJIVYjvqd508=
github.com/juicedata/go-nfs-client v0.0.0-20250220101412-d3a8c1ca64a1/go.mod h1:xOMqi3lOrcGe9uZLnSzgaq94Vc3oz6VPCNDLJUnXpKs=
github.com/juicedata/go-smb2 v0.0.0-20260310064141-58f27d06634e h1:M4iUt9qotJuRbZgD1TLMzy4BkVGidsOXh4YvHDNKhdY=
github.com/juicedata/go-smb2 v0.0.0-20260310064141-58f27d06634e/go.mod h1:CgWpFCFWzzEA5hVkhAc6DZZzGd3czx+BblvOzjmg6KA=
github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d h1:kpQMvNZJKGY3PTt7OSoahYc4nM0HY67SvK0YyS0GLwA=
github.com/juicedata/godaemon v0.0.0-20210629045518-3da5144a127d/go.mod h1:dlxKkLh3qAIPtgr2U/RVzsZJDuXA1ffg+Njikfmhvgw=
github.com/juicedata/gogfapi v0.0.0-20241204082332-ecd102647f80 h1:EPg/f3lhbAOjE2M0WpVi47Fk62mEmmPejRuGVdOFQww=
github.com/juicedata/gogfapi v0.0.0-20241204082332-ecd102647f80/go.mod h1:Ho5G4KgrgbMKW0buAJdOmYoJcOImkzznJQaLiATrsx4=
github.com/juicedata/golang-lru/v2 v2.0.8-0.20251126062551-1b321869f904 h1:oNtkL1jwrNMMcBlHNW1fhdl4quK7p1EdR7o1Rja5xpM=
github.com/juicedata/golang-lru/v2 v2.0.8-0.20251126062551-1b321869f904/go.mod h1:qnbgnNzfydwuHjSCApF4bdul+tZ8T3y1MkZG/OFczLA=
github.com/juicedata/huaweicloud-sdk-go-obs v3.22.12-0.20230228031208-386e87b5c091+incompatible h1:2/ttSmYoX+QMegpNyAJR0Y6aHcVk57F7RJit5xN2T/s=
github.com/juicedata/huaweicloud-sdk-go-obs v3.22.12-0.20230228031208-386e87b5c091+incompatible/go.mod h1:Ukwa8ffRQLV6QRwpqGioPjn2Wnf7TBDA4DbennDOqHE=
github.com/juicedata/minio v0.0.0-20251120043259-079fa6a601db h1:yGKlGEz3nOD2IovjI+V4O+eY1TPgOp/T6gOxMl9/xKI=
github.com/juicedata/minio v0.0.0-20251120043259-079fa6a601db/go.mod h1:1/4WHQKDOsWA1dd3ADrq9IE/jtFec9MHLy656kIXjNg=
github.com/juicedata/mpb/v7 v7.0.4-0.20231024073412-2b8d31be510b h1:0/6suPNZnrOlRlBaU/Bnitu8HiKkkLSzQhHbwQ9AysM=
github.com/juicedata/mpb/v7 v7.0.4-0.20231024073412-2b8d31be510b/go.mod h1:NXGsfPGx6G2JssqvEcULtDqUrxuuYs4llpv8W6ZUpzk=
github.com/juju/ratelimit v1.0.2 h1:sRxmtRiajbvrcLQT7S+JbqU0ntsb9W2yhSdNN8tWfaI=
github.com/juju/ratelimit v1.0.2/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s=
github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4=
github.com/klauspost/cpuid/v2 v2.0.2/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.3/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU=
github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/klauspost/readahead v1.3.1 h1:QqXNYvm+VvqYcbrRT4LojUciM0XrznFRIDrbHiJtu/0=
github.com/klauspost/readahead v1.3.1/go.mod h1:AH9juHzNH7xqdqFHrMRSHeH2Ps+vFf+kblDqzPFiLJg=
github.com/klauspost/reedsolomon v1.9.11 h1:n2kipJFo+CPqg7fH988XJXjqEyj14RJ8BYj7UayxPNg=
github.com/klauspost/reedsolomon v1.9.11/go.mod h1:nLvuzNvy1ZDNQW30IuMc2ZWCbiqrJgdLoUS2X8HAUVg=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/ks3sdklib/aws-sdk-go v1.6.0 h1:ejTeQ+l5l5mok7MM3Bz8WW4/kUVjGkPSSKqllgp1uMc=
github.com/ks3sdklib/aws-sdk-go v1.6.0/go.mod h1:jGcsV0dJgMmStAyqjkKVUu6F167pAXYZAS3LqoZMmtM=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/l0wl3vel/bunny-storage-go-sdk v0.0.10 h1:Vy8I4nGazW1QvwdIR3b/viHmBVFBf2i4RgR0dV0wJ/c=
github.com/l0wl3vel/bunny-storage-go-sdk v0.0.10/go.mod h1:2kvY9oZnsZR4QAvtkj8s7MuEl37dTARhQz7ICLpyD2M=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lib/pq v1.7.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/magefile/mage v1.10.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs=
github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ=
github.com/minio/cli v1.24.2 h1:J+fCUh9mhPLjN3Lj/YhklXvxj8mnyE/D6FpFduXJ2jg=
github.com/minio/cli v1.24.2/go.mod h1:bYxnK0uS629N3Bq+AOZZ+6lwF77Sodk4+UL9vNuXhOY=
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
github.com/minio/md5-simd v1.1.0/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw=
github.com/minio/md5-simd v1.1.1 h1:9ojcLbuZ4gXbB2sX53MKn8JUZ0sB/2wfwsEcRw+I08U=
github.com/minio/md5-simd v1.1.1/go.mod h1:XpBqgZULrMYD3R+M28PcmP0CkI7PEMzB3U77ZrKZ0Gw=
github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78 h1:v7OMbUnWkyRlO2MZ5AuYioELhwXF/BgZEznrQ1drBEM=
github.com/minio/minio-go/v7 v7.0.11-0.20210302210017-6ae69c73ce78/go.mod h1:mTh2uJuAbEqdhMVl6CMIIZLUeiMiWtJR4JB8/5g2skw=
github.com/minio/selfupdate v0.3.1 h1:BWEFSNnrZVMUWXbXIgLDNDjbejkmpAmZvy/nCz1HlEs=
github.com/minio/selfupdate v0.3.1/go.mod h1:b8ThJzzH7u2MkF6PcIra7KaXO9Khf6alWPvMSyTDCFM=
github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
github.com/minio/simdjson-go v0.2.1 h1:nxYlp4Qd0w2pwLlif00l5vTFL6PcNAKpyHq27/pageg=
github.com/minio/simdjson-go v0.2.1/go.mod h1:JPUSkRykfSPS+AhO0YPA1h0l5vY7NqrF4zel2b12wxc=
github.com/minio/sio v0.2.1 h1:NjzKiIMSMcHediVQR0AFVx2tp7Wxh9tKPfDI3kH7aHQ=
github.com/minio/sio v0.2.1/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw=
github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mmcloughlin/avo v0.0.0-20201105074841-5d2f697d268f/go.mod h1:6aKT4zZIrpGqB3RpFU14ByCSSyKY6LfJz4J/JJChHfI=
github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/montanaflynn/stats v0.7.0 h1:r3y12KyNxj/Sb/iOE46ws+3mS1+MZca1wlHQFPsY/JU=
github.com/montanaflynn/stats v0.7.0/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ=
github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/ncw/directio v1.0.5 h1:JSUBhdjEvVaJvOoyPAbcW0fnd0tvRXD76wEfZ1KcQz4=
github.com/ncw/directio v1.0.5/go.mod h1:rX/pKEYkOXBGOggmcyJeJGloCkleSvphPx2eV3t6ROk=
github.com/ncw/swift/v2 v2.0.3 h1:8R9dmgFIWs+RiVlisCEfiQiik1hjuR0JnOkLxaP9ihg=
github.com/ncw/swift/v2 v2.0.3/go.mod h1:cbAO76/ZwcFrFlHdXPjaqWZ9R7Hdar7HpjRXBfbjigk=
github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/oliverisaac/shellescape v0.0.0-20220131224704-1b6c6b87b668 h1:WUilXdVrxYH+fFkmstviAOj1o9CfoW5O/Sd0LWPIVUA=
github.com/oliverisaac/shellescape v0.0.0-20220131224704-1b6c6b87b668/go.mod h1:EDgl+cvbmeOQUMTTH94gjXVtFHr8xDe5BiXhWn7Hf1E=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 h1:XeOYlK9W1uCmhjJSsY78Mcuh7MVkNjTzmHx1yBzizSU=
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14/go.mod h1:jVblp62SafmidSkvWrXyxAme3gaTfEtWwRPGz5cpvHg=
github.com/philhofer/fwd v1.1.1 h1:GdGcTjf5RNAxwS4QLsiMzJYj5KEvPJD3Abr261yRQXQ=
github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pierrec/lz4 v2.5.2+incompatible h1:WCjObylUIOlKy/+7Abdn34TLIkXiA4UWUMhxq9m9ZXI=
github.com/pierrec/lz4 v2.5.2+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTmyFqUwr+jcCvpVkK7sumiz+ko5H9eq4=
github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg=
github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c h1:CgbKAHto5CQgWM9fSBIvaxsJHuGP0uM74HXtv3MyyGQ=
github.com/pingcap/failpoint v0.0.0-20220801062533-2eaa32854a6c/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E=
github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw=
github.com/pingcap/kvproto v0.0.0-20230403051650-e166ae588106 h1:lOtHtTItLlc9R+Vg/hU2klOOs+pjKLT2Cq+CEJgjvIQ=
github.com/pingcap/kvproto v0.0.0-20230403051650-e166ae588106/go.mod h1:guCyM5N+o+ru0TsoZ1hi9lDjUMs2sIBjW3ARTEpVbnk=
github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw=
github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/sftp v1.13.5 h1:a3RLUqkyjYRtBTZJZ1VRrKbN3zhuPLlUc3sphVz81go=
github.com/pkg/sftp v1.13.5/go.mod h1:wHDZ0IZX6JcBYRK1TH9bcVq8G7TLpVHYIGJRFnmPfxg=
github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE=
github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo=
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7 h1:xoIK0ctDddBMnc74udxJYBqlo9Ylnsp1waqjLsnef20=
github.com/pquerna/ffjson v0.0.0-20190930134022-aa0246cd15f7/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M=
github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk=
github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/prometheus/prometheus v0.54.1 h1:vKuwQNjnYN2/mDoWfHXDhAsz/68q/dQDb+YbcEqU7MQ=
github.com/prometheus/prometheus v0.54.1/go.mod h1:xlLByHhk2g3ycakQGrMaU8K7OySZx98BzeCR99991NY=
github.com/qingstor/qingstor-sdk-go/v4 v4.4.0 h1:tbItWtGB1TDfYzqK8dtm6tV+xWU5iYMwL37C6AL5dDs=
github.com/qingstor/qingstor-sdk-go/v4 v4.4.0/go.mod h1:mDVFtA7+bXQ5xoELTWkoFy1Ad13wtp8jtlnl/RU+zzM=
github.com/qiniu/dyn v1.3.0/go.mod h1:E8oERcm8TtwJiZvkQPbcAh0RL8jO1G0VXJMW3FAWdkk=
github.com/qiniu/go-sdk/v7 v7.25.2 h1:URwgZpxySdiwu2yQpHk93X4LXWHyFRp1x3Vmlk/YWvo=
github.com/qiniu/go-sdk/v7 v7.25.2/go.mod h1:dmKtJ2ahhPWFVi9o1D5GemmWoh/ctuB9peqTowyTO8o=
github.com/qiniu/x v1.10.5/go.mod h1:03Ni9tj+N2h2aKnAz+6N0Xfl8FwMEDRC2PAlxekASDs=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93 h1:UVArwN/wkKjMVhh2EQGC0tEc1+FqiLlvYXY5mQ2f8Wg=
github.com/rasky/go-xdr v0.0.0-20170124162913-1a41d1a06c93/go.mod h1:Nfe4efndBz4TibWycNE+lqyJZiMX4ycx+QKV8Ta0f/o=
github.com/redis/go-redis/v9 v9.16.0 h1:OotgqgLSRCmzfqChbQyG1PHC3tLNR89DG4jdOERSEP4=
github.com/redis/go-redis/v9 v9.16.0/go.mod h1:u410H11HMLoB+TP67dz8rL9s6QW2j76l0//kSOd3370=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rjeczalik/notify v0.9.3 h1:6rJAzHTGKXGj76sbRgDiDcYj/HniypXmSJo1SWakZeY=
github.com/rjeczalik/notify v0.9.3/go.mod h1:gF3zSOrafR9DQEWSE8TjfI9NkooDxbyT4UgRGKZA0lc=
github.com/rogpeppe/go-internal v1.0.1-alpha.1/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik=
github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/secure-io/sio-go v0.3.1 h1:dNvY9awjabXTYGsTF1PiCySl9Ltofk9GA3VdWlo7rRc=
github.com/secure-io/sio-go v0.3.1/go.mod h1:+xbkjDzPjwh4Axd07pRKSNriS9SCiYksWnZqdnfpQxs=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shirou/gopsutil/v3 v3.23.11 h1:i3jP9NjCPUz7FiZKxlMnODZkdSIp2gnzfrvsu9CuWEQ=
github.com/shirou/gopsutil/v3 v3.23.11/go.mod h1:1FrWgea594Jp7qmjHUUPlJDTPgcsb9mGnXDxavtikzM=
github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM=
github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ=
github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU=
github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k=
github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk=
github.com/shurcooL/httpfs v0.0.0-20181222201310-74dc9339e414/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg=
github.com/shurcooL/vfsgen v0.0.0-20180915214035-33ae1944be3f/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.8.0/go.mod h1:4GuYW9TZmE769R5STWrRakJc4UqQ3+QQ95fyz7ENv1A=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N3yZFZkDFs=
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg=
github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/studio-b12/gowebdav v0.10.0 h1:Yewz8FFiadcGEu4hxS/AAJQlHelndqln1bns3hcJIYc=
github.com/studio-b12/gowebdav v0.10.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE=
github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE=
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.563/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/kms v1.0.563/go.mod h1:uom4Nvi9W+Qkom0exYiJ9VWJjXwyxtPYTkKkaLMlfE0=
github.com/tencentyun/cos-go-sdk-v5 v0.7.63 h1:A+FH9HU8a2ozcd36VkrtiCooyzDPEOupIGWKgATtGlQ=
github.com/tencentyun/cos-go-sdk-v5 v0.7.63/go.mod h1:8+hG+mQMuRP/OIS9d83syAvXvrMj9HhkND6Q1fLghw0=
github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a h1:J/YdBZ46WKpXsxsW93SG+q0F8KI+yFrcIDT4c/RNoc4=
github.com/tiancaiamao/gp v0.0.0-20221230034425-4025bc8a4d4a/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM=
github.com/tidwall/gjson v1.6.7 h1:Mb1M9HZCRWEcXQ8ieJo7auYyyiSux6w9XN3AdTpxJrE=
github.com/tidwall/gjson v1.6.7/go.mod h1:zeFuBCIqD4sN/gmqBzZ4j7Jd6UcA2Fc56x7QFsv+8fI=
github.com/tidwall/match v1.0.3 h1:FQUVvBImDutD8wJLN6c5eMzWtjgONK9MwIBCOrUJKeE=
github.com/tidwall/match v1.0.3/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.0.2 h1:Z7S3cePv9Jwm1KwS0513MRaoUe3S01WPbLNV40pwWZU=
github.com/tidwall/pretty v1.0.2/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg=
github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y=
github.com/tikv/client-go/v2 v2.0.7 h1:nNTx/AR6n8Ew5VtHanFPG8NkFLLXbaNs5/K43DDma04=
github.com/tikv/client-go/v2 v2.0.7/go.mod h1:9JNUWtHN8cx8eynHZ9xzdPi5YY6aiN1ILQyhfPUBcMo=
github.com/tikv/pd/client v0.0.0-20230329114254-1948c247c2b1 h1:bzlSSzw+6qTwPs8pMcPI1bt27TAOhSdAEwdPCz6eBlg=
github.com/tikv/pd/client v0.0.0-20230329114254-1948c247c2b1/go.mod h1:3cTcfo8GRA2H/uSttqA3LvMfMSHVBJaXk3IgkFXFVxo=
github.com/tinylib/msgp v1.1.3 h1:3giwAkmtaEDLSV0MdO1lDLuPgklgPzmk8H9+So2BVfA=
github.com/tinylib/msgp v1.1.3/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI=
github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk=
github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.52.0 h1:wqBQpxH71XW0e2g+Og4dzQM8pk34aFYlA1Ga8db7gU0=
github.com/valyala/fasthttp v1.52.0/go.mod h1:hf5C4QnVMkNXMspnsUlfM3WitlgYflyhHYoKol/szxQ=
github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8 h1:EVObHAr8DqpoJCVv6KYTle8FEImKhtkfcZetNqxDoJQ=
github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE=
github.com/vimeo/go-util v1.4.1 h1:UbNoaYH1eHv4LqBSH6zIItj+zKqbln0i01oY3iA/QPM=
github.com/vimeo/go-util v1.4.1/go.mod h1:r+yspV//C48HeMXV8nEvtUeNiIiGfVv3bbEHzOgudwE=
github.com/volcengine/ve-tos-golang-sdk/v2 v2.7.8 h1:/vB6jop4i70Ys8KAzK0xZfbMzMggJsTnIp6gZYnnSFM=
github.com/volcengine/ve-tos-golang-sdk/v2 v2.7.8/go.mod h1:IrjK84IJJTuOZOTMv/P18Ydjy/x+ow7fF7q11jAxXLM=
github.com/willf/bitset v1.1.11 h1:N7Z7E9UvjW+sGsEl7k/SJrvY2reP1A07MrGuCjIOjRE=
github.com/willf/bitset v1.1.11/go.mod h1:83CECat5yLh5zVOf4P1ErAgKA5UDvKtgyUABdr3+MjI=
github.com/willf/bloom v2.0.3+incompatible h1:QDacWdqcAUI1MPOwIQZRy9kOR7yxfyEmxX8Wdm2/JPA=
github.com/willf/bloom v2.0.3+incompatible/go.mod h1:MmAltL9pDMNTrvUkxdg0k0q5I0suxmuwp3KbyrZLOZ8=
github.com/winfsp/cgofuse v1.6.0 h1:re3W+HTd0hj4fISPBqfsrwyvPFpzqhDu8doJ9nOPDB0=
github.com/winfsp/cgofuse v1.6.0/go.mod h1:uxjoF2jEYT3+x+vC2KJddEGdk/LU8pRowXmyVMHSV5I=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw=
github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0=
go.etcd.io/etcd v3.3.27+incompatible h1:5hMrpf6REqTHV2LW2OclNpRtxI0k9ZplMemJsMSWju0=
go.etcd.io/etcd v3.3.27+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
go.etcd.io/etcd/api/v3 v3.5.9 h1:4wSsluwyTbGGmyjJktOf3wFQoTBIURXHnq9n/G/JQHs=
go.etcd.io/etcd/api/v3 v3.5.9/go.mod h1:uyAal843mC8uUVSLWz6eHa/d971iDGnCRpmKd2Z+X8k=
go.etcd.io/etcd/client/pkg/v3 v3.5.9 h1:oidDC4+YEuSIQbsR94rY9gur91UPL6DnxDCIYd2IGsE=
go.etcd.io/etcd/client/pkg/v3 v3.5.9/go.mod h1:y+CzeSmkMpWN2Jyu1npecjB9BBnABxGM4pN8cGuJeL4=
go.etcd.io/etcd/client/v3 v3.5.9 h1:r5xghnU7CwbUxD/fbUtRyJGaYNfDun8sp/gTr1hew6E=
go.etcd.io/etcd/client/v3 v3.5.9/go.mod h1:i/Eo5LrZ5IKqpbtpPDuaUnDOUv471oDg8cjQaUr2MbA=
go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80=
go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/contrib/detectors/gcp v1.29.0 h1:TiaiXB4DpGD3sdzNlYQxruQngn5Apwzi1X0DRhuGvDQ=
go.opentelemetry.io/contrib/detectors/gcp v1.29.0/go.mod h1:GW2aWZNwR2ZxDLdv8OyC2G8zkRoQBuURgV7RPQgcPoU=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0 h1:WDdP9acbMYjbKIyJUhTvtzj601sVJOqgWdUxSdR/Ysc=
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.29.0/go.mod h1:BLbf7zbNIONBLPwvFnwNHGj4zge8uTCM/UPIVW1Mq2I=
go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY=
go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ=
go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
go.uber.org/multierr v1.7.0/go.mod h1:7EAYxJLBy9rStEaz58O2t4Uvip6FSURkq8/ppBp95ak=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI=
go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg=
golang.org/x/arch v0.0.0-20201008161808-52c3e6f60cff/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4=
golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4=
golang.org/x/arch v0.11.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.0.0-20180807104621-f027049dab0a/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200709230013-948cd5f35899/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.26.0 h1:EGMPT//Ezu+ylkCijjPc+f4Aih7sZvaAr+O3EHBxvZg=
golang.org/x/mod v0.26.0/go.mod h1:/j6NAhSk8iQ723BGAUyoAcn7SlD7s15Dp9Nd/SfeaFQ=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180807162357-acbc56fc7007/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190308142131-b40df0fb21c3/go.mod h1:25r3+/G6/xytQM8iWZKq3Hn0kr0rgFKPUNVEL/dr3z4=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20201105001634-bc3cf281b174/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.210.0 h1:HMNffZ57OoZCRYSbdWVRoqOa8V8NIHLL0CzdBPLztWk=
google.golang.org/api v0.210.0/go.mod h1:B9XDZGnx2NtyjzVkOVTGrFSAVZgPcbedzKg/gTLwqBs=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 h1:ToEetK57OidYuqD4Q5w+vfEnPvPpuTwedCNVohYJfNk=
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697/go.mod h1:JJrvXBWRZaFMxBufik1a4RpFw4HhgVtBBWQeQgUj2cc=
google.golang.org/genproto/googleapis/api v0.0.0-20241113202542-65e8d215514f h1:M65LEviCfuZTfrfzwwEoxVtgvfkFkBUbFnRbxCXuXhU=
google.golang.org/genproto/googleapis/api v0.0.0-20241113202542-65e8d215514f/go.mod h1:Yo94eF2nj7igQt+TiJ49KxjIH8ndLYPZMIRSiRcEbg0=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241118233622-e639e219e697 h1:LWZqQOEjDyONlF1H6afSWpAL/znlREo2tHfLoe+8LMA=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241118233622-e639e219e697/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.67.2 h1:Lq11HW1nr5m4OYV+ZVy2BjOK78/zqnTx24vyDBP1JcQ=
google.golang.org/grpc v1.67.2/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
google.golang.org/grpc/stats/opentelemetry v0.0.0-20240907200651-3ffb98b2c93a h1:UIpYSuWdWHSzjwcAFRLjKcPXFZVVLXGEM23W+NWqipw=
google.golang.org/grpc/stats/opentelemetry v0.0.0-20240907200651-3ffb98b2c93a/go.mod h1:9i1T9n4ZinTUZGgzENMi8MDDgbGC5mqTS75JAv6xN3A=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.36.3 h1:82DV7MYdb8anAVi3qge1wSnMDrnKK7ebr+I0hHRN1BU=
google.golang.org/protobuf v1.36.3/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/ini.v1 v1.57.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216 h1:2TSTkQ8PMvGOD5eeqqRVv6Z9+BYI+bowK97RCr3W+9M=
gopkg.in/kothar/go-backblaze.v0 v0.0.0-20210124194846-35409b867216/go.mod h1:zJ2QpyDCYo1KvLXlmdnFlQAyF/Qfth0fB8239Qg7BIE=
gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc=
gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/urfave/cli.v1 v1.20.0/go.mod h1:vuBzUtMdQeixQj8LVd+/98pzhxNGQoyuPBlsXHOQNO0=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
modernc.org/fileutil v1.0.0 h1:Z1AFLZwl6BO8A5NldQg/xTSjGLetp+1Ubvl4alfGx8w=
modernc.org/fileutil v1.0.0/go.mod h1:JHsWpkrk/CnVV1H/eGlFf85BEpfkrp56ro8nojIq9Q8=
pgregory.net/rapid v0.5.3 h1:163N50IHFqr1phZens4FQOdPgfJscR7a562mjQqeo4M=
pgregory.net/rapid v0.5.3/go.mod h1:PY5XlDGj0+V1FCq0o192FdRhpKHGTRIWBgqjDBTrq04=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
xorm.io/builder v0.3.7 h1:2pETdKRK+2QG4mLX4oODHEhn5Z8j1m8sXa7jfu+/SZI=
xorm.io/builder v0.3.7/go.mod h1:aUW0S9eb9VCaPohFCH3j7czOx1PMW3i1HrSzbLYGBSE=


================================================
FILE: hack/autocomplete/bash_autocomplete
================================================
#! /bin/bash

_cli_bash_autocomplete() {
  if [[ "${COMP_WORDS[0]}" != "source" ]]; then
    local cur opts base
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    if [[ "$cur" == "-"* ]]; then
      opts=$( ${COMP_WORDS[@]:0:$COMP_CWORD} ${cur} --generate-bash-completion )
    else
      opts=$( ${COMP_WORDS[@]:0:$COMP_CWORD} --generate-bash-completion )
    fi
    COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
    return 0
  fi
}

complete -o bashdefault -o default -o nospace -F _cli_bash_autocomplete juicefs


================================================
FILE: hack/autocomplete/zsh_autocomplete
================================================
#compdef juicefs

_cli_zsh_autocomplete() {
  local -a opts
  local cur
  cur=${words[-1]}
  if [[ "$cur" == "-"* ]]; then
    opts=("${(@f)$(_CLI_ZSH_AUTOCOMPLETE_HACK=1 ${words[@]:0:#words[@]-1} ${cur} --generate-bash-completion)}")
  else
    opts=("${(@f)$(_CLI_ZSH_AUTOCOMPLETE_HACK=1 ${words[@]:0:#words[@]-1} --generate-bash-completion)}")
  fi

  if [[ "${opts[1]}" != "" ]]; then
    _describe 'values' opts
  else
    _files
  fi

  return
}

compdef _cli_zsh_autocomplete juicefs


================================================
FILE: hack/builder/Dockerfile
================================================
FROM ghcr.io/gythialy/golang-cross:v1.21.9-0

RUN apt-get update && apt-get install -y musl-tools && apt-get -y autoremove && \
    apt-get clean && rm -rf /var/cache/apt/* /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    git config --global --add safe.directory /go/src/github.com/juicedata/juicefs && \
    curl -fsSL -o /tmp/aarch64-linux-musl-cross.tgz https://musl.cc/aarch64-linux-musl-cross.tgz && \
    tar -xf /tmp/aarch64-linux-musl-cross.tgz -C /usr/local/ && rm -f /tmp/aarch64-linux-musl-cross.tgz


================================================
FILE: hack/builder/sdk.Dockerfile
================================================
FROM centos:7

RUN yum install -y java-1.8.0-openjdk maven git gcc make \
  && ln -s /go/bin/go /usr/local/bin/go \
  && rm -rf /var/cache/yum


================================================
FILE: hack/winfsp_headers/fuse.h
================================================
/**
 * @file fuse/fuse.h
 * WinFsp FUSE compatible API.
 *
 * This file is derived from libfuse/include/fuse.h:
 *     FUSE: Filesystem in Userspace
 *     Copyright 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
 *
 * @copyright 2015-2020 Bill Zissimopoulos
 */
/*
 * This file is part of WinFsp.
 *
 * You can redistribute it and/or modify it under the terms of the GNU
 * General Public License version 3 as published by the Free Software
 * Foundation.
 *
 * Licensees holding a valid commercial license may use this software
 * in accordance with the commercial license agreement provided in
 * conjunction with the software.  The terms and conditions of any such
 * commercial license agreement shall govern, supersede, and render
 * ineffective any application of the GPLv3 license to this software,
 * notwithstanding of any reference thereto in the software or
 * associated repository.
 */

#ifndef FUSE_H_
#define FUSE_H_

#include "fuse_common.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fuse;

typedef int (*fuse_fill_dir_t)(void *buf, const char *name,
    const struct fuse_stat *stbuf, fuse_off_t off);
typedef struct fuse_dirhandle *fuse_dirh_t;
typedef int (*fuse_dirfil_t)(fuse_dirh_t h, const char *name,
    int type, fuse_ino_t ino);

struct fuse_operations
{
    /* S - supported by WinFsp */
    /* S */ int (*getattr)(const char *path, struct fuse_stat *stbuf);
    /* S */ int (*getdir)(const char *path, fuse_dirh_t h, fuse_dirfil_t filler);
    /* S */ int (*readlink)(const char *path, char *buf, size_t size);
    /* S */ int (*mknod)(const char *path, fuse_mode_t mode, fuse_dev_t dev);
    /* S */ int (*mkdir)(const char *path, fuse_mode_t mode);
    /* S */ int (*unlink)(const char *path);
    /* S */ int (*rmdir)(const char *path);
    /* S */ int (*symlink)(const char *dstpath, const char *srcpath);
    /* S */ int (*rename)(const char *oldpath, const char *newpath);
    /* _ */ int (*link)(const char *srcpath, const char *dstpath);
    /* S */ int (*chmod)(const char *path, fuse_mode_t mode);
    /* S */ int (*chown)(const char *path, fuse_uid_t uid, fuse_gid_t gid);
    /* S */ int (*truncate)(const char *path, fuse_off_t size);
    /* S */ int (*utime)(const char *path, struct fuse_utimbuf *timbuf);
    /* S */ int (*open)(const char *path, struct fuse_file_info *fi);
    /* S */ int (*read)(const char *path, char *buf, size_t size, fuse_off_t off,
        struct fuse_file_info *fi);
    /* S */ int (*write)(const char *path, const char *buf, size_t size, fuse_off_t off,
        struct fuse_file_info *fi);
    /* S */ int (*statfs)(const char *path, struct fuse_statvfs *stbuf);
    /* S */ int (*flush)(const char *path, struct fuse_file_info *fi);
    /* S */ int (*release)(const char *path, struct fuse_file_info *fi);
    /* S */ int (*fsync)(const char *path, int datasync, struct fuse_file_info *fi);
    /* S */ int (*setxattr)(const char *path, const char *name, const char *value, size_t size,
        int flags);
    /* S */ int (*getxattr)(const char *path, const char *name, char *value, size_t size);
    /* S */ int (*listxattr)(const char *path, char *namebuf, size_t size);
    /* S */ int (*removexattr)(const char *path, const char *name);
    /* S */ int (*opendir)(const char *path, struct fuse_file_info *fi);
    /* S */ int (*readdir)(const char *path, void *buf, fuse_fill_dir_t filler, fuse_off_t off,
        struct fuse_file_info *fi);
    /* S */ int (*releasedir)(const char *path, struct fuse_file_info *fi);
    /* S */ int (*fsyncdir)(const char *path, int datasync, struct fuse_file_info *fi);
    /* S */ void *(*init)(struct fuse_conn_info *conn);
    /* S */ void (*destroy)(void *data);
    /* _ */ int (*access)(const char *path, int mask);
    /* S */ int (*create)(const char *path, fuse_mode_t mode, struct fuse_file_info *fi);
    /* S */ int (*ftruncate)(const char *path, fuse_off_t off, struct fuse_file_info *fi);
    /* S */ int (*fgetattr)(const char *path, struct fuse_stat *stbuf, struct fuse_file_info *fi);
    /* _ */ int (*lock)(const char *path,
        struct fuse_file_info *fi, int cmd, struct fuse_flock *lock);
    /* S */ int (*utimens)(const char *path, const struct fuse_timespec tv[2]);
    /* _ */ int (*bmap)(const char *path, size_t blocksize, uint64_t *idx);
    /* _ */ unsigned int flag_nullpath_ok:1;
    /* _ */ unsigned int flag_nopath:1;
    /* _ */ unsigned int flag_utime_omit_ok:1;
    /* _ */ unsigned int flag_reserved:29;
    /* S */ int (*ioctl)(const char *path, int cmd, void *arg, struct fuse_file_info *fi,
        unsigned int flags, void *data);
    /* _ */ int (*poll)(const char *path, struct fuse_file_info *fi,
        struct fuse_pollhandle *ph, unsigned *reventsp);
    /* FUSE 2.9 */
    /* _ */ int (*write_buf)(const char *path,
        struct fuse_bufvec *buf, fuse_off_t off, struct fuse_file_info *fi);
    /* _ */ int (*read_buf)(const char *path,
        struct fuse_bufvec **bufp, size_t size, fuse_off_t off, struct fuse_file_info *fi);
    /* _ */ int (*flock)(const char *path, struct fuse_file_info *, int op);
    /* _ */ int (*fallocate)(const char *path, int mode, fuse_off_t off, fuse_off_t len,
        struct fuse_file_info *fi);
    /* OSXFUSE */
    /* _ */ int (*reserved00)();
    /* _ */ int (*reserved01)();
    /* _ */ int (*reserved02)();
    /* _ */ int (*statfs_x)(const char *path, struct fuse_statfs *stbuf);
    /* _ */ int (*setvolname)(const char *volname);
    /* _ */ int (*exchange)(const char *oldpath, const char *newpath, unsigned long flags);
    /* _ */ int (*getxtimes)(const char *path,
        struct fuse_timespec *bkuptime, struct fuse_timespec *crtime);
    /* _ */ int (*setbkuptime)(const char *path, const struct fuse_timespec *tv);
    /* S */ int (*setchgtime)(const char *path, const struct fuse_timespec *tv);
    /* S */ int (*setcrtime)(const char *path, const struct fuse_timespec *tv);
    /* S */ int (*chflags)(const char *path, uint32_t flags);
    /* _ */ int (*setattr_x)(const char *path, struct fuse_setattr_x *attr);
    /* _ */ int (*fsetattr_x)(const char *path, struct fuse_setattr_x *attr,
        struct fuse_file_info *fi);
};

struct fuse_context
{
    struct fuse *fuse;
    fuse_uid_t uid;
    fuse_gid_t gid;
    fuse_pid_t pid;
    void *private_data;
    fuse_mode_t umask;
};

#define fuse_main(argc, argv, ops, data)\
    fuse_main_real(argc, argv, ops, sizeof *(ops), data)

FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_main_real)(struct fsp_fuse_env *env,
    int argc, char *argv[],
    const struct fuse_operations *ops, size_t opsize, void *data);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_is_lib_option)(struct fsp_fuse_env *env,
    const char *opt);
FSP_FUSE_API struct fuse *FSP_FUSE_API_NAME(fsp_fuse_new)(struct fsp_fuse_env *env,
    struct fuse_chan *ch, struct fuse_args *args,
    const struct fuse_operations *ops, size_t opsize, void *data);
FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_destroy)(struct fsp_fuse_env *env,
    struct fuse *f);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_loop)(struct fsp_fuse_env *env,
    struct fuse *f);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_loop_mt)(struct fsp_fuse_env *env,
    struct fuse *f);
FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_exit)(struct fsp_fuse_env *env,
    struct fuse *f);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_exited)(struct fsp_fuse_env *env,
    struct fuse *f);
FSP_FUSE_API struct fuse_context *FSP_FUSE_API_NAME(fsp_fuse_get_context)(struct fsp_fuse_env *env);

FSP_FUSE_SYM(
int fuse_main_real(int argc, char *argv[],
    const struct fuse_operations *ops, size_t opsize, void *data),
{
    return FSP_FUSE_API_CALL(fsp_fuse_main_real)
        (fsp_fuse_env(), argc, argv, ops, opsize, data);
})

FSP_FUSE_SYM(
int fuse_is_lib_option(const char *opt),
{
    return FSP_FUSE_API_CALL(fsp_fuse_is_lib_option)
        (fsp_fuse_env(), opt);
})

FSP_FUSE_SYM(
struct fuse *fuse_new(struct fuse_chan *ch, struct fuse_args *args,
    const struct fuse_operations *ops, size_t opsize, void *data),
{
    return FSP_FUSE_API_CALL(fsp_fuse_new)
        (fsp_fuse_env(), ch, args, ops, opsize, data);
})

FSP_FUSE_SYM(
void fuse_destroy(struct fuse *f),
{
    FSP_FUSE_API_CALL(fsp_fuse_destroy)
        (fsp_fuse_env(), f);
})

FSP_FUSE_SYM(
int fuse_loop(struct fuse *f),
{
    return FSP_FUSE_API_CALL(fsp_fuse_loop)
        (fsp_fuse_env(), f);
})

FSP_FUSE_SYM(
int fuse_loop_mt(struct fuse *f),
{
    return FSP_FUSE_API_CALL(fsp_fuse_loop_mt)
        (fsp_fuse_env(), f);
})

FSP_FUSE_SYM(
void fuse_exit(struct fuse *f),
{
    FSP_FUSE_API_CALL(fsp_fuse_exit)
        (fsp_fuse_env(), f);
})

FSP_FUSE_SYM(
int fuse_exited(struct fuse *f),
{
    return FSP_FUSE_API_CALL(fsp_fuse_exited)
        (fsp_fuse_env(), f);
})

FSP_FUSE_SYM(
struct fuse_context *fuse_get_context(void),
{
    return FSP_FUSE_API_CALL(fsp_fuse_get_context)
        (fsp_fuse_env());
})

FSP_FUSE_SYM(
int fuse_getgroups(int size, fuse_gid_t list[]),
{
    (void)size;
    (void)list;
    return -ENOSYS;
})

FSP_FUSE_SYM(
int fuse_interrupted(void),
{
    return 0;
})

FSP_FUSE_SYM(
int fuse_invalidate(struct fuse *f, const char *path),
{
    (void)f;
    (void)path;
    return -EINVAL;
})

FSP_FUSE_SYM(
int fuse_notify_poll(struct fuse_pollhandle *ph),
{
    (void)ph;
    return 0;
})

FSP_FUSE_SYM(
struct fuse_session *fuse_get_session(struct fuse *f),
{
    return (struct fuse_session *)f;
})

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: hack/winfsp_headers/fuse_common.h
================================================
/**
 * @file fuse/fuse_common.h
 * WinFsp FUSE compatible API.
 *
 * This file is derived from libfuse/include/fuse_common.h:
 *     FUSE: Filesystem in Userspace
 *     Copyright 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
 *
 * @copyright 2015-2020 Bill Zissimopoulos
 */
/*
 * This file is part of WinFsp.
 *
 * You can redistribute it and/or modify it under the terms of the GNU
 * General Public License version 3 as published by the Free Software
 * Foundation.
 *
 * Licensees holding a valid commercial license may use this software
 * in accordance with the commercial license agreement provided in
 * conjunction with the software.  The terms and conditions of any such
 * commercial license agreement shall govern, supersede, and render
 * ineffective any application of the GPLv3 license to this software,
 * notwithstanding of any reference thereto in the software or
 * associated repository.
 */

#ifndef FUSE_COMMON_H_
#define FUSE_COMMON_H_

#include "winfsp_fuse.h"
#include "fuse_opt.h"

#ifdef __cplusplus
extern "C" {
#endif

#define FUSE_MAJOR_VERSION              2
#define FUSE_MINOR_VERSION              8
#define FUSE_MAKE_VERSION(maj, min)     ((maj) * 10 + (min))
#define FUSE_VERSION                    FUSE_MAKE_VERSION(FUSE_MAJOR_VERSION, FUSE_MINOR_VERSION)

#define FUSE_CAP_ASYNC_READ             (1 << 0)
#define FUSE_CAP_POSIX_LOCKS            (1 << 1)
#define FUSE_CAP_ATOMIC_O_TRUNC         (1 << 3)
#define FUSE_CAP_EXPORT_SUPPORT         (1 << 4)
#define FUSE_CAP_BIG_WRITES             (1 << 5)
#define FUSE_CAP_DONT_MASK              (1 << 6)
#define FUSE_CAP_ALLOCATE               (1 << 27)   /* reserved (OSXFUSE) */
#define FUSE_CAP_EXCHANGE_DATA          (1 << 28)   /* reserved (OSXFUSE) */
#define FUSE_CAP_CASE_INSENSITIVE       (1 << 29)   /* file system is case insensitive */
#define FUSE_CAP_VOL_RENAME             (1 << 30)   /* reserved (OSXFUSE) */
#define FUSE_CAP_XTIMES                 (1 << 31)   /* reserved (OSXFUSE) */

#define FSP_FUSE_CAP_READDIR_PLUS       (1 << 21)   /* file system supports enhanced readdir */
#define FSP_FUSE_CAP_READ_ONLY          (1 << 22)   /* file system is marked read-only */
#define FSP_FUSE_CAP_STAT_EX            (1 << 23)   /* file system supports fuse_stat_ex */
#define FSP_FUSE_CAP_CASE_INSENSITIVE   FUSE_CAP_CASE_INSENSITIVE

#define FUSE_IOCTL_COMPAT               (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED         (1 << 1)
#define FUSE_IOCTL_RETRY                (1 << 2)
#define FUSE_IOCTL_MAX_IOV              256

/* from FreeBSD */
#define FSP_FUSE_UF_HIDDEN              0x00008000
#define FSP_FUSE_UF_READONLY            0x00001000
#define FSP_FUSE_UF_SYSTEM              0x00000080
#define FSP_FUSE_UF_ARCHIVE             0x00000800
#if !defined(UF_HIDDEN)
#define UF_HIDDEN                       FSP_FUSE_UF_HIDDEN
#endif
#if !defined(UF_READONLY)
#define UF_READONLY                     FSP_FUSE_UF_READONLY
#endif
#if !defined(UF_SYSTEM)
#define UF_SYSTEM                       FSP_FUSE_UF_SYSTEM
#endif
#if !defined(UF_ARCHIVE)
#define UF_ARCHIVE                      FSP_FUSE_UF_ARCHIVE
#endif

struct fuse_file_info
{
    int flags;
    unsigned int fh_old;
    int writepage;
    unsigned int direct_io:1;
    unsigned int keep_cache:1;
    unsigned int flush:1;
    unsigned int nonseekable:1;
    unsigned int padding:28;
    uint64_t fh;
    uint64_t lock_owner;
};

struct fuse_conn_info
{
    unsigned proto_major;
    unsigned proto_minor;
    unsigned async_read;
    unsigned max_write;
    unsigned max_readahead;
    unsigned capable;
    unsigned want;
    unsigned reserved[25];
};

struct fuse_session;
struct fuse_chan;
struct fuse_pollhandle;
struct fuse_bufvec;
struct fuse_statfs;
struct fuse_setattr_x;

FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_version)(struct fsp_fuse_env *env);
FSP_FUSE_API struct fuse_chan *FSP_FUSE_API_NAME(fsp_fuse_mount)(struct fsp_fuse_env *env,
    const char *mountpoint, struct fuse_args *args);
FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_unmount)(struct fsp_fuse_env *env,
    const char *mountpoint, struct fuse_chan *ch);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_parse_cmdline)(struct fsp_fuse_env *env,
    struct fuse_args *args,
    char **mountpoint, int *multithreaded, int *foreground);
FSP_FUSE_API int32_t FSP_FUSE_API_NAME(fsp_fuse_ntstatus_from_errno)(struct fsp_fuse_env *env,
    int err);

FSP_FUSE_SYM(
int fuse_version(void),
{
    return FSP_FUSE_API_CALL(fsp_fuse_version)
        (fsp_fuse_env());
})

FSP_FUSE_SYM(
struct fuse_chan *fuse_mount(const char *mountpoint, struct fuse_args *args),
{
    return FSP_FUSE_API_CALL(fsp_fuse_mount)
        (fsp_fuse_env(), mountpoint, args);
})

FSP_FUSE_SYM(
void fuse_unmount(const char *mountpoint, struct fuse_chan *ch),
{
    FSP_FUSE_API_CALL(fsp_fuse_unmount)
        (fsp_fuse_env(), mountpoint, ch);
})

FSP_FUSE_SYM(
int fuse_parse_cmdline(struct fuse_args *args,
    char **mountpoint, int *multithreaded, int *foreground),
{
    return FSP_FUSE_API_CALL(fsp_fuse_parse_cmdline)
        (fsp_fuse_env(), args, mountpoint, multithreaded, foreground);
})

FSP_FUSE_SYM(
void fuse_pollhandle_destroy(struct fuse_pollhandle *ph),
{
    (void)ph;
})

FSP_FUSE_SYM(
int fuse_daemonize(int foreground),
{
    return fsp_fuse_daemonize(foreground);
})

FSP_FUSE_SYM(
int fuse_set_signal_handlers(struct fuse_session *se),
{
    return fsp_fuse_set_signal_handlers(se);
})

FSP_FUSE_SYM(
void fuse_remove_signal_handlers(struct fuse_session *se),
{
    (void)se;
    fsp_fuse_set_signal_handlers(0);
})

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: hack/winfsp_headers/fuse_opt.h
================================================
/**
 * @file fuse/fuse_opt.h
 * WinFsp FUSE compatible API.
 *
 * This file is derived from libfuse/include/fuse_opt.h:
 *     FUSE: Filesystem in Userspace
 *     Copyright 2001-2007  Miklos Szeredi <miklos@szeredi.hu>
 *
 * @copyright 2015-2020 Bill Zissimopoulos
 */
/*
 * This file is part of WinFsp.
 *
 * You can redistribute it and/or modify it under the terms of the GNU
 * General Public License version 3 as published by the Free Software
 * Foundation.
 *
 * Licensees holding a valid commercial license may use this software
 * in accordance with the commercial license agreement provided in
 * conjunction with the software.  The terms and conditions of any such
 * commercial license agreement shall govern, supersede, and render
 * ineffective any application of the GPLv3 license to this software,
 * notwithstanding of any reference thereto in the software or
 * associated repository.
 */

#ifndef FUSE_OPT_H_
#define FUSE_OPT_H_

#include "winfsp_fuse.h"

#ifdef __cplusplus
extern "C" {
#endif

#define FUSE_OPT_KEY(templ, key)        { templ, -1, key }
#define FUSE_OPT_END                    { NULL, 0, 0 }

#define FUSE_OPT_KEY_OPT                -1
#define FUSE_OPT_KEY_NONOPT             -2
#define FUSE_OPT_KEY_KEEP               -3
#define FUSE_OPT_KEY_DISCARD            -4

#define FUSE_ARGS_INIT(argc, argv)      { argc, argv, 0 }

struct fuse_opt
{
    const char *templ;
    unsigned int offset;
    int value;
};

struct fuse_args
{
    int argc;
    char **argv;
    int allocated;
};

typedef int (*fuse_opt_proc_t)(void *data, const char *arg, int key,
    struct fuse_args *outargs);

FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_parse)(struct fsp_fuse_env *env,
    struct fuse_args *args, void *data,
    const struct fuse_opt opts[], fuse_opt_proc_t proc);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_arg)(struct fsp_fuse_env *env,
    struct fuse_args *args, const char *arg);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_insert_arg)(struct fsp_fuse_env *env,
    struct fuse_args *args, int pos, const char *arg);
FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_opt_free_args)(struct fsp_fuse_env *env,
    struct fuse_args *args);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_opt)(struct fsp_fuse_env *env,
    char **opts, const char *opt);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_add_opt_escaped)(struct fsp_fuse_env *env,
    char **opts, const char *opt);
FSP_FUSE_API int FSP_FUSE_API_NAME(fsp_fuse_opt_match)(struct fsp_fuse_env *env,
    const struct fuse_opt opts[], const char *opt);

FSP_FUSE_SYM(
int fuse_opt_parse(struct fuse_args *args, void *data,
    const struct fuse_opt opts[], fuse_opt_proc_t proc),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_parse)
        (fsp_fuse_env(), args, data, opts, proc);
})

FSP_FUSE_SYM(
int fuse_opt_add_arg(struct fuse_args *args, const char *arg),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_add_arg)
        (fsp_fuse_env(), args, arg);
})

FSP_FUSE_SYM(
int fuse_opt_insert_arg(struct fuse_args *args, int pos, const char *arg),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_insert_arg)
        (fsp_fuse_env(), args, pos, arg);
})

FSP_FUSE_SYM(
void fuse_opt_free_args(struct fuse_args *args),
{
    FSP_FUSE_API_CALL(fsp_fuse_opt_free_args)
        (fsp_fuse_env(), args);
})

FSP_FUSE_SYM(
int fuse_opt_add_opt(char **opts, const char *opt),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_add_opt)
        (fsp_fuse_env(), opts, opt);
})

FSP_FUSE_SYM(
int fuse_opt_add_opt_escaped(char **opts, const char *opt),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_add_opt_escaped)
        (fsp_fuse_env(), opts, opt);
})

FSP_FUSE_SYM(
int fuse_opt_match(const struct fuse_opt opts[], const char *opt),
{
    return FSP_FUSE_API_CALL(fsp_fuse_opt_match)
        (fsp_fuse_env(), opts, opt);
})

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: hack/winfsp_headers/winfsp_fuse.h
================================================
/**
 * @file fuse/winfsp_fuse.h
 * WinFsp FUSE compatible API.
 *
 * @copyright 2015-2020 Bill Zissimopoulos
 */
/*
 * This file is part of WinFsp.
 *
 * You can redistribute it and/or modify it under the terms of the GNU
 * General Public License version 3 as published by the Free Software
 * Foundation.
 *
 * Licensees holding a valid commercial license may use this software
 * in accordance with the commercial license agreement provided in
 * conjunction with the software.  The terms and conditions of any such
 * commercial license agreement shall govern, supersede, and render
 * ineffective any application of the GPLv3 license to this software,
 * notwithstanding of any reference thereto in the software or
 * associated repository.
 */

#ifndef FUSE_WINFSP_FUSE_H_INCLUDED
#define FUSE_WINFSP_FUSE_H_INCLUDED

#include <errno.h>
#include <stdint.h>
#if !defined(WINFSP_DLL_INTERNAL)
#include <stdlib.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

#if !defined(FSP_FUSE_API)
#if defined(WINFSP_DLL_INTERNAL)
#define FSP_FUSE_API                    __declspec(dllexport)
#else
#define FSP_FUSE_API                    __declspec(dllimport)
#endif
#endif

#if !defined(FSP_FUSE_API_NAME)
#define FSP_FUSE_API_NAME(n)            (n)
#endif

#if !defined(FSP_FUSE_API_CALL)
#define FSP_FUSE_API_CALL(n)            (n)
#endif

#if !defined(FSP_FUSE_SYM)
#if !defined(CYGFUSE)
#define FSP_FUSE_SYM(proto, ...)        static inline proto { __VA_ARGS__ }
#else
#define FSP_FUSE_SYM(proto, ...)        proto;
#endif
#endif

#define FSP_FUSE_DEVICE_TYPE            (0x8000 | 'W' | 'F' * 0x100) /* DeviceIoControl -> ioctl */
#define FSP_FUSE_CTLCODE_FROM_IOCTL(cmd)\
    (FSP_FUSE_DEVICE_TYPE << 16) | (((cmd) & 0x0fff) << 2)
#define FSP_FUSE_IOCTL(cmd, isiz, osiz) \
    (                                   \
        (((osiz) != 0) << 31) |         \
        (((isiz) != 0) << 30) |         \
        (((isiz) | (osiz)) << 16) |     \
        (cmd)                           \
    )

/*
 * FUSE uses a number of types (notably: struct stat) that are OS specific.
 * Furthermore there are sometimes multiple definitions of the same type even
 * within the same OS. This is certainly true on Windows, where these types
 * are not even native.
 *
 * For this reason we will define our own fuse_* types which represent the
 * types as the WinFsp DLL expects to see them. We will define these types
 * to be compatible with the equivalent Cygwin types as we want WinFsp-FUSE
 * to be usable from Cygwin.
 */

#define FSP_FUSE_STAT_FIELD_DEFN        \
    fuse_dev_t st_dev;                  \
    fuse_ino_t st_ino;                  \
    fuse_mode_t st_mode;                \
    fuse_nlink_t st_nlink;              \
    fuse_uid_t st_uid;                  \
    fuse_gid_t st_gid;                  \
    fuse_dev_t st_rdev;                 \
    fuse_off_t st_size;                 \
    struct fuse_timespec st_atim;       \
    struct fuse_timespec st_mtim;       \
    struct fuse_timespec st_ctim;       \
    fuse_blksize_t st_blksize;          \
    fuse_blkcnt_t st_blocks;            \
    struct fuse_timespec st_birthtim;
#define FSP_FUSE_STAT_EX_FIELD_DEFN     \
    FSP_FUSE_STAT_FIELD_DEFN            \
    uint32_t st_flags;                  \
    uint32_t st_reserved32[3];          \
    uint64_t st_reserved64[2];

#if defined(_WIN64) || defined(_WIN32)

typedef uint32_t fuse_uid_t;
typedef uint32_t fuse_gid_t;
typedef int32_t fuse_pid_t;

typedef uint32_t fuse_dev_t;
typedef uint64_t fuse_ino_t;
typedef uint32_t fuse_mode_t;
typedef uint16_t fuse_nlink_t;
typedef int64_t fuse_off_t;

#if defined(_WIN64)
typedef uint64_t fuse_fsblkcnt_t;
typedef uint64_t fuse_fsfilcnt_t;
#else
typedef uint32_t fuse_fsblkcnt_t;
typedef uint32_t fuse_fsfilcnt_t;
#endif
typedef int32_t fuse_blksize_t;
typedef int64_t fuse_blkcnt_t;

#if defined(_WIN64)
struct fuse_utimbuf
{
    int64_t actime;
    int64_t modtime;
};
struct fuse_timespec
{
    int64_t tv_sec;
    int64_t tv_nsec;
};
#else
struct fuse_utimbuf
{
    int32_t actime;
    int32_t modtime;
};
struct fuse_timespec
{
    int32_t tv_sec;
    int32_t tv_nsec;
};
#endif

#if !defined(FSP_FUSE_USE_STAT_EX)
struct fuse_stat
{
    FSP_FUSE_STAT_FIELD_DEFN
};
#else
struct fuse_stat
{
    FSP_FUSE_STAT_EX_FIELD_DEFN
};
#endif

#if defined(_WIN64)
struct fuse_statvfs
{
    uint64_t f_bsize;
    uint64_t f_frsize;
    fuse_fsblkcnt_t f_blocks;
    fuse_fsblkcnt_t f_bfree;
    fuse_fsblkcnt_t f_bavail;
    fuse_fsfilcnt_t f_files;
    fuse_fsfilcnt_t f_ffree;
    fuse_fsfilcnt_t f_favail;
    uint64_t f_fsid;
    uint64_t f_flag;
    uint64_t f_namemax;
};
#else
struct fuse_statvfs
{
    uint32_t f_bsize;
    uint32_t f_frsize;
    fuse_fsblkcnt_t f_blocks;
    fuse_fsblkcnt_t f_bfree;
    fuse_fsblkcnt_t f_bavail;
    fuse_fsfilcnt_t f_files;
    fuse_fsfilcnt_t f_ffree;
    fuse_fsfilcnt_t f_favail;
    uint32_t f_fsid;
    uint32_t f_flag;
    uint32_t f_namemax;
};
#endif

struct fuse_flock
{
    int16_t l_type;
    int16_t l_whence;
    fuse_off_t l_start;
    fuse_off_t l_len;
    fuse_pid_t l_pid;
};

#if defined(WINFSP_DLL_INTERNAL)
#define FSP_FUSE_ENV_INIT               \
    {                                   \
        'W',                            \
        MemAlloc, MemFree,              \
        fsp_fuse_daemonize,             \
        fsp_fuse_set_signal_handlers,   \
        0/*conv_to_win_path*/,          \
        0/*winpid_to_pid*/,             \
        { 0 },                          \
    }
#else
#define FSP_FUSE_ENV_INIT               \
    {                                   \
        'W',                            \
        malloc, free,                   \
        fsp_fuse_daemonize,             \
        fsp_fuse_set_signal_handlers,   \
        0/*conv_to_win_path*/,          \
        0/*winpid_to_pid*/,             \
        { 0 },                          \
    }
#endif

#elif defined(__CYGWIN__)

#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <utime.h>

#define fuse_uid_t                      uid_t
#define fuse_gid_t                      gid_t
#define fuse_pid_t                      pid_t

#define fuse_dev_t                      dev_t
#define fuse_ino_t                      ino_t
#define fuse_mode_t                     mode_t
#define fuse_nlink_t                    nlink_t
#define fuse_off_t                      off_t

#define fuse_fsblkcnt_t                 fsblkcnt_t
#define fuse_fsfilcnt_t                 fsfilcnt_t
#define fuse_blksize_t                  blksize_t
#define fuse_blkcnt_t                   blkcnt_t

#define fuse_utimbuf                    utimbuf
#define fuse_timespec                   timespec

#if !defined(FSP_FUSE_USE_STAT_EX)
#define fuse_stat                       stat
#else
struct fuse_stat
{
    FSP_FUSE_STAT_EX_FIELD_DEFN
};
#endif
#define fuse_statvfs                    statvfs
#define fuse_flock                      flock

#define FSP_FUSE_ENV_INIT               \
    {                                   \
        'C',                            \
        malloc, free,                   \
        fsp_fuse_daemonize,             \
        fsp_fuse_set_signal_handlers,   \
        fsp_fuse_conv_to_win_path,      \
        fsp_fuse_winpid_to_pid,         \
        { 0 },                          \
    }

/*
 * Note that long is 8 bytes long in Cygwin64 and 4 bytes long in Win64.
 * For this reason we avoid using long anywhere in these headers.
 */

#else
#error unsupported environment
#endif

struct fuse_stat_ex
{
    FSP_FUSE_STAT_EX_FIELD_DEFN
};

struct fsp_fuse_env
{
    unsigned environment;
    void *(*memalloc)(size_t);
    void (*memfree)(void *);
    int (*daemonize)(int);
    int (*set_signal_handlers)(void *);
    char *(*conv_to_win_path)(const char *);
    fuse_pid_t (*winpid_to_pid)(uint32_t);
    void (*reserved[2])();
};

FSP_FUSE_API void FSP_FUSE_API_NAME(fsp_fuse_signal_handler)(int sig);

#if defined(_WIN64) || defined(_WIN32)

static inline int fsp_fuse_daemonize(int foreground)
{
    (void)foreground;
    return 0;
}

static inline int fsp_fuse_set_signal_handlers(void *se)
{
    (void)se;
    return 0;
}

#elif defined(__CYGWIN__)

static inline int fsp_fuse_daemonize(int foreground)
{
    int daemon(int nochdir, int noclose);
    int chdir(const char *path);

    if (!foreground)
    {
        if (-1 == daemon(0, 0))
            return -1;
    }
    else
        chdir("/");

    return 0;
}

static inline void *fsp_fuse_signal_thread(void *psigmask)
{
    int sig;

    if (0 == sigwait((sigset_t *)psigmask, &sig))
        FSP_FUSE_API_CALL(fsp_fuse_signal_handler)(sig);

    return 0;
}

static inline int fsp_fuse_set_signal_handlers(void *se)
{
#define FSP_FUSE_SET_SIGNAL_HANDLER(sig, newha)\
    if (-1 != sigaction((sig), 0, &oldsa) &&\
        oldsa.sa_handler == (se ? SIG_DFL : (newha)))\
    {\
        newsa.sa_handler = se ? (newha) : SIG_DFL;\
        sigaction((sig), &newsa, 0);\
    }
#define FSP_FUSE_SIGADDSET(sig)\
    if (-1 != sigaction((sig), 0, &oldsa) &&\
        oldsa.sa_handler == SIG_DFL)\
        sigaddset(&sigmask, (sig));

    static sigset_t sigmask;
    static pthread_t sigthr;
    struct sigaction oldsa, newsa;

    // memset instead of initializer to avoid GCC -Wmissing-field-initializers warning
    memset(&newsa, 0, sizeof newsa);

    if (0 != se)
    {
        if (0 == sigthr)
        {
            FSP_FUSE_SET_SIGNAL_HANDLER(SIGPIPE, SIG_IGN);

            sigemptyset(&sigmask);
            FSP_FUSE_SIGADDSET(SIGHUP);
            FSP_FUSE_SIGADDSET(SIGINT);
            FSP_FUSE_SIGADDSET(SIGTERM);
            if (0 != pthread_sigmask(SIG_BLOCK, &sigmask, 0))
                return -1;

            if (0 != pthread_create(&sigthr, 0, fsp_fuse_signal_thread, &sigmask))
                return -1;
        }
    }
    else
    {
        if (0 != sigthr)
        {
            pthread_cancel(sigthr);
            pthread_join(sigthr, 0);
            sigthr = 0;

            if (0 != pthread_sigmask(SIG_UNBLOCK, &sigmask, 0))
                return -1;
            sigemptyset(&sigmask);

            FSP_FUSE_SET_SIGNAL_HANDLER(SIGPIPE, SIG_IGN);
        }
    }

    return 0;

#undef FSP_FUSE_SIGADDSET
#undef FSP_FUSE_SET_SIGNAL_HANDLER
}

static inline char *fsp_fuse_conv_to_win_path(const char *path)
{
    void *cygwin_create_path(unsigned, const void *);
    return (char *)cygwin_create_path(
        0/*CCP_POSIX_TO_WIN_A*/ | 0x100/*CCP_RELATIVE*/,
        path);
}

static inline fuse_pid_t fsp_fuse_winpid_to_pid(uint32_t winpid)
{
    pid_t cygwin_winpid_to_pid(int winpid);
    pid_t pid = cygwin_winpid_to_pid(winpid);
    return -1 != pid ? pid : (fuse_pid_t)winpid;
}
#endif


static inline struct fsp_fuse_env *fsp_fuse_env(void)
{
    static struct fsp_fuse_env env = FSP_FUSE_ENV_INIT;
    return &env;
}

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: integration/Makefile
================================================

all: s3test webdav ioctl

s3test:
	pip install awscli==1.27.153
	bash s3gateway_test.sh

webdav:
	cd /home/travis/.m2/litmus-0.13 ; for i in "basic" "copymove" "http"; do sudo ./$${i} http://127.0.0.1:9009 root 1234; done

ioctl:
	bash ioctl_test.sh /tmp/jfs-unit-test/ioctl_test 2>/dev/null


================================================
FILE: integration/ioctl_test.sh
================================================
#!/bin/bash

#  JuiceFS, Copyright 2021 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

test_dir=$1
if [ ! -d "$test_dir" ]; then
    mkdir "$test_dir"
fi

function cleanup() {
    code=$?
    if [ $code -eq 0 ]; then
      echo "ioctl test passed"
    else
      echo "ioctl test failed"
    fi
    trap - EXIT
    sudo chattr -R "=" "$test_dir"
    rm -rf "$test_dir"
    exit $code
}

function exec_should_failed() {
  eval "$1"
  if [ $? -eq 0 ]; then
      echo "$1 should fail"
      exit 1
  fi
}

function exec_should_success() {
  eval "$1"
  if [ $? -ne 0 ]; then
      echo "$1 should success"
      exit 1
  fi
}

a_test_dir="$test_dir"/a
sudo chattr -R "=" "${test_dir:?}"
sudo rm -rf "${test_dir:?}"/*
mkdir "$a_test_dir"

trap cleanup INT EXIT

{
  touch "$a_test_dir"/afile
  exec_should_failed 'sudo chattr "+u" $a_test_dir/afile'
  exec_should_success 'sudo chattr "+a" $a_test_dir/afile'
  exec_should_success '[[ "$(lsattr $a_test_dir/afile | awk -F " " "{print \$1}")" =~ "a" ]]'
  exec_should_failed "echo aa > $a_test_dir/afile"
  exec_should_failed "rm -rf $a_test_dir/afile"
  touch "$a_test_dir/tmpfile"
  exec_should_failed "mv -f $a_test_dir/tmpfile $a_test_dir/afile"
  exec_should_failed "mv -f $a_test_dir/afile $a_test_dir/tmpfile"
  exec_should_failed "ln $a_test_dir/afile $a_test_dir/linkfile"
  echo "12345" >> "$a_test_dir"/afile
  exec_should_success '[ "$(cat "$a_test_dir"/afile)" == "12345" ]'

  # FIXME: sudo chattr "+a" $a_test_dir/fallocatefile random failed
  touch "$a_test_dir"/fallocatefile
  exec_should_success 'sudo chattr "+a" $a_test_dir/fallocatefile'
  exec_should_success '[[ "$(lsattr $a_test_dir/fallocatefile | awk -F " " "{print \$1}")" =~ "a" ]]'
  exec_should_failed 'fallocate -l 1k -n $a_test_dir/fallocatefile'
}


{
  mkdir -p "$a_test_dir"/adir/child_dir1/child_dir2
  touch "$a_test_dir"/adir/file
  exec_should_success 'sudo chattr "+a" $a_test_dir/adir'
  exec_should_success '[[ "$(lsattr -d $a_test_dir/adir | awk -F " " "{print \$1}")" =~ "a" ]]'
  exec_should_failed 'rm -rf $a_test_dir/adir'
  exec_should_failed 'rm -rf $a_test_dir/adir/file'
  exec_should_success 'touch "$a_test_dir"/adir/child_dir1/child_file'
  exec_should_success 'rm -rf $a_test_dir/adir/child_dir1/child_dir2'
  exec_should_success 'rm -rf $a_test_dir/adir/child_dir1/child_file'
  exec_should_failed 'rm -rf $a_test_dir/adir/child_dir1'

  exec_should_success 'touch $a_test_dir/adir/tmpfile'
  exec_should_success 'echo 123 > $a_test_dir/adir/tmpfile'
  exec_should_success 'echo 123 >> $a_test_dir/adir/tmpfile'

  exec_should_failed 'mv -f $a_test_dir/adir/tmpfile $a_test_dir/adir/file'
  exec_should_failed 'mv -f $a_test_dir/adir/file $a_test_dir/adir/tmpfile'
  touch "$a_test_dir"/tfile
  exec_should_success 'mv -f $a_test_dir/tfile $a_test_dir/adir/file2'
}


i_test_dir="$test_dir"/i
sudo chattr -R "=" "${i_test_dir:?}"
sudo rm -rf "${i_test_dir:?}"/*
mkdir "$i_test_dir"

{
  touch "$i_test_dir"/ifile
  exec_should_success 'sudo chattr "+i" "$i_test_dir"/ifile'
  exec_should_success '[[ "$(lsattr $i_test_dir/ifile | awk -F " " "{print \$1}")" =~ "i" ]]'

  exec_should_failed "echo aa > $i_test_dir/ifile"
  exec_should_failed "echo aa >> $i_test_dir/ifile"
  exec_should_failed "rm -rf $i_test_dir/ifile"
  touch "$i_test_dir/tmpfile"
  exec_should_failed "mv -f $i_test_dir/tmpfile $i_test_dir/ifile"
  exec_should_failed "mv -f $i_test_dir/ifile $a_test_dir/tmpfile"
  exec_should_failed "ln $i_test_dir/ifile $i_test_dir/linkfile"

  touch "$i_test_dir"/fallocatefile
  exec_should_success 'sudo chattr "+i" $i_test_dir/fallocatefile'
  exec_should_success '[[ "$(lsattr $i_test_dir/fallocatefile | awk -F " " "{print \$1}")" =~ "i" ]]'
  exec_should_failed 'fallocate -l 1k -n $i_test_dir/fallocatefile'
}

{
  mkdir -p "$i_test_dir"/idir/child_dir1/child_dir2
  touch "$i_test_dir"/idir/file

  exec_should_success 'sudo chattr "+i" $i_test_dir/idir'
  exec_should_success '[[ "$(lsattr -d $i_test_dir/idir | awk -F " " "{print \$1}")" =~ "i" ]]'
  exec_should_success 'touch "$i_test_dir"/idir/child_dir1/child_file'
  exec_should_success 'rm -rf $i_test_dir/idir/child_dir1/child_dir2'
  exec_should_success 'rm -rf $i_test_dir/idir/child_dir1/child_file'
  exec_should_failed 'rm -rf $i_test_dir/idir'
  exec_should_failed 'rm -rf $i_test_dir/idir/file'
  exec_should_failed 'rm -rf $i_test_dir/idir/child_dir1'

  exec_should_failed 'touch $i_test_dir/idir/tmpfile'
  exec_should_success 'echo 123 > $i_test_dir/idir/file'
  exec_should_success 'echo 123 >> $i_test_dir/idir/file'

  exec_should_failed 'mv -f $i_test_dir/idir/tmpfile $i_test_dir/idir/file'
  exec_should_failed 'mv -f $i_test_dir/idir/file $i_test_dir/idir/tmpfile'
  touch "$i_test_dir"/tfile
  exec_should_failed 'mv -f $i_test_dir/tfile $i_test_dir/idir/file2'
}


================================================
FILE: integration/s3gateway_test.sh
================================================
#!/bin/bash

#  Mint (C) 2017-2020 Minio, Inc.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

# environment

os="linux"
if [[ `uname  -a` =~ "Darwin" ]];then
    os="mac"
fi
echo "os=$os"

set -x

MINT_DATA_DIR=testdata
SERVER_ENDPOINT="127.0.0.1:9008"
ACCESS_KEY="testUser"
SECRET_KEY="testUserPassword"
ENABLE_HTTPS=0
SERVER_REGION=us-east-1
ENABLE_VIRTUAL_STYLE=0

# macos need bash 4.0+
# create testdata
declare -A data_file_map
data_file_map["datafile-0-b"]="0"
data_file_map["datafile-1-b"]="1"
data_file_map["datafile-1-kB"]="1K"
data_file_map["datafile-10-kB"]="10K"
data_file_map["datafile-33-kB"]="33K"
data_file_map["datafile-100-kB"]="100K"
data_file_map["datafile-1.03-MB"]="1056K"
data_file_map["datafile-1-MB"]="1M"
data_file_map["datafile-5-MB"]="5M"
data_file_map["datafile-5243880-b"]="5243880"
data_file_map["datafile-6-MB"]="6M"
data_file_map["datafile-10-MB"]="10M"
data_file_map["datafile-11-MB"]="11M"
data_file_map["datafile-65-MB"]="65M"
data_file_map["datafile-129-MB"]="129M"

mkdir -p "$MINT_DATA_DIR"


if [ ! "$(ls $MINT_DATA_DIR)" ]; then
    for filename in "${!data_file_map[@]}"; do
        echo "creating $MINT_DATA_DIR/$filename"
        if ! shred -n 1 -s "${data_file_map[$filename]}" - 1>"$MINT_DATA_DIR/$filename" 2>/dev/null; then
            echo "unable to create data file $MINT_DATA_DIR/$filename"
            exit 1
        fi
    done
fi

# configuration
aws configure set aws_access_key_id "$ACCESS_KEY"
aws configure set aws_secret_access_key "$SECRET_KEY"
aws configure set default.region "$SERVER_REGION"

# run tests for virtual style if provided
if [ "$ENABLE_VIRTUAL_STYLE" -eq 1 ]; then
   # Setup endpoint scheme
   endpoint="http://$DOMAIN:$SERVER_PORT"
   if [ "$ENABLE_HTTPS" -eq 1 ]; then
       endpoint="https://$DOMAIN:$SERVER_PORT"
   fi
   dnsmasq --address="/$DOMAIN/$SERVER_IP" --user=root
   echo -e "nameserver 127.0.0.1\n$(cat /etc/resolv.conf)" > /etc/resolv.conf
   aws configure set default.s3.addressing_style virtual
#    ./test.sh "$endpoint"  1>>"$output_log_file" 2>"$error_log_file"
   ./test.sh "$endpoint"
   aws configure set default.s3.addressing_style path
fi

endpoint="http://$SERVER_ENDPOINT"
if [ "$ENABLE_HTTPS" -eq 1 ]; then
    endpoint="https://$SERVER_ENDPOINT"
fi
# run path style tests
# ./test.sh "$endpoint"  1>>"$output_log_file" 2>"$error_log_file"


# test
function get_md5() {
    if [ $os == "mac" ]; then
        md5rt=$(md5 "$1" | awk '{print $4}')
    else
        md5rt=$(md5sum "$1" | awk '{print $1}')
    fi
}

get_md5 "${MINT_DATA_DIR}/datafile-1-kB"
HASH_1_KB=$md5rt

get_md5 "${MINT_DATA_DIR}/datafile-65-MB"
HASH_65_MB=$md5rt

_init() {
    AWS="aws --endpoint-url $1"
}


function get_time() {
    date +%s%N
}

function get_duration() {
    start_time=$1
    end_time=$(get_time)

    echo $(( (end_time - start_time) / 1000000 ))
}

function log_success() {
    function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2")
    printf '{"name": "awscli", "duration": %d, "function": %s, "status": "PASS"}\n' "$1" "$function"
}

function log_failure() {
    function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2")
    err=$(echo "$3" | tr -d '\n')
    printf '{"name": "awscli", "duration": %d, "function": %s, "status": "FAIL", "error": "%s"}\n' "$1" "$function" "$err"
}

function log_alert() {
    function=$(python -c 'import sys,json; print(json.dumps(sys.stdin.read()))' <<<"$2")
    err=$(echo "$4" | tr -d '\n')
    printf '{"name": "awscli", "duration": %d, "function": %s, "status": "FAIL", "alert": "%s", "error": "%s"}\n' "$1" "$function" "$3" "$err"
}

function make_bucket() {
    # Make bucket
    bucket_name="awscli-mint-test-bucket-$RANDOM"
    function="${AWS} s3api create-bucket --bucket ${bucket_name}"

    # execute the test
    out=$($function 2>&1)
    rv=$?

    # if command is successful print bucket_name or print error
    if [ $rv -eq 0 ]; then
        echo "${bucket_name}"
    else
        echo "${out}"
    fi

    return $rv
}

function delete_bucket() {
    # Delete bucket
    function="${AWS} s3 rb s3://${1} --force"
    out=$($function 2>&1)
    rv=$?

    # echo the output
    echo "${out}"

    return $rv
}

# Tests creating, stat and delete on a bucket.
function test_create_bucket() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?
    # save the ref to function being tested, so it can be logged
    test_function=${function}

    # if make_bucket is successful stat the bucket
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api head-bucket --bucket ${bucket_name}"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket failes, $bucket_name has the error output
        out="${bucket_name}"
    fi

     # if stat bucket is successful remove the bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "${bucket_name}")
        rv=$?
    else
        # if make bucket failes, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests creating and deleting an object.
function test_upload_object() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # if upload succeeds download the file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB /tmp/datafile-1-kB"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        # calculate the md5 hash of downloaded file
        get_md5 "/tmp/datafile-1-kB"
        hash2=$md5rt
    fi

    # if download succeeds, verify downloaded file
    if [ $rv -eq 0 ]; then
        if [ "$HASH_1_KB" == "$hash2" ]; then
            function="delete_bucket"
            out=$(delete_bucket "$bucket_name")
            rv=$?
            # remove download file
            rm -f /tmp/datafile-1-kB
        else
            rv=1
            out="Checksum verification failed for uploaded object"
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Test lookup a directory prefix.
function test_lookup_object_prefix() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds create a directory.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --bucket ${bucket_name} --key prefix/directory/"
        # save the ref to function being tested, so it can be logged
        test_function=${function}

        out=$($function 2>&1)

        rv=$?
    else
        # if make_bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        ## Attempt an overwrite of the prefix again and should succeed as well.
        function="${AWS} s3api put-object --bucket ${bucket_name} --key prefix/directory/"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # if upload succeeds lookup for the prefix.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api head-object --bucket ${bucket_name} --key prefix/directory/"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # if directory create succeeds, upload the object.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key prefix/directory/datafile-1-kB"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # Attempt a delete on prefix shouldn't delete the directory since we have an object inside it.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api delete-object --bucket ${bucket_name} --key prefix/directory/"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # if upload succeeds lookup for the object should succeed.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api head-object --bucket ${bucket_name} --key prefix/directory/datafile-1-kB"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi

    if [ $rv -ne 0 ]; then
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    else
        log_success "$(get_duration "$start_time")" "${test_function}"
    fi

    return $rv
}

# Tests listing objects for both v1 and v2 API.
function test_list_objects() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # if upload objects succeeds, list objects with existing prefix
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix datafile-1-kB"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "datafile-1-kB" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with existing prefix failed"
        fi
    fi

    # if upload objects succeeds, list objects with not exist prefix
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix linux"
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "" ]; then
            rv=1
            out="list-objects without existing prefix failed"
        fi
    fi

    # put dir1/dir2/dir3/dir4/  listobject(prefix=dir1/) should return "dir1/dir2/dir3/dir4/" ...
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --bucket ${bucket_name} --key dir1/dir2/dir3/dir4/"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/dir2/dir3/dir4/"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[0].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "dir1/dir2/dir3/dir4/" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with prefix is dir failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[0].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "dir1/dir2/dir3/dir4/" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with prefix is dir failed"
        fi
      fi

    # put dir1/dir2/  listobject(prefix=dir2/) should return "dir1/dir2/"
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --bucket ${bucket_name} --key dir1/dir2/"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # if upload objects succeeds, list objects with existing prefix
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[0].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "dir1/dir2/" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with prefix is dir failed"
        fi
    fi

    # delete dir1/dir2/  listobject(prefix=dir1/) should return "dir2/"
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api delete-object --bucket ${bucket_name} --key dir1/dir2/"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[0].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "dir1/dir2/dir3/dir4/" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with prefix is dir failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/ --delimiter /"
        test_function=${function}
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .CommonPrefixes[0].Prefix)
        if [ $rv -eq 0 ] && [ "$key_name" != "dir1/dir2/" ]; then
            rv=1
            # since rv is 0, command passed, but didn't return expected value. In this case set the output
            out="list-objects with prefix is dir failed"
        fi
    fi

    # delete dir1/dir2/dir3/dir4/  listobject(prefix=dir1/) should return nothing
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api delete-object --bucket ${bucket_name} --key dir1/dir2/dir3/dir4/"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi
    if [ $rv -eq 0 ]; then
          function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix dir1/"
          test_function=${function}
          out=$($function)
          rv=$?
          output=$(echo "$out")
          if [ $rv -eq 0 ] && [ "$output" != "" ]; then
              rv=1
              # since rv is 0, command passed, but didn't return expected value. In this case set the output
              out="list-objects with prefix is dir failed"
          fi
    fi


    # if upload objects succeeds, list objectsv2 with existing prefix
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB"
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "datafile-1-kB" ]; then
            rv=1
            out="list-objects-v2 with existing prefix failed"
        fi
    fi

    # if upload objects succeeds, list objectsv2 without existing prefix
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix linux"
        out=$($function)
        rv=$?
        key_name=$(echo "$out" | jq -r .Contents[].Key)
        if [ $rv -eq 0 ] && [ "$key_name" != "" ]; then
            rv=1
            out="list-objects-v2 without existing prefix failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
        # remove download file
        rm -f /tmp/datafile-1-kB
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        rm -f /tmp/datafile-1-kB
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests multipart API with 0 byte part.
function test_multipart_upload_0byte() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    object_name=${bucket_name}"-object"
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-0-b --bucket ${bucket_name} --key datafile-0-b"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        # create multipart
        function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}"
        test_function=${function}
        out=$($function)
        rv=$?
        upload_id=$(echo "$out" | jq -r .UploadId)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 1
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-0-b --upload-id ${upload_id} --part-number 1"
        out=$($function)
        rv=$?
        etag1=$(echo "$out" | jq -r .ETag)
    fi

    if [ $rv -eq 0 ]; then
        # Create a multipart struct file for completing multipart transaction
        echo "{
            \"Parts\": [
                {
                    \"ETag\": ${etag1},
                    \"PartNumber\": 1
                }
            ]
        }" >> /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        # Use saved etags to complete the multipart transaction
        function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}"
        out=$($function)
        rv=$?
        etag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        if [ "${etag}" == "" ]; then
            rv=1
            out="complete-multipart-upload failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-object --bucket ${bucket_name} --key ${object_name} /tmp/datafile-0-b"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        ret_etag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        # match etag
        if [ "$etag" != "$ret_etag" ]; then
            rv=1
            out="Etag mismatch for multipart 0 byte object"
        fi
        rm -f /tmp/datafile-0-b
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
        # remove temp file
        rm -f /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        rm -f /tmp/multipart
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests multipart API by making each individual calls.
function test_multipart_upload() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    object_name=${bucket_name}"-object"
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        # create multipart
        function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}"
        test_function=${function}
        out=$($function)
        rv=$?
        upload_id=$(echo "$out" | jq -r .UploadId)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 1
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5-MB --upload-id ${upload_id} --part-number 1"
        out=$($function)
        rv=$?
        etag1=$(echo "$out" | jq -r .ETag)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 2
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-1-kB --upload-id ${upload_id} --part-number 2"
        out=$($function)
        rv=$?
        etag2=$(echo "$out" | jq -r .ETag)
        # Create a multipart struct file for completing multipart transaction
        echo "{
            \"Parts\": [
                {
                    \"ETag\": ${etag1},
                    \"PartNumber\": 1
                },
                {
                    \"ETag\": ${etag2},
                    \"PartNumber\": 2
                }
            ]
        }" >> /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        # Use saved etags to complete the multipart transaction
        function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}"
        out=$($function)
        rv=$?
        finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        if [ "${finalETag}" == "" ]; then
            rv=1
            out="complete-multipart-upload failed"
        fi
    fi


    for key in "afile" "bfile" "bfile" "documents/report1.pdf" "documents/report2.pdf" "ebook" "photos/2021/a2.png" "photos/2021/a3.png" "photos/2022/a4.png"
    do
      if [ $rv -eq 0 ]; then
        # create multipart
        function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${key}"
        test_function=${function}
        out=$($function)
        rv=$?
        upload_id=$(echo "$out" | jq -r .UploadId)
      fi
    done

    if [ $rv -eq 0 ]; then
      function="${AWS} s3api list-multipart-uploads --bucket ${bucket_name}"
      test_function=${function}
      out=$($function)
      rv=$?
      keys=$(echo "$out" | jq -r '.Uploads | map(.Key) | join(",")')
      if [ $keys != "afile,bfile,bfile,documents/report1.pdf,documents/report2.pdf,ebook,photos/2021/a2.png,photos/2021/a3.png,photos/2022/a4.png" ]; then
       rv=1
       out="list-multipart-uploads failed"
      fi
    fi

    if [ $rv -eq 0 ]; then
      function="${AWS} s3api list-multipart-uploads --bucket ${bucket_name} --key-marker bfile"
      test_function=${function}
      out=$($function)
      rv=$?
      keys=$(echo "$out" | jq -r '.Uploads | map(.Key) | join(",")')
      if [ $keys != "bfile,bfile,documents/report1.pdf,documents/report2.pdf,ebook,photos/2021/a2.png,photos/2021/a3.png,photos/2022/a4.png" ]; then
       rv=1
       out="list-multipart-upload failed"
      fi
    fi

    if [ $rv -eq 0 ]; then
      function="${AWS} s3api list-multipart-uploads --bucket ${bucket_name} --key-marker bfile --delimiter /"
      test_function=${function}
      out=$($function)
      rv=$?
      keys=$(echo "$out" | jq -r '.Uploads | map(.Key) | join(",")')
      if [ $keys != "bfile,bfile,ebook" ]; then
       rv=1
       out="list-multipart-uploads failed"
      fi
      keys=$(echo "$out" | jq -r '.CommonPrefixes | map(.Prefix) | join(",")')
      if [ $keys != "documents/,photos/" ]; then
       rv=1
       out="list-multipart-uploads failed"
      fi
    fi

     if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-multipart-uploads --bucket ${bucket_name} --delimiter /  --max-upload 5"
        test_function=${function}
        out=$($function)
        rv=$?
        keys=$(echo "$out" | jq -r '.Uploads | map(.Key) | join(",")')
        if [ $keys != "afile,bfile,bfile,ebook" ]; then
         rv=1
         out="list-multipart-uploads failed"
        fi
        keys=$(echo "$out" | jq -r '.CommonPrefixes[0].Prefix')
        if [ $keys != "documents/" ]; then
         rv=1
         out="list-multipart-uploads failed"
        fi
     fi

    if [ $rv -eq 0 ]; then
      function="${AWS} s3api list-multipart-uploads --bucket ${bucket_name} --prefix documents/"
      test_function=${function}
      out=$($function)
      rv=$?
      keys=$(echo "$out" | jq -r '.Uploads | map(.Key) | join(",")')
        if [ $keys != "documents/report1.pdf,documents/report2.pdf" ]; then
         rv=1
         out="list-multipart-upload failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
        # remove temp file
        rm -f /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        rm -f /tmp/multipart
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# List number of objects based on the maxKey
# value set.
function test_max_key_list() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-b --bucket ${bucket_name} --key datafile-1-b"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # copy object server side
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-b-copy --copy-source ${bucket_name}/datafile-1-b"
        out=$($function)
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --max-keys 1"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        if [ $rv -eq 0 ]; then
            out=$(echo "$out" | jq '.KeyCount')
            rv=$?
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
        # The command passed, but the delete_bucket failed
        out="delete_bucket for test_max_key_list failed"
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Copy object tests for server side copy
# of the object, validates returned md5sum.
function test_copy_object() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # copy object server side
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB"
        test_function=${function}
        out=$($function)
        rv=$?
        hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
        if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
            # Verification failed
            rv=1
            out="Hash mismatch expected $HASH_1_KB, got $hash2"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --key /not-exist-dir/datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB"
        test_function=${function}
        out=$($function)
        rv=$?
        hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
        if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
            # Verification failed
            rv=1
            out="Hash mismatch expected $HASH_1_KB, got $hash2"
        fi
    fi

    ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Copy object tests for server side copy
# of the object, validates returned md5sum.
# validates change in storage class as well
function test_copy_object_storage_class() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # copy object server side
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --storage-class REDUCED_REDUNDANCY --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        # if this functionality is not implemented return right away.
        if [ $rv -ne 0 ]; then
            if echo "$out" | grep -q "NotImplemented"; then
                ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
                return 0
            fi
        fi
        hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
        if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
            # Verification failed
            rv=1
            out="Hash mismatch expected $HASH_1_KB, got $hash2"
        fi
    fi

    ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Copy object tests for server side copy
# to itself by changing storage class
function test_copy_object_storage_class_same() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # copy object server side
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --storage-class REDUCED_REDUNDANCY --key datafile-1-kB --copy-source ${bucket_name}/datafile-1-kB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        # if this functionality is not implemented return right away.
        if [ $rv -ne 0 ]; then
            if echo "$out" | grep -q "NotImplemented"; then
                ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
                return 0
            fi
        fi
        hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
        if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
            # Verification failed
            rv=1
            out="Hash mismatch expected $HASH_1_KB, got $hash2"
        fi
    fi

    ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests for presigned URL success case, presigned URL
# is correct and accessible - we calculate md5sum of
# the object and validate it against a local files md5sum.
function test_presigned_object() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3 presign s3://${bucket_name}/datafile-1-kB"
        test_function=${function}
        url=$($function)
        rv=$?
        curl -sS -X GET "${url}" > /tmp/datafile-1-kB
        get_md5 /tmp/datafile-1-kB
        hash2=$md5rt
        if [ "$HASH_1_KB" == "$hash2" ]; then
            function="delete_bucket"
            out=$(delete_bucket "$bucket_name")
            rv=$?
            # remove download file
            rm -f /tmp/datafile-1-kB
        else
            rv=1
            out="Checksum verification failed for downloaded object"
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests creating and deleting an object - 10MiB
function test_upload_object_10() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-10-MB --bucket ${bucket_name} --key datafile-10-MB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests multipart API by making each individual calls with 10MiB part size.
function test_multipart_upload_10() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    object_name=${bucket_name}"-object"
    rv=$?

    if [ $rv -eq 0 ]; then
        # create multipart
        function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name}"
        test_function=${function}
        out=$($function)
        rv=$?
        upload_id=$(echo "$out" | jq -r .UploadId)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 1
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-10-MB --upload-id ${upload_id} --part-number 1"
        out=$($function)
        rv=$?
        etag1=$(echo "$out" | jq -r .ETag)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 2
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-10-MB --upload-id ${upload_id} --part-number 2"
        out=$($function)
        rv=$?
        etag2=$(echo "$out" | jq -r .ETag)
        # Create a multipart struct file for completing multipart transaction
        echo "{
            \"Parts\": [
                {
                    \"ETag\": ${etag1},
                    \"PartNumber\": 1
                },
                {
                    \"ETag\": ${etag2},
                    \"PartNumber\": 2
                }
            ]
        }" >> /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        # Use saved etags to complete the multipart transaction
        function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}"
        out=$($function)
        rv=$?
        finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        if [ "${finalETag}" == "" ]; then
            rv=1
            out="complete-multipart-upload failed"
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
        # remove temp file
        rm -f /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        rm -f /tmp/multipart
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests lifecycle of a bucket.
function test_bucket_lifecycle() {
    # log start time
    start_time=$(get_time)

    echo "{ \"Rules\": [ { \"Expiration\": { \"Days\": 365 },\"ID\": \"Bucketlifecycle test\", \"Filter\": { \"Prefix\": \"\" }, \"Status\": \"Enabled\" } ] }" >> /tmp/lifecycle.json

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds put bucket lifecycle
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-bucket-lifecycle-configuration --bucket ${bucket_name} --lifecycle-configuration file:///tmp/lifecycle.json"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -ne 0 ]; then
        # if this functionality is not implemented return right away.
        if echo "$out" | grep -q "NotImplemented"; then
            ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
            return 0
        fi
    fi

    # if put bucket lifecycle succeeds get bucket lifecycle
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-bucket-lifecycle-configuration --bucket ${bucket_name}"
        out=$($function 2>&1)
        rv=$?
    fi

    # if get bucket lifecycle succeeds delete bucket lifecycle
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api delete-bucket-lifecycle --bucket ${bucket_name}"
        out=$($function 2>&1)
        rv=$?
    fi

    # delete lifecycle.json
    rm -f /tmp/lifecycle.json

    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "test_bucket_lifecycle"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests `aws s3 cp` by uploading a local file.
function test_aws_s3_cp() {
    file_name="${MINT_DATA_DIR}/datafile-65-MB"

    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file using cp
    if [ $rv -eq 0 ]; then
        function="${AWS} s3 cp $file_name s3://${bucket_name}/$(basename "$file_name")"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3 rm s3://${bucket_name}/$(basename "$file_name")"
        out=$($function 2>&1)
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3 rb s3://${bucket_name}/"
        out=$($function 2>&1)
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# Tests `aws s3 sync` by mirroring all the
# local content to remove bucket.
function test_aws_s3_sync() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds sync all the files in a directory
    if [ $rv -eq 0 ]; then
        function="${AWS} s3 sync --no-progress $MINT_DATA_DIR s3://${bucket_name}/"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # remove files recusively
    if [ $rv -eq 0 ]; then
        function="${AWS} s3 rm --recursive s3://${bucket_name}/"
        out=$($function 2>&1)
        rv=$?
    fi

    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# list objects negative test - tests for following conditions.
# v1 API with max-keys=-1 and max-keys=0
# v2 API with max-keys=-1 and max-keys=0
function test_list_objects_error() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    if [ $rv -eq 0 ]; then
        # Server replies an error for v1 with max-key=-1
        function="${AWS} s3api list-objects --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=-1"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
            rv=1
        else
            rv=0
        fi
    fi

    if [ $rv -eq 0 ]; then
        # Server replies an error for v2 with max-keys=-1
        function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=-1"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
            rv=1
        else
            rv=0
        fi
    fi

    if [ $rv -eq 0 ]; then
        # Server returns success with no keys when max-keys=0
        function="${AWS} s3api list-objects-v2 --bucket ${bucket_name} --prefix datafile-1-kB --max-keys=0"
        out=$($function 2>&1)
        rv=$?
        if [ $rv -eq 0 ]; then
            function="delete_bucket"
            out=$(delete_bucket "$bucket_name")
            rv=$?
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# put object negative test - tests for following conditions.
# - invalid object name.
# - invalid Content-Md5
# - invalid Content-Length
function test_put_object_error() {
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload an object without content-md5.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --content-md5 invalid"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
            rv=1
        else
            rv=0
        fi
    fi

    # upload an object without content-length.
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --content-length -1"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
        if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
            rv=1
        else
            rv=0
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}
# tests server side encryption headers for get and put calls
function test_serverside_encryption() {
    #skip server side encryption tests if HTTPS disabled.
    if [ "$ENABLE_HTTPS" != "1" ]; then
        return 0
    fi
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # put object with server side encryption headers
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    # now get encrypted object from server
    if [ $rv -eq 0 ]; then
        etag1=$(echo "$out" | jq -r .ETag)
        sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5)
        sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm)

        function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-1-kB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    if [ $rv -eq 0 ]; then
        etag2=$(echo "$out" | jq -r .ETag)
        sse_customer_key2=$(echo "$out" | jq -r .SSECustomerKeyMD5)
        sse_customer_algo2=$(echo "$out" | jq -r .SSECustomerAlgorithm)
        get_md5 "/tmp/datafile-1-kB"
        hash2=$md5rt
        # match downloaded object's hash to original
        if [ "$HASH_1_KB" == "$hash2" ]; then
            function="delete_bucket"
            out=$(delete_bucket "$bucket_name")
            rv=$?
            # remove download file
            rm -f /tmp/datafile-1-kB
        else
            rv=1
            out="Checksum verification failed for downloaded object"
        fi
        # match etag and SSE headers
        if [ "$etag1" != "$etag2" ]; then
            rv=1
            out="Etag mismatch for object encrypted with server side encryption"
        fi
        if [ "$sse_customer_algo1" != "$sse_customer_algo2" ]; then
            rv=1
            out="sse customer algorithm mismatch"
        fi
        if [ "$sse_customer_key1" != "$sse_customer_key2" ]; then
            rv=1
            out="sse customer key mismatch"
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# tests server side encryption headers for multipart put
function test_serverside_encryption_multipart() {
    #skip server side encryption tests if HTTPS disabled.
    if [ "$ENABLE_HTTPS" != "1" ]; then
        return 0
    fi
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # put object with server side encryption headers
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-65-MB --bucket ${bucket_name} --key datafile-65-MB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    # now get encrypted object from server
    if [ $rv -eq 0 ]; then
        etag1=$(echo "$out" | jq -r .ETag)
        sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5)
        sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm)

        function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-65-MB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-65-MB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    if [ $rv -eq 0 ]; then
        etag2=$(echo "$out" | jq -r .ETag)
        sse_customer_key2=$(echo "$out" | jq -r .SSECustomerKeyMD5)
        sse_customer_algo2=$(echo "$out" | jq -r .SSECustomerAlgorithm)
        get_md5 "${MINT_DATA_DIR}/datafile-65-MB"
        hash2=$md5rt
        # match downloaded object's hash to original
        if [ "$HASH_65_MB" == "$hash2" ]; then
            function="delete_bucket"
            out=$(delete_bucket "$bucket_name")
            rv=$?
            # remove download file
            rm -f /tmp/datafile-65-MB
        else
            rv=1
            out="Checksum verification failed for downloaded object"
        fi
        # match etag and SSE headers
        if [ "$etag1" != "$etag2" ]; then
            rv=1
            out="Etag mismatch for object encrypted with server side encryption"
        fi
        if [ "$sse_customer_algo1" != "$sse_customer_algo2" ]; then
            rv=1
            out="sse customer algorithm mismatch"
        fi
        if [ "$sse_customer_key1" != "$sse_customer_key2" ]; then
            rv=1
            out="sse customer key mismatch"
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}

# tests encrypted copy from multipart encrypted object to
# single part encrypted object. This test in particular checks if copy
# succeeds for the case where encryption overhead for individually
# encrypted parts vs encryption overhead for the original datastream
# differs.
function test_serverside_encryption_multipart_copy() {
    #skip server side encryption tests if HTTPS disabled.
    if [ "$ENABLE_HTTPS" != "1" ]; then
        return 0
    fi
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    object_name=${bucket_name}"-object"
    rv=$?

    if [ $rv -eq 0 ]; then
        # create multipart
        function="${AWS} s3api create-multipart-upload --bucket ${bucket_name} --key ${object_name} --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        out=$($function)
        rv=$?
        upload_id=$(echo "$out" | jq -r .UploadId)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 1
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 1 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        out=$($function)
        rv=$?
        etag1=$(echo "$out" | jq -r .ETag)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 2
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key ${object_name} --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 2 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        out=$($function)
        rv=$?
        etag2=$(echo "$out" | jq -r .ETag)
        # Create a multipart struct file for completing multipart transaction
        echo "{
            \"Parts\": [
                {
                    \"ETag\": ${etag1},
                    \"PartNumber\": 1
                },
                {
                    \"ETag\": ${etag2},
                    \"PartNumber\": 2
                }
            ]
        }" >> /tmp/multipart
    fi

    if [ $rv -eq 0 ]; then
        # Use saved etags to complete the multipart transaction
        function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key ${object_name} --upload-id ${upload_id}"
        out=$($function)
        rv=$?
        finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        if [ "${finalETag}" == "" ]; then
            rv=1
            out="complete-multipart-upload failed"
        fi
    fi

     # copy object server side
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api copy-object --bucket ${bucket_name} --key ${object_name}-copy --copy-source ${bucket_name}/${object_name} --copy-source-sse-customer-algorithm AES256 --copy-source-sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --copy-source-sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function)
        rv=$?
        if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
            rv=1
        else
            rv=0
        fi
    fi

    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        rm -f /tmp/multipart
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}
# tests server side encryption headers for range get calls
function test_serverside_encryption_get_range() {
    #skip server side encryption tests if HTTPS disabled.
    if [ "$ENABLE_HTTPS" != "1" ]; then
        return 0
    fi
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?
    # put object with server side encryption headers
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-10-kB --bucket ${bucket_name} --key datafile-10-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    # now get encrypted object from server for range 500-999
    if [ $rv -eq 0 ]; then
        etag1=$(echo "$out" | jq -r .ETag)
        sse_customer_key1=$(echo "$out" | jq -r .SSECustomerKeyMD5)
        sse_customer_algo1=$(echo "$out" | jq -r .SSECustomerAlgorithm)
        function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-10-kB --range bytes=500-999 --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-10-kB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    if [ $rv -eq 0 ]; then
        cnt=$(stat -c%s /tmp/datafile-10-kB)
        if [ "$cnt" -ne 500 ]; then
            rv=1
        fi
    fi
    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi
    return $rv
}

# tests server side encryption error for get and put calls
function test_serverside_encryption_error() {
    #skip server side encryption tests if HTTPS disabled.
    if [ "$ENABLE_HTTPS" != "1" ]; then
        return 0
    fi
    # log start time
    start_time=$(get_time)

    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # put object with server side encryption headers  with MD5Sum mismatch for sse-customer-key-md5 header
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
        rv=1
    else
        rv=0
    fi
    # put object with missing server side encryption header sse-customer-algorithm
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB  --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
        rv=1
    else
        rv=0
    fi

    # put object with server side encryption headers successfully
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc2xvbmdzZWNyZXRrZXltdXN0cHJvdmlkZWQ= --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg=="
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi

    # now test get on encrypted object with nonmatching sse-customer-key and sse-customer-md5 headers
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-object --bucket ${bucket_name} --key datafile-1-kB --sse-customer-algorithm AES256 --sse-customer-key MzJieXRlc --sse-customer-key-md5 7PpPLAK26ONlVUGOWlusfg== /tmp/datafile-1-kB"
        test_function=${function}
        out=$($function 2>&1)
        rv=$?
    fi
    if [ $rv -ne 255 ] && [ $rv -ne 254 ]; then
        rv=1
    else
        rv=0
    fi
    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi
    if [ $rv -eq 0 ]; then
        log_success "$(get_duration "$start_time")" "${test_function}"
    else
        # clean up and log error
        ${AWS} s3 rb s3://"${bucket_name}" --force > /dev/null 2>&1
        log_failure "$(get_duration "$start_time")" "${function}" "${out}"
    fi

    return $rv
}


# test GetObjectInfo http code is 404
function test_get_object_error(){
    # log start time
    start_time=$(get_time)
    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # if make bucket succeeds upload a file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key /dir1/datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # if upload succeeds download the file
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-object --bucket ${bucket_name} --key /dir1 /tmp/datafile-1-kB"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        if [ $? -eq 255 ] || [ $? -eq 254 ];then
            rv=0
        fi
        if ! [[ "$out" =~ "The specified key does not exist" ]];then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
        fi
    fi

    if [ $rv -eq 0 ]; then
        function="${AWS} s3api get-object --bucket ${bucket_name} --key /dir1/ /tmp/datafile-1-kB"
        # save the ref to function being tested, so it can be logged
        test_function=${function}
        out=$($function 2>&1)
        if [ $? -eq 255 ] || [ $? -eq 254 ];then
            rv=0
        fi
        if ! [[ "$out" =~ "The specified key does not exist" ]];then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
        fi
    fi

    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi
    return $rv
}

function test_object_tagging(){
    # log start time
    start_time=$(get_time)
    function="make_bucket"
    bucket_name=$(make_bucket)
    rv=$?

    # put object with object tagging
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key /datafile-1-kB --tagging k1=v1&k2=v2"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
      out=$($function 2>&1)
      rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"k1","Value":"v1"},{"Key":"k2","Value":"v2"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # overwrite object tagging
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key /datafile-1-kB --tagging key1=value1&key2=value2"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
      out=$($function 2>&1)
      rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"key1","Value":"value1"},{"Key":"key2","Value":"value2"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # delete object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api delete-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
      out=$($function 2>&1)
      rv=$?
    fi
    # check object tagging
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq -c)
      if [ "$tagSet" != '' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # create multipart upload with object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api create-multipart-upload  --bucket ${bucket_name} --key /datafile-1-kB --tagging k1=v1&k2=v2"
      out=$($function)
      rv=$?
      upload_id=$(echo "$out" | jq -r .UploadId)
    fi
    # upload part
    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 1
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key /datafile-1-kB --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 1"
        out=$($function)
        rv=$?
        etag1=$(echo "$out" | jq -r .ETag)
    fi

    if [ $rv -eq 0 ]; then
        # Capture etag for part-number 2
        function="${AWS} s3api upload-part --bucket ${bucket_name} --key /datafile-1-kB --body ${MINT_DATA_DIR}/datafile-5243880-b --upload-id ${upload_id} --part-number 2"
        out=$($function)
        rv=$?
        etag2=$(echo "$out" | jq -r .ETag)
        # Create a multipart struct file for completing multipart transaction
        echo "{
            \"Parts\": [
                {
                    \"ETag\": ${etag1},
                    \"PartNumber\": 1
                },
                {
                    \"ETag\": ${etag2},
                    \"PartNumber\": 2
                }
            ]
        }" > /tmp/multipart
    fi

    # complete multipart upload
    if [ $rv -eq 0 ]; then
        # Use saved etags to complete the multipart transaction
        function="${AWS} s3api complete-multipart-upload --multipart-upload file:///tmp/multipart --bucket ${bucket_name} --key /datafile-1-kB --upload-id ${upload_id}"
        out=$($function)
        rm -rf /tmp/multipart
        rv=$?
        finalETag=$(echo "$out" | jq -r .ETag | sed -e 's/^"//' -e 's/"$//')
        if [ "${finalETag}" == "" ]; then
            rv=1
            out="complete-multipart-upload failed"
        fi
    fi
    # check object tagging
    if [ $rv -eq 0 ]; then
          function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
          out=$($function 2>&1)
          rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"k1","Value":"v1"},{"Key":"k2","Value":"v2"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # overwrite object
    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key /datafile-1-kB"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
      out=$($function 2>&1)
      rv=$?
    fi
    # check object tagging
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq -c)
      if [ "$tagSet" != "[]" ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi


    if [ $rv -eq 0 ]; then
        function="${AWS} s3api put-object --body ${MINT_DATA_DIR}/datafile-1-kB --bucket ${bucket_name} --key /datafile-1-kB --tagging key1=value1&key2=value2"
        out=$($function 2>&1)
        rv=$?
    else
        # if make bucket fails, $bucket_name has the error output
        out="${bucket_name}"
    fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB"
      out=$($function 2>&1)
      rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"key1","Value":"value1"},{"Key":"key2","Value":"value2"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # copy object with tagging-directive COPY
     if [ $rv -eq 0 ]; then
         function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB"
         out=$($function)
         rv=$?
         hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
         if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
             # Verification failed
             rv=1
             out="Hash mismatch expected $HASH_1_KB, got $hash2"
         fi
     fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key /datafile-1-kB-copy"
      out=$($function 2>&1)
      rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"key1","Value":"value1"},{"Key":"key2","Value":"value2"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi


    # copy object with tagging-directive REPLACE
    if [ $rv -eq 0 ]; then
       function="${AWS} s3api copy-object --bucket ${bucket_name} --key datafile-1-kB-copy --copy-source ${bucket_name}/datafile-1-kB --tagging key=value  --tagging-directive REPLACE"
       out=$($function)
       rv=$?
       hash2=$(echo "$out" | jq -r .CopyObjectResult.ETag | sed -e 's/^"//' -e 's/"$//')
       if [ $rv -eq 0 ] && [ "$HASH_1_KB" != "$hash2" ]; then
           # Verification failed
           rv=1
           out="Hash mismatch expected $HASH_1_KB, got $hash2"
       fi
   fi

    # check object tagging
    if [ $rv -eq 0 ]; then
      function="${AWS} s3api get-object-tagging  --bucket ${bucket_name} --key datafile-1-kB-copy"
      out=$($function 2>&1)
      rv=$?
    fi
    if [ $rv -eq 0 ]; then
      tagSet=$(echo "$out" | jq -r .TagSet | jq 'sort_by(.Key)' | jq -c)
      if [ "$tagSet" != '[{"Key":"key","Value":"value"}]' ]; then
            log_failure "$(get_duration "$start_time")" "${function}" "${out}"
            rv=1
      fi
    fi

    # delete bucket
    if [ $rv -eq 0 ]; then
        function="delete_bucket"
        out=$(delete_bucket "$bucket_name")
        rv=$?
    fi
    return $rv
}
# main handler for all the tests.
main() {
    # Success tests
    test_create_bucket && \
    test_upload_object && \
    test_lookup_object_prefix && \
    test_list_objects && \
    test_multipart_upload_0byte && \
    test_multipart_upload && \
    test_max_key_list && \
    test_copy_object && \
    test_copy_object_storage_class && \
    test_copy_object_storage_class_same && \
    test_presigned_object && \
    test_upload_object_10 && \
    test_multipart_upload_10 && \
#     test_bucket_lifecycle && \
    test_serverside_encryption && \
    test_serverside_encryption_get_range && \
    test_serverside_encryption_multipart && \
    test_serverside_encryption_multipart_copy && \
    # Success cli ops.
    test_aws_s3_cp && \
    test_aws_s3_sync && \
    # Error tests
    test_list_objects_error && \
    test_put_object_error && \
    test_serverside_encryption_error && \
    # test_worm_bucket && \
    # test_legal_hold
    test_get_object_error &&  \
    test_object_tagging
    return $?
}

_init "$endpoint" && main


================================================
FILE: main.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"os"

	"github.com/juicedata/juicefs/cmd"
	"github.com/juicedata/juicefs/pkg/utils"
)

var logger = utils.GetLogger("juicefs")

func main() {
	err := cmd.Main(os.Args)
	if err != nil {
		logger.Fatal(err)
	}
}


================================================
FILE: package.json
================================================
{
  "name": "juicefs",
  "version": "1.0.0",
  "author": "Juicedata",
  "license": "Apache",
  "repository": "github:juicedata/juicefs",
  "scripts": {
    "autocorrect-lint": "autocorrect --lint ./docs/ README*.md",
    "autocorrect-lint-fix": "autocorrect --fix ./docs/ README*.md",
    "check-broken-link": "./node_modules/.bin/remark --quiet --frail ./docs/ README*.md",
    "markdown-lint": "./node_modules/.bin/markdownlint-cli2 './docs/**/*.md' README*.md",
    "markdown-lint-fix": "./node_modules/.bin/markdownlint-cli2 --fix './docs/**/*.md' README*.md"
  },
  "dependencies": {
    "markdownlint-cli2": "^0.17.2",
    "markdownlint-rule-enhanced-proper-names": "^0.0.1",
    "markdownlint-rule-no-trailing-slash-in-links": "^0.0.1",
    "remark-cli": "^11.0.0",
    "remark-validate-links": "^13.0.1",
    "remark-validate-links-heading-id": "^0.0.3"
  },
  "remarkConfig": {
    "plugins": [
      "remark-validate-links-heading-id",
      "remark-validate-links"
    ]
  }
}


================================================
FILE: pkg/acl/acl.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package acl

import (
	"fmt"
	"hash/crc32"

	"github.com/juicedata/juicefs/pkg/utils"
)

const Version uint8 = 2

type Entry struct {
	Id   uint32
	Perm uint16
}

type Entries []Entry

func (es *Entries) Len() int           { return len(*es) }
func (es *Entries) Less(i, j int) bool { return (*es)[i].Id < (*es)[j].Id }
func (es *Entries) Swap(i, j int)      { (*es)[i], (*es)[j] = (*es)[j], (*es)[i] }

func (es *Entries) IsEqual(other *Entries) bool {
	if es.Len() != other.Len() {
		return false
	}
	for i := 0; i < es.Len(); i++ {
		if (*es)[i].Id != (*other)[i].Id || (*es)[i].Perm != (*other)[i].Perm {
			return false
		}
	}
	return true
}

func (es *Entries) Encode() []byte {
	w := utils.NewBuffer(uint32(es.Len() * 6))
	for _, e := range *es {
		w.Put32(e.Id)
		w.Put16(e.Perm)
	}
	return w.Bytes()
}

func (es *Entries) Decode(data []byte) {
	r := utils.ReadBuffer(data)
	for r.HasMore() {
		*es = append(*es, Entry{
			Id:   r.Get32(),
			Perm: r.Get16(),
		})
	}
}

// Rule acl rule
type Rule struct {
	Owner       uint16
	Group       uint16
	Mask        uint16
	Other       uint16
	NamedUsers  Entries
	NamedGroups Entries
}

func (r *Rule) String() string {
	return fmt.Sprintf("owner %o, group %o, mask %o, other %o, named users: %+v, named group %+v",
		r.Owner, r.Group, r.Mask, r.Other, r.NamedUsers, r.NamedGroups)
}

func (r *Rule) Dup() *Rule {
	if r != nil {
		newRule := *r
		// NamedUsers and NamedGroups are never modified
		return &newRule
	}
	return nil
}

func (r *Rule) Encode() []byte {
	w := utils.NewBuffer(uint32(16 + (len(r.NamedUsers)+len(r.NamedGroups))*6))
	w.Put16(r.Owner)
	w.Put16(r.Group)
	w.Put16(r.Mask)
	w.Put16(r.Other)
	w.Put32(uint32(len(r.NamedUsers)))
	for _, entry := range r.NamedUsers {
		w.Put32(entry.Id)
		w.Put16(entry.Perm)
	}
	w.Put32(uint32(len(r.NamedGroups)))
	for _, entry := range r.NamedGroups {
		w.Put32(entry.Id)
		w.Put16(entry.Perm)
	}
	return w.Bytes()
}

func (r *Rule) Decode(buf []byte) {
	rb := utils.ReadBuffer(buf)
	r.Owner = rb.Get16()
	r.Group = rb.Get16()
	r.Mask = rb.Get16()
	r.Other = rb.Get16()
	uCnt := rb.Get32()
	r.NamedUsers = make([]Entry, uCnt)
	for i := 0; i < int(uCnt); i++ {
		r.NamedUsers[i].Id = rb.Get32()
		r.NamedUsers[i].Perm = rb.Get16()
	}

	gCnt := rb.Get32()
	r.NamedGroups = make([]Entry, gCnt)
	for i := 0; i < int(gCnt); i++ {
		r.NamedGroups[i].Id = rb.Get32()
		r.NamedGroups[i].Perm = rb.Get16()
	}
}

func EmptyRule() *Rule {
	return &Rule{
		Owner: 0xFFFF,
		Group: 0xFFFF,
		Other: 0xFFFF,
		Mask:  0xFFFF,
	}
}

func (r *Rule) IsEmpty() bool {
	return len(r.NamedUsers)+len(r.NamedGroups) == 0 &&
		r.Owner&r.Group&r.Other&r.Mask == 0xFFFF
}

// IsMinimal just like normal permission
func (r *Rule) IsMinimal() bool {
	return len(r.NamedGroups)+len(r.NamedUsers) == 0 && r.Mask == 0xFFFF
}

func (r *Rule) IsEqual(other *Rule) bool {
	if r.Owner != other.Owner || r.Group != other.Group || r.Mask != other.Mask || r.Other != other.Other {
		return false
	}

	return r.NamedUsers.IsEqual(&other.NamedUsers) &&
		r.NamedGroups.IsEqual(&other.NamedGroups)
}

// InheritPerms from normal permission
func (r *Rule) InheritPerms(mode uint16) {
	if r.Owner == 0xFFFF {
		r.Owner = (mode >> 6) & 7
	}
	if r.Group == 0xFFFF {
		r.Group = (mode >> 3) & 7
	}
	if r.Other == 0xFFFF {
		r.Other = mode & 7
	}
}

func (r *Rule) SetMode(mode uint16) {
	r.Owner &= 0xFFF8
	r.Owner |= (mode >> 6) & 7

	if r.IsMinimal() {
		r.Group &= 0xFFF8
		r.Group |= (mode >> 3) & 7
	} else {
		r.Mask &= 0xFFF8
		r.Mask |= (mode >> 3) & 7
	}
	r.Other &= 0xFFF8
	r.Other |= mode & 7
}

func (r *Rule) GetMode() uint16 {
	if r.IsMinimal() {
		return ((r.Owner & 7) << 6) | ((r.Group & 7) << 3) | (r.Other & 7)
	}
	return ((r.Owner & 7) << 6) | ((r.Mask & 7) << 3) | (r.Other & 7)
}

// ChildAccessACL return the child node access acl with this default acl
func (r *Rule) ChildAccessACL(mode uint16) *Rule {
	cRule := &Rule{}
	cRule.Owner = (mode >> 6) & 7 & r.Owner
	cRule.Mask = (mode >> 3) & 7 & r.Mask
	cRule.Other = mode & 7 & r.Other

	cRule.Group = r.Group
	cRule.NamedUsers = r.NamedUsers
	cRule.NamedGroups = r.NamedGroups
	return cRule
}

var crc32c = crc32.MakeTable(crc32.Castagnoli)

func (r *Rule) Checksum() uint32 {
	return crc32.Checksum(r.Encode(), crc32c)
}

func (r *Rule) CanAccess(uid uint32, gids []uint32, fUid, fGid uint32, mMask uint8) bool {
	if uid == fUid {
		return uint8(r.Owner&7)&mMask == mMask
	}
	for _, nUser := range r.NamedUsers {
		if uid == nUser.Id {
			return uint8(nUser.Perm&r.Mask&7)&mMask == mMask
		}
	}

	isGrpMatched := false
	for _, gid := range gids {
		if gid == fGid {
			if uint8(r.Group&r.Mask&7)&mMask == mMask {
				return true
			}
			isGrpMatched = true
		}
	}
	for _, gid := range gids {
		for _, nGrp := range r.NamedGroups {
			if gid == nGrp.Id {
				if uint8(nGrp.Perm&r.Mask&7)&mMask == mMask {
					return true
				}
				isGrpMatched = true
			}
		}
	}
	if isGrpMatched {
		return false
	}

	return uint8(r.Other&7)&mMask == mMask
}

const (
	TypeNone = iota
	TypeAccess
	TypeDefault
)


================================================
FILE: pkg/acl/cache.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package acl

import (
	"sync"
)

const None = 0

// Cache all rules
// - cache all rules when meta init.
// - on getfacl failure, read and cache rule from meta.
// - on setfacl success, read and cache all missed rules from meta. (considered as a low-frequency operation)
// - concurrent mounts may result in duplicate rules.
type Cache interface {
	Put(id uint32, r *Rule)
	Get(id uint32) *Rule
	GetAll() map[uint32]*Rule
	GetId(r *Rule) uint32
	Size() int
	GetMissIds() []uint32
	Clear()
}

func NewCache() Cache {
	return &cache{
		lock:     sync.RWMutex{},
		maxId:    None,
		id2Rule:  make(map[uint32]*Rule),
		cksum2Id: make(map[uint32][]uint32),
	}
}

type cache struct {
	lock     sync.RWMutex
	maxId    uint32
	id2Rule  map[uint32]*Rule
	cksum2Id map[uint32][]uint32
}

func (c *cache) GetAll() map[uint32]*Rule {
	c.lock.RLock()
	defer c.lock.RUnlock()

	cpy := make(map[uint32]*Rule, len(c.id2Rule))
	for id, r := range c.id2Rule {
		cpy[id] = r
	}
	return cpy
}

func (c *cache) Clear() {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.maxId = None
	c.id2Rule = make(map[uint32]*Rule)
	c.cksum2Id = make(map[uint32][]uint32)
}

// GetMissIds return all miss ids from 1 to c.maxId
func (c *cache) GetMissIds() []uint32 {
	c.lock.RLock()
	defer c.lock.RUnlock()

	if uint32(len(c.id2Rule)) == c.maxId {
		return nil
	}

	n := c.maxId + 1
	var ret []uint32
	for i := uint32(1); i < n; i++ {
		if _, ok := c.id2Rule[i]; !ok {
			ret = append(ret, i)
		}
	}
	return ret
}

func (c *cache) Size() int {
	c.lock.RLock()
	defer c.lock.RUnlock()
	return len(c.id2Rule)
}

func (c *cache) Get(id uint32) *Rule {
	c.lock.RLock()
	defer c.lock.RUnlock()
	if r, ok := c.id2Rule[id]; ok {
		return r
	}
	return nil
}

func (c *cache) Put(id uint32, r *Rule) {
	c.lock.Lock()
	defer c.lock.Unlock()

	if _, ok := c.id2Rule[id]; ok {
		return
	}

	if id > c.maxId {
		c.maxId = id
	}

	c.id2Rule[id] = r

	// empty slot
	if r == nil {
		return
	}

	cksum := r.Checksum()
	c.cksum2Id[cksum] = append(c.cksum2Id[cksum], id)
}

func (c *cache) GetId(r *Rule) uint32 {
	if r == nil {
		return None
	}

	c.lock.RLock()
	defer c.lock.RUnlock()

	if ids, ok := c.cksum2Id[r.Checksum()]; ok {
		for _, id := range ids {
			if r.IsEqual(c.id2Rule[id]) {
				return id
			}
		}
	}
	return None
}


================================================
FILE: pkg/acl/cache_test.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package acl

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestCache(t *testing.T) {
	rule := &Rule{
		Owner: 6,
		Group: 4,
		Mask:  4,
		Other: 4,
		NamedUsers: Entries{
			{
				Id:   2,
				Perm: 2,
			},
			{
				Id:   1,
				Perm: 1,
			},
		},
		NamedGroups: Entries{
			{
				Id:   4,
				Perm: 4,
			},
			{
				Id:   3,
				Perm: 3,
			},
		},
	}

	c := NewCache()
	c.Put(1, rule)
	c.Put(2, rule)
	assert.True(t, rule.IsEqual(c.Get(1)))
	assert.True(t, rule.IsEqual(c.Get(2)))
	assert.Equal(t, uint32(1), c.GetId(rule))

	rule2 := &Rule{}
	*rule2 = *rule
	rule2.Owner = 4

	c.Put(3, rule2)
	assert.Equal(t, uint32(3), c.GetId(rule2))

	c.Put(8, rule2)
	assert.Equal(t, []uint32{4, 5, 6, 7}, c.GetMissIds())

	assert.NotPanics(t, func() {
		c.Put(10, nil)
	})
}


================================================
FILE: pkg/chunk/cache_eviction.go
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"container/heap"
	"fmt"
	"math"
	"time"
)

const (
	EvictionNone    = "none"
	Eviction2Random = "2-random"
	EvictionLRU     = "lru"
)

const notInLru = math.MinInt // to trigger panic when misused

type cacheItem struct {
	size  int32
	atime uint32
}

type KeyIndex interface {
	name() string
	add(key cacheKey, item cacheItem)
	// remove removes key, staging blocks will not be removed unless explicitly requested
	remove(key cacheKey, staging bool) *cacheItem
	get(key cacheKey) *cacheItem
	peekAtime(key cacheKey) uint32
	len() int
	reset() KeyIndex
	// randomIter iterates over all items randomly
	randomIter() func(yield func(key cacheKey, item cacheItem) bool)
	// evictionIter evicts items based on different evict policies, yielding each evicted item
	evictionIter() func(yield func(key cacheKey, item cacheItem) bool)
}

func NewKeyIndex(config *Config) (KeyIndex, error) {
	switch config.CacheEviction {
	case EvictionNone:
		return &noneEviction{keys: make(map[cacheKey]cacheItem)}, nil
	case Eviction2Random:
		return &randomEviction{
			noneEviction: noneEviction{keys: make(map[cacheKey]cacheItem)},
			cacheExpire:  config.CacheExpire,
		}, nil
	case EvictionLRU:
		return &lruEviction{
			keys:    make(map[cacheKey]*lruItem),
			lruHeap: atimeHeap{},
		}, nil
	default:
		return nil, fmt.Errorf("unknown cache eviction policy: %q", config.CacheEviction)
	}
}

// noneEviction is a policy that does nothing.
type noneEviction struct {
	keys map[cacheKey]cacheItem
}

func (p *noneEviction) name() string {
	return EvictionNone
}

func (p *noneEviction) add(key cacheKey, item cacheItem) {
	p.keys[key] = item
}

func (p *noneEviction) remove(key cacheKey, staging bool) *cacheItem {
	item, ok := p.keys[key]
	if !ok {
		return nil
	}
	if item.size < 0 && !staging {
		return nil
	}
	delete(p.keys, key)
	return &item
}

func (p *noneEviction) get(key cacheKey) *cacheItem {
	if iter, ok := p.keys[key]; ok {
		// update atime
		p.keys[key] = cacheItem{iter.size, uint32(time.Now().Unix())}
		return &iter
	}
	return nil
}

func (p *noneEviction) peekAtime(key cacheKey) uint32 {
	return p.keys[key].atime
}

func (p *noneEviction) len() int {
	return len(p.keys)
}

func (p *noneEviction) reset() KeyIndex {
	snap := &noneEviction{keys: p.keys}
	p.keys = make(map[cacheKey]cacheItem, len(p.keys))
	return snap
}

func (p *noneEviction) randomIter() func(yield func(key cacheKey, item cacheItem) bool) {
	return func(yield func(key cacheKey, item cacheItem) bool) {
		for k, v := range p.keys {
			if !yield(k, v) {
				return
			}
		}
	}
}

func (p *noneEviction) evictionIter() func(yield func(key cacheKey, item cacheItem) bool) {
	panic("not implemented for " + p.name())
}

// randomEviction evicts items randomly.
type randomEviction struct {
	noneEviction
	cacheExpire time.Duration
}

func (p *randomEviction) name() string {
	return Eviction2Random
}

func (p *randomEviction) reset() KeyIndex {
	snap := &randomEviction{
		noneEviction: noneEviction{keys: p.keys},
		cacheExpire:  p.cacheExpire,
	}
	p.keys = make(map[cacheKey]cacheItem, len(p.keys))
	return snap
}

func (p *randomEviction) evictionIter() func(yield func(key cacheKey, item cacheItem) bool) {
	return func(yield func(key cacheKey, item cacheItem) bool) {
		var cnt int
		var lastK cacheKey
		var lastValue cacheItem
		var now = uint32(time.Now().Unix())
		var cutoff = now - uint32(p.cacheExpire/time.Second)
		for k, value := range p.keys {
			if value.size < 0 {
				continue // staging
			}
			if p.cacheExpire > 0 && value.atime < cutoff {
				lastK = k
				lastValue = value
				cnt++
			} else if cnt == 0 || lastValue.atime > value.atime {
				lastK = k
				lastValue = value
			}
			cnt++
			if cnt > 1 {
				delete(p.keys, lastK)
				if !yield(lastK, lastValue) {
					return
				}
				cnt = 0
			}
		}
	}
}

type lruItem struct {
	cacheItem
	pos int // Item position in lru heap, needed for updates
}

// A min-heap based on atime for cache eviction
type atimeHeap []heapItem

type heapItem struct {
	*lruItem
	key *cacheKey // key to cacheItem
}

func (h atimeHeap) Len() int { return len(h) }

func (h atimeHeap) Less(i, j int) bool { // min-heap
	if h[i].atime != h[j].atime {
		return h[i].atime < h[j].atime
	}
	if h[i].size != h[j].size {
		return h[i].size > h[j].size // prefer deleting larger blocks
	}
	return h[i].key.id < h[j].key.id
}

func (h atimeHeap) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
	h[i].pos = i
	h[j].pos = j
}

func (h *atimeHeap) Push(x any) {
	item := x.(heapItem)
	item.pos = len(*h)
	*h = append(*h, item)
}

func (h *atimeHeap) Pop() any {
	old := *h
	n := len(old)
	item := old[n-1]
	item.pos = notInLru
	*h = old[0 : n-1]
	return item
}

// lruEviction evicts items based on least recent use (atime).
type lruEviction struct {
	keys    map[cacheKey]*lruItem
	lruHeap atimeHeap
}

func (p *lruEviction) name() string {
	return EvictionLRU
}

func (p *lruEviction) add(key cacheKey, item cacheItem) {
	if iter, ok := p.keys[key]; !ok {
		iter = &lruItem{cacheItem: item, pos: notInLru}
		p.keys[key] = iter
		if iter.size > 0 { // don't add staging blocks to lru as they should not be evicted in `cleanupFull`
			heap.Push(&p.lruHeap, heapItem{iter, &key})
		}
	} else {
		iter.cacheItem = item
		if iter.pos == notInLru {
			if iter.size > 0 {
				heap.Push(&p.lruHeap, heapItem{iter, &key})
			}
		} else {
			heap.Fix(&p.lruHeap, iter.pos)
		}
	}
}

func (p *lruEviction) remove(key cacheKey, staging bool) *cacheItem {
	item, ok := p.keys[key]
	if !ok {
		return nil
	}
	if item.size < 0 && !staging {
		return nil
	}
	delete(p.keys, key)
	if item.pos != notInLru {
		heap.Remove(&p.lruHeap, item.pos)
	}
	return &item.cacheItem
}

func (p *lruEviction) get(key cacheKey) *cacheItem {
	if iter, ok := p.keys[key]; ok {
		// update atime
		iter.atime = uint32(time.Now().Unix())
		if iter.pos != notInLru {
			heap.Fix(&p.lruHeap, iter.pos)
		}
		return &iter.cacheItem
	}
	return nil
}

func (p *lruEviction) peekAtime(key cacheKey) uint32 {
	if item, ok := p.keys[key]; ok {
		return item.cacheItem.atime
	}
	return 0
}

func (p *lruEviction) len() int {
	return len(p.keys)
}

func (p *lruEviction) reset() KeyIndex {
	snap := &lruEviction{
		keys:    p.keys,
		lruHeap: p.lruHeap,
	}
	p.keys = make(map[cacheKey]*lruItem, len(p.keys))
	p.lruHeap = make(atimeHeap, 0, len(p.lruHeap))
	return snap
}

func (p *lruEviction) randomIter() func(yield func(key cacheKey, item cacheItem) bool) {
	return func(yield func(key cacheKey, item cacheItem) bool) {
		for k, v := range p.keys {
			if !yield(k, v.cacheItem) {
				return
			}
		}
	}
}

func (p *lruEviction) evictionIter() func(yield func(key cacheKey, item cacheItem) bool) {
	return func(yield func(key cacheKey, item cacheItem) bool) {
		for p.lruHeap.Len() > 0 {
			item := heap.Pop(&p.lruHeap).(heapItem)
			if item.size < 0 {
				logger.Warnf("Got a staging block in LRU: %s", item.key) // should not happen
				continue
			}
			delete(p.keys, *item.key)
			if !yield(*item.key, item.lruItem.cacheItem) {
				return
			}
		}
	}
}

// nolint:unused
func (p *lruEviction) verifyHeap() bool {
	cacheKeys := 0
	for k, v := range p.keys {
		if v.size > 0 {
			cacheKeys += 1
		} else if v.pos != notInLru {
			logger.Warnf("Staging block %s has size %d but index %d in lruHeap", k, v.size, v.pos)
			return false
		}
	}
	if p.lruHeap.Len() != cacheKeys {
		logger.Warnf("atime heap length %d does not match keys length %d", p.lruHeap.Len(), len(p.keys))
		return false
	}
	for i, item := range p.lruHeap {
		if item.pos != i {
			logger.Warnf("atime heap item %d index %d does not match its position %d", i, item.pos, i)
			return false
		}
		if it, ok := p.keys[*item.key]; !ok {
			logger.Warnf("heap item %d key %s not found in keys map", i, item.key)
			return false
		} else if it.cacheItem != item.cacheItem {
			logger.Warnf("heap item %d key %s does not match cacheItem in keys map", i, item.key)
			return false
		}
	}
	// Also validate the min-heap property based on atime
	n := p.lruHeap.Len()
	for i := 0; i < n/2; i++ {
		left := 2*i + 1
		right := 2*i + 2
		if left < n && p.lruHeap[i].atime > p.lruHeap[left].atime {
			logger.Warnf("heap property violated: parent atime %d > left child atime %d at index %d", p.lruHeap[i].atime, p.lruHeap[left].atime, i)
			return false
		}
		if right < n && p.lruHeap[i].atime > p.lruHeap[right].atime {
			logger.Warnf("heap property violated: parent atime %d > right child atime %d at index %d", p.lruHeap[i].atime, p.lruHeap[right].atime, i)
			return false
		}
	}
	return true
}


================================================
FILE: pkg/chunk/cached_store.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/juicedata/juicefs/pkg/compress"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juju/ratelimit"
	"github.com/prometheus/client_golang/prometheus"
)

const chunkSize = 1 << 26 // 64M
const pageSize = 1 << 16  // 64K
const SlowRequest = time.Second * time.Duration(10)

var (
	logger = utils.GetLogger("juicefs")
)

type pendingItem struct {
	key       string
	fpath     string    // full path of local file corresponding to the key
	ts        time.Time // timestamp when this item is added
	uploading atomic.Bool
}

// slice for read and remove
type rSlice struct {
	id     uint64
	length int
	store  *cachedStore
}

func sliceForRead(id uint64, length int, store *cachedStore) *rSlice {
	return &rSlice{id, length, store}
}

func (s *rSlice) blockSize(indx int) int {
	bsize := s.length - indx*s.store.conf.BlockSize
	if bsize > s.store.conf.BlockSize {
		bsize = s.store.conf.BlockSize
	}
	return bsize
}

func (s *rSlice) key(indx int) string {
	if s.store.conf.HashPrefix {
		return fmt.Sprintf("chunks/%02X/%v/%v_%v_%v", s.id%256, s.id/1000/1000, s.id, indx, s.blockSize(indx))
	}
	return fmt.Sprintf("chunks/%v/%v/%v_%v_%v", s.id/1000/1000, s.id/1000, s.id, indx, s.blockSize(indx))
}

func (s *rSlice) index(off int) int {
	return off / s.store.conf.BlockSize
}

func (s *rSlice) keys() []string {
	if s.length <= 0 {
		return nil
	}
	lastIndx := (s.length - 1) / s.store.conf.BlockSize
	keys := make([]string, lastIndx+1)
	for i := 0; i <= lastIndx; i++ {
		keys[i] = s.key(i)
	}
	return keys
}

func (s *rSlice) ReadAt(ctx context.Context, page *Page, off int) (n int, err error) {
	p := page.Data
	if len(p) == 0 {
		return 0, nil
	}
	if off >= s.length {
		return 0, io.EOF
	}

	indx := s.index(off)
	boff := off % s.store.conf.BlockSize
	blockSize := s.blockSize(indx)
	if boff+len(p) > blockSize {
		// read beyond current page
		var got int
		for got < len(p) {
			// aligned to current page
			l := min(len(p)-got, s.blockSize(s.index(off))-off%s.store.conf.BlockSize)
			pp := page.Slice(got, l)
			n, err = s.ReadAt(ctx, pp, off)
			pp.Release()
			if err != nil {
				return got + n, err
			}
			if n == 0 {
				return got, io.EOF
			}
			got += n
			off += n
		}
		return got, nil
	}

	key := s.key(indx)
	if s.store.conf.CacheEnabled() {
		start := time.Now()
		r, err := s.store.bcache.load(key)
		if err == nil {
			n, err = r.ReadAt(p, int64(boff))
			if !s.store.conf.OSCache {
				dropOSCache(r)
			}
			_ = r.Close()
			if err == nil {
				s.store.cacheHits.Add(1)
				s.store.cacheHitBytes.Add(float64(n))
				s.store.cacheReadHist.Observe(time.Since(start).Seconds())
				return n, nil
			}
			logger.Warnf("remove partial cached block %s: %d %s", key, n, err)
			s.store.bcache.remove(key, false)
		}
	}

	s.store.cacheMiss.Add(1)
	s.store.cacheMissBytes.Add(float64(len(p)))

	if s.store.seekable &&
		(!s.store.conf.CacheEnabled() || (boff > 0 && len(p) <= blockSize/4)) {
		n, err = s.store.loadRange(ctx, key, page, boff)
		if err == nil || !errors.Is(err, errTryFullRead) {
			return n, err
		}
	}

	block, err := s.store.group.Execute(key, func() (*Page, error) {
		tmp := page
		if boff > 0 || len(p) < blockSize {
			tmp = NewOffPage(blockSize)
		} else {
			tmp.Acquire()
		}
		err = s.store.load(ctx, key, tmp, s.store.shouldCache(blockSize), false)
		return tmp, err
	})
	defer block.Release()
	if err != nil {
		return 0, err
	}
	if block != page {
		copy(p, block.Data[boff:])
	}
	return len(p), nil
}

func (s *rSlice) delete(indx int) error {
	key := s.key(indx)
	return s.store.delete(key)
}

func (s *rSlice) Remove() error {
	if s.length == 0 {
		// no block
		return nil
	}

	lastIndx := (s.length - 1) / s.store.conf.BlockSize
	for i := 0; i <= lastIndx; i++ {
		// there could be multiple clients try to remove the same chunk in the same time,
		// any of them should succeed if any blocks is removed
		key := s.key(i)
		s.store.removePending(key)
		s.store.bcache.remove(key, true)
	}

	var err error
	for i := 0; i <= lastIndx; i++ {
		if e := s.delete(i); e != nil {
			err = e
		}
	}
	return err
}

var pagePool = make(chan *Page, 128)

func allocPage(sz int) *Page {
	if sz != pageSize {
		return NewOffPage(sz)
	}
	select {
	case p := <-pagePool:
		return p
	default:
		return NewOffPage(pageSize)
	}
}

func freePage(p *Page) {
	if cap(p.Data) != pageSize {
		p.Release()
		return
	}
	select {
	case pagePool <- p:
	default:
		p.Release()
	}
}

// slice for write only
type wSlice struct {
	rSlice
	pages       [][]*Page
	uploaded    int
	errors      chan error
	uploadError error
	pendings    int
	writeback   bool
}

func sliceForWrite(id uint64, store *cachedStore) *wSlice {
	return &wSlice{
		rSlice:    rSlice{id, 0, store},
		pages:     make([][]*Page, chunkSize/store.conf.BlockSize),
		errors:    make(chan error, chunkSize/store.conf.BlockSize),
		writeback: store.conf.Writeback,
	}
}

func (s *wSlice) SetID(id uint64) {
	s.id = id
}

func (s *wSlice) SetWriteback(enabled bool) {
	s.writeback = enabled
}

func (s *wSlice) WriteAt(p []byte, off int64) (n int, err error) {
	if int(off)+len(p) > chunkSize {
		return 0, fmt.Errorf("write out of chunk boudary: %d > %d", int(off)+len(p), chunkSize)
	}
	if off < int64(s.uploaded) {
		return 0, fmt.Errorf("Cannot overwrite uploaded block: %d < %d", off, s.uploaded)
	}

	// Fill previous blocks with zeros
	if s.length < int(off) {
		zeros := make([]byte, int(off)-s.length)
		_, _ = s.WriteAt(zeros, int64(s.length))
	}

	for n < len(p) {
		indx := s.index(int(off) + n)
		boff := (int(off) + n) % s.store.conf.BlockSize
		var bs = pageSize
		if indx > 0 || bs > s.store.conf.BlockSize {
			bs = s.store.conf.BlockSize
		}
		bi := boff / bs
		bo := boff % bs
		var page *Page
		if bi < len(s.pages[indx]) {
			page = s.pages[indx][bi]
		} else {
			page = allocPage(bs)
			page.Data = page.Data[:0]
			s.pages[indx] = append(s.pages[indx], page)
		}
		left := len(p) - n
		if bo+left > bs {
			page.Data = page.Data[:bs]
		} else if len(page.Data) < bo+left {
			page.Data = page.Data[:bo+left]
		}
		n += copy(page.Data[bo:], p[n:])
	}
	if int(off)+n > s.length {
		s.length = int(off) + n
	}
	return n, nil
}

func (store *cachedStore) put(key string, p *Page) error {
	if store.upLimit != nil {
		store.upLimit.Wait(int64(len(p.Data)))
	}
	p.Acquire()
	var (
		reqID string
		sc    = object.DefaultStorageClass
	)
	return utils.WithTimeout(context.TODO(), func(ctx context.Context) error {
		defer p.Release()
		st := time.Now()
		err := store.storage.Put(ctx, key, bytes.NewReader(p.Data), object.WithRequestID(&reqID), object.WithStorageClass(&sc))
		used := time.Since(st)
		logRequest("PUT", key, "", reqID, err, used)
		store.objectDataBytes.WithLabelValues("PUT", sc).Add(float64(len(p.Data)))
		store.objectReqsHistogram.WithLabelValues("PUT", sc).Observe(used.Seconds())
		if err != nil {
			store.objectReqErrors.Add(1)
		}
		return err
	}, store.conf.PutTimeout)
}

func (store *cachedStore) delete(key string) error {
	st := time.Now()
	var reqID string
	err := utils.WithTimeout(context.TODO(), func(ctx context.Context) error {
		return store.storage.Delete(ctx, key, object.WithRequestID(&reqID))
	}, store.conf.PutTimeout)
	used := time.Since(st)
	if err != nil && (strings.Contains(err.Error(), "NoSuchKey") ||
		strings.Contains(err.Error(), "not found") ||
		strings.Contains(err.Error(), "No such file")) {
		err = nil
	}
	logRequest("DELETE", key, "", reqID, err, used)
	store.objectReqsHistogram.WithLabelValues("DELETE", "").Observe(used.Seconds())
	if err != nil {
		store.objectReqErrors.Add(1)
	}
	return err
}

func (store *cachedStore) upload(key string, block *Page, s *wSlice) error {
	sync := s != nil
	blen := len(block.Data)
	bufSize := store.compressor.CompressBound(blen)
	var buf *Page
	if bufSize > blen {
		buf = NewOffPage(bufSize)
	} else {
		buf = block
		buf.Acquire()
	}
	defer buf.Release()
	if sync && (blen < store.conf.BlockSize || store.conf.CacheLargeWrite) {
		// block will be freed after written into disk
		store.bcache.cache(key, block, false, false)
	}
	n, err := store.compressor.Compress(buf.Data, block.Data)
	block.Release()
	if err != nil {
		return fmt.Errorf("Compress block key %s: %s", key, err)
	}
	buf.Data = buf.Data[:n]

	try, max := 0, 3
	if sync {
		max = store.conf.MaxRetries + 1
	}
	for ; try < max; try++ {
		time.Sleep(time.Second * time.Duration(try*try))
		if s != nil && s.uploadError != nil {
			err = fmt.Errorf("(cancelled) upload block %s: %s (after %d tries)", key, err, try)
			break
		}
		if err = store.put(key, buf); err == nil {
			break
		}
		logger.Debugf("Upload %s: %s (try %d)", key, err, try+1)
	}
	if err != nil && try >= max {
		err = fmt.Errorf("(max tries) upload block %s: %s (after %d tries)", key, err, try)
	}
	return err
}

func (s *wSlice) upload(indx int) {
	blen := s.blockSize(indx)
	key := s.key(indx)
	pages := s.pages[indx]
	s.pages[indx] = nil
	s.pendings++

	go func() {
		var block *Page
		var off int
		if len(pages) == 1 {
			block = pages[0]
			off = len(block.Data)
		} else {
			block = NewOffPage(blen)
			for _, b := range pages {
				off += copy(block.Data[off:], b.Data)
				freePage(b)
			}
		}
		if off != blen {
			panic(fmt.Sprintf("block length does not match: %v != %v", off, blen))
		}
		if s.writeback && blen < s.store.conf.WritebackThresholdSize {
			stagingPath := "unknown"
			stageFailed := false
			block.Acquire()
			err := utils.WithTimeout(context.TODO(), func(context.Context) (err error) { // In case it hangs for more than 5 minutes(see fileWriter.flush), fallback to uploading directly to avoid `EIO`
				defer block.Release()
				stagingPath, err = s.store.bcache.stage(key, block.Data)
				if err == nil && stageFailed { // upload thread already marked me as failed because of timeout
					_ = s.store.bcache.removeStage(key)
				}
				return err
			}, s.store.conf.PutTimeout)
			if err != nil {
				stageFailed = true
				if !errors.Is(err, errStageConcurrency) {
					s.store.stageBlockErrors.Add(1)
					logger.Warnf("write %s to disk: %s, upload it directly", key, err)
				}
			} else {
				s.errors <- nil
				if s.store.conf.UploadDelay == 0 && s.store.canUpload() {
					select {
					case s.store.currentUpload <- struct{}{}:
						defer func() { <-s.store.currentUpload }()
						if err = s.store.upload(key, block, nil); err == nil {
							s.store.bcache.uploaded(key, blen)
							if err := s.store.bcache.removeStage(key); err != nil {
								logger.Warnf("failed to remove stage %s in upload", stagingPath)
							}
						} else { // add to delay list and wait for later scanning
							s.store.addDelayedStaging(key, stagingPath, time.Now(), false)
						}
						return
					default:
					}
				}
				block.Release()
				s.store.addDelayedStaging(key, stagingPath, time.Now(), false)
				return
			}
		}
		s.store.currentUpload <- struct{}{}
		defer func() { <-s.store.currentUpload }()
		s.errors <- s.store.upload(key, block, s)
	}()
}

func (s *wSlice) ID() uint64 {
	return s.id
}

func (s *wSlice) Len() int {
	return s.length
}

func (s *wSlice) FlushTo(offset int) error {
	if offset < s.uploaded {
		panic(fmt.Sprintf("Invalid offset: %d < %d", offset, s.uploaded))
	}
	for i, block := range s.pages {
		start := i * s.store.conf.BlockSize
		end := start + s.store.conf.BlockSize
		if start >= s.uploaded && end <= offset {
			if block != nil {
				s.upload(i)
			}
			s.uploaded = end
		}
	}

	return nil
}

func (s *wSlice) Finish(length int) error {
	if s.length != length {
		return fmt.Errorf("Length mismatch: %v != %v", s.length, length)
	}

	n := (length-1)/s.store.conf.BlockSize + 1
	if err := s.FlushTo(n * s.store.conf.BlockSize); err != nil {
		return err
	}
	for i := 0; i < s.pendings; i++ {
		if err := <-s.errors; err != nil {
			s.uploadError = err
			return err
		}
	}
	return nil
}

func (s *wSlice) Abort() {
	for i := range s.pages {
		for _, b := range s.pages[i] {
			freePage(b)
		}
		s.pages[i] = nil
	}
	// delete uploaded blocks
	s.length = s.uploaded
	_ = s.Remove()
}

// Config contains options for cachedStore
type Config struct {
	CacheDir               string
	CacheMode              os.FileMode
	CacheSize              uint64
	CacheItems             int64
	CacheChecksum          string
	CacheEviction          string
	CacheScanInterval      time.Duration
	CacheExpire            time.Duration
	OSCache                bool
	FreeSpace              float32
	AutoCreate             bool
	Compress               string
	MaxUpload              int
	MaxDownload            int
	MaxStageWrite          int
	MaxRetries             int
	UploadLimit            int64 // bytes per second
	DownloadLimit          int64 // bytes per second
	Writeback              bool
	WritebackThresholdSize int
	UploadDelay            time.Duration
	UploadHours            string
	HashPrefix             bool
	BlockSize              int
	GetTimeout             time.Duration
	PutTimeout             time.Duration
	CacheFullBlock         bool
	CacheLargeWrite        bool
	BufferSize             uint64
	Readahead              int
	Prefetch               int
}

func (c *Config) SelfCheck(uuid string) {
	if !c.CacheEnabled() {
		if c.Writeback || c.Prefetch > 0 {
			logger.Warnf("cache-size is 0, writeback and prefetch will be disabled")
			c.Writeback = false
			c.Prefetch = 0
		}
		c.CacheDir = "memory"
	}
	if c.MaxUpload <= 0 {
		logger.Warnf("max-uploads should be greater than 0, set it to 1")
		c.MaxUpload = 1
	}
	if c.UploadLimit > 0 && int64(c.MaxUpload*c.BlockSize) > c.UploadLimit*int64(c.GetTimeout/time.Second)/2 {
		logger.Warnf("max-upload %d may exceed bandwidth limit (bw: %d Mbps)", c.MaxUpload, c.UploadDelay*8>>20)
	}
	if c.MaxDownload <= 0 {
		logger.Warnf("max-downloads should be greater than 0, set it to 200")
		c.MaxDownload = 200
	}
	if c.DownloadLimit > 0 && int64(c.MaxDownload*c.BlockSize) > c.DownloadLimit*int64(c.GetTimeout/time.Second)/2 {
		logger.Warnf("max-download %d may exceed bandwidth limit (bw: %d Mbps)", c.MaxDownload, (c.DownloadLimit*8)>>20)
	}
	if c.BufferSize <= 32<<20 {
		logger.Warnf("buffer-size is too small, setting it to 32 MiB")
		c.BufferSize = 32 << 20
	}
	if c.CacheDir != "memory" {
		ds := utils.SplitDir(c.CacheDir)
		for i := range ds {
			ds[i] = filepath.Join(ds[i], uuid)
		}
		c.CacheDir = strings.Join(ds, string(os.PathListSeparator))
		if cs := []string{CsNone, CsFull, CsShrink, CsExtend}; !utils.StringContains(cs, c.CacheChecksum) {
			logger.Warnf("verify-cache-checksum should be one of %v", cs)
			c.CacheChecksum = CsExtend
		}
	} else if c.Writeback {
		logger.Warnf("writeback is not supported in memory cache mode")
		c.Writeback = false
	}
	if c.Writeback {
		if !c.CacheFullBlock {
			logger.Warnf("cache-partial-only is ineffective for stage blocks with writeback enabled")
		}
		if c.WritebackThresholdSize == 0 {
			c.WritebackThresholdSize = c.BlockSize + 1
		}
	} else {
		if c.UploadDelay > 0 || c.UploadHours != "" {
			logger.Warnf("delayed upload is disabled in non-writeback mode")
			c.UploadDelay = 0
			c.UploadHours = ""
		}
	}
	if _, _, err := c.parseHours(); err != nil {
		logger.Warnf("invalid value (%s) for upload-hours: %s", c.UploadHours, err)
		c.UploadHours = ""
	}
	if c.CacheEviction == "" {
		c.CacheEviction = Eviction2Random
	} else if c.CacheEviction != Eviction2Random && c.CacheEviction != EvictionNone && c.CacheEviction != EvictionLRU {
		logger.Warnf("cache-eviction should be one of [%s, %s, %s]", EvictionNone, Eviction2Random, EvictionLRU)
		c.CacheEviction = Eviction2Random
	}
	if c.CacheDir == "memory" && c.CacheEviction == EvictionLRU {
		logger.Warnf("LRU eviction is not supported in memory cache mode yet, setting it to 2-random")
		c.CacheEviction = Eviction2Random
	}
	if c.CacheExpire > 0 && c.CacheExpire < time.Second {
		logger.Warnf("cache-expire it too short, setting it to 1 second")
		c.CacheExpire = time.Second
	}
}

func (c *Config) parseHours() (start, end int, err error) {
	if c.UploadHours == "" {
		return
	}
	split := ","
	if strings.Contains(c.UploadHours, "-") {
		split = "-"
	}
	ps := strings.Split(c.UploadHours, split)
	if len(ps) != 2 {
		err = errors.New("unexpected number of fields")
		return
	}
	if start, err = strconv.Atoi(ps[0]); err != nil {
		return
	}
	if end, err = strconv.Atoi(ps[1]); err != nil {
		return
	}
	if start < 0 || start > 23 || end < 0 || end > 23 {
		err = errors.New("invalid hour number")
	}
	return
}

func (c *Config) CacheEnabled() bool {
	return c.CacheSize > 0
}

type cachedStore struct {
	storage         object.ObjectStorage
	bcache          CacheManager
	fetcher         *prefetcher
	conf            Config
	group           *Controller
	currentUpload   chan struct{}
	currentDownload chan struct{}
	pendingCh       chan *pendingItem
	pendingKeys     map[string]*pendingItem
	pendingMutex    sync.Mutex
	startHour       int
	endHour         int
	compressor      compress.Compressor
	seekable        bool
	upLimit         *ratelimit.Bucket
	downLimit       *ratelimit.Bucket

	cacheHits           prometheus.Counter
	cacheMiss           prometheus.Counter
	cacheHitBytes       prometheus.Counter
	cacheMissBytes      prometheus.Counter
	cacheReadHist       prometheus.Histogram
	objectReqsHistogram *prometheus.HistogramVec
	objectReqErrors     prometheus.Counter
	objectDataBytes     *prometheus.CounterVec
	stageBlockDelay     prometheus.Counter
	stageBlockErrors    prometheus.Counter
}

func logRequest(typeStr, key, param, reqID string, err error, used time.Duration) {
	if used > SlowRequest {
		logger.Warnf("slow request: %s %s %s(req_id: %q, err: %v, cost: %s)", typeStr, key, param, reqID, err, used)
	} else {
		logger.Debugf("%s %s %s(req_id: %q, err: %v, cost: %s)", typeStr, key, param, reqID, err, used)
	}
}

var errTryFullRead = errors.New("try full read")

func (store *cachedStore) loadRange(ctx context.Context, key string, page *Page, off int) (n int, err error) {
	p := page.Data
	fullPage, err := store.group.TryPiggyback(key)
	if fullPage != nil {
		defer fullPage.Release()
		if err == nil { // piggybacked a full read
			n = copy(p, fullPage.Data[off:])
			return n, nil
		}
	}

	store.currentDownload <- struct{}{}
	defer func() { <-store.currentDownload }()
	if store.downLimit != nil {
		store.downLimit.Wait(int64(len(p)))
	}

	start := time.Now()
	var (
		reqID string
		sc    = object.DefaultStorageClass
	)
	page.Acquire()
	err = utils.WithTimeout(ctx, func(cCtx context.Context) error {
		defer page.Release()
		in, err := store.storage.Get(cCtx, key, int64(off), int64(len(p)), object.WithRequestID(&reqID), object.WithStorageClass(&sc))
		if err == nil {
			n, err = io.ReadFull(in, p)
			_ = in.Close()
		}
		return err
	}, store.conf.GetTimeout)

	used := time.Since(start)
	logRequest("GET", key, fmt.Sprintf("RANGE(%d,%d) ", off, len(p)), reqID, err, used)
	if errors.Is(err, context.Canceled) {
		return 0, err
	}
	store.objectDataBytes.WithLabelValues("GET", sc).Add(float64(n))
	store.objectReqsHistogram.WithLabelValues("GET", sc).Observe(used.Seconds())
	if err == nil {
		store.fetcher.fetch(key)
		return n, nil
	}
	store.objectReqErrors.Add(1)
	// fall back to full read
	return 0, errTryFullRead
}

func (store *cachedStore) load(ctx context.Context, key string, page *Page, cache bool, forceCache bool) (err error) {
	defer func() {
		e := recover()
		if e != nil {
			err = fmt.Errorf("recovered from %s", e)
		}
	}()
	store.currentDownload <- struct{}{}
	defer func() { <-store.currentDownload }()
	needed := store.compressor.CompressBound(len(page.Data))
	compressed := needed > len(page.Data)
	// we don't know the actual size for compressed block
	if store.downLimit != nil && !compressed {
		store.downLimit.Wait(int64(len(page.Data)))
	}
	var (
		in    io.ReadCloser
		n     int
		p     *Page
		reqID string
		sc    = object.DefaultStorageClass
		start = time.Now()
	)
	if compressed {
		c := NewOffPage(needed)
		defer c.Release()
		p = c
	} else {
		p = page
	}
	p.Acquire()
	err = utils.WithTimeout(ctx, func(cCtx context.Context) error {
		defer p.Release()
		// it will be retried in the upper layer.
		in, err = store.storage.Get(cCtx, key, 0, -1, object.WithRequestID(&reqID), object.WithStorageClass(&sc))
		if err == nil {
			n, err = io.ReadFull(in, p.Data)
			_ = in.Close()
		}
		if compressed && err == io.ErrUnexpectedEOF {
			err = nil
		}
		return err
	}, store.conf.GetTimeout)
	if errors.Is(err, context.Canceled) {
		return err
	}
	used := time.Since(start)
	logRequest("GET", key, "", reqID, err, used)
	if store.downLimit != nil && compressed {
		store.downLimit.Wait(int64(n))
	}
	store.objectDataBytes.WithLabelValues("GET", sc).Add(float64(n))
	store.objectReqsHistogram.WithLabelValues("GET", sc).Observe(used.Seconds())
	if err != nil {
		store.objectReqErrors.Add(1)
		return fmt.Errorf("get %s: %s", key, err)
	}
	if compressed {
		n, err = store.compressor.Decompress(page.Data, p.Data[:n])
	}
	if err != nil || n < len(page.Data) {
		return fmt.Errorf("read %s fully: %v (%d < %d) after %s", key, err, n, len(page.Data), used)
	}
	if cache {
		store.bcache.cache(key, page, forceCache, !store.conf.OSCache)
	}
	return nil
}

// NewCachedStore create a cached store.
func NewCachedStore(storage object.ObjectStorage, config Config, reg prometheus.Registerer) ChunkStore {
	compressor := compress.NewCompressor(config.Compress)
	if compressor == nil {
		logger.Fatalf("unknown compress algorithm: %s", config.Compress)
	}
	if config.MaxRetries == 0 {
		config.MaxRetries = 10
	}
	if config.GetTimeout == 0 {
		config.GetTimeout = time.Second * 60
	}
	if config.PutTimeout == 0 {
		config.PutTimeout = time.Second * 60
	}
	store := &cachedStore{
		storage:         storage,
		conf:            config,
		currentUpload:   make(chan struct{}, config.MaxUpload),
		currentDownload: make(chan struct{}, config.MaxDownload),
		compressor:      compressor,
		seekable:        compressor.CompressBound(0) == 0,
		pendingCh:       make(chan *pendingItem, 100*config.MaxUpload),
		pendingKeys:     make(map[string]*pendingItem),
		group:           NewController(),
	}
	if config.UploadLimit > 0 {
		// there are overheads coming from HTTP/TCP/IP
		store.upLimit = ratelimit.NewBucketWithRate(float64(config.UploadLimit)*0.85, config.UploadLimit/10)
	}
	if config.DownloadLimit > 0 {
		store.downLimit = ratelimit.NewBucketWithRate(float64(config.DownloadLimit)*0.85, config.DownloadLimit/10)
	}
	store.initMetrics()
	if store.conf.Writeback {
		store.startHour, store.endHour, _ = config.parseHours()
		if store.startHour != store.endHour {
			logger.Infof("background upload at %d:00 ~ %d:00", store.startHour, store.endHour)
		}
	}
	store.bcache = newCacheManager(&config, reg, func(key, fpath string, force bool) bool {
		if fi, err := os.Stat(fpath); err == nil {
			return store.addDelayedStaging(key, fpath, fi.ModTime(), force)
		} else {
			logger.Warnf("Stat staging block %s: %s", fpath, err)
			return false
		}
	})

	go func() {
		for {
			if store.bcache.isEmpty() {
				logger.Warn("cache store is empty, use memory cache")
				config.CacheSize = 100 << 20
				config.CacheDir = "memory"
				store.bcache = newMemStore(&config, store.bcache.getMetrics())
			}
			time.Sleep(time.Second)
		}
	}()

	if !config.CacheEnabled() {
		config.Prefetch = 0 // disable prefetch if cache is disabled
	}
	store.fetcher = newPrefetcher(config.Prefetch, func(key string) {
		size := parseObjOrigSize(key)
		if size == 0 || size > store.conf.BlockSize {
			return
		}
		p := NewOffPage(size)
		defer p.Release()
		block, err := store.group.Execute(key, func() (*Page, error) { // dedup requests with full read
			p.Acquire()
			err := store.load(context.TODO(), key, p, false, false) // delay writing cache until singleflight ends to prevent blocking waiters
			return p, err
		})
		defer block.Release()
		if err == nil && block == p {
			store.bcache.cache(key, block, true, !store.conf.OSCache)
		}
	})

	if store.conf.Writeback {
		for i := 0; i < store.conf.MaxUpload; i++ {
			go store.uploader()
		}
		interval := time.Minute
		if d := store.conf.UploadDelay; d > 0 {
			if d < time.Minute {
				interval = d
				logger.Warnf("delay uploading by %s (this value is too small, and is not recommended)", d)
			} else {
				logger.Infof("delay uploading by %s", d)
			}
		}
		go func() {
			for {
				time.Sleep(interval)
				store.scanDelayedStaging()
			}
		}()
	}
	store.regMetrics(reg)
	return store
}

func (store *cachedStore) initMetrics() {
	store.cacheHits = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_hits",
		Help: "read from cached block",
	})
	store.cacheMiss = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_miss",
		Help: "missed read from cached block",
	})
	store.cacheHitBytes = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_hit_bytes",
		Help: "read bytes from cached block",
	})
	store.cacheMissBytes = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_miss_bytes",
		Help: "missed bytes from cached block",
	})
	store.cacheReadHist = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "blockcache_read_hist_seconds",
		Help:    "read cached block latency distribution",
		Buckets: prometheus.ExponentialBuckets(0.00001, 2, 20),
	})
	store.objectReqsHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "object_request_durations_histogram_seconds",
		Help:    "Object requests latency distributions.",
		Buckets: prometheus.ExponentialBuckets(0.01, 1.5, 25),
	}, []string{"method", "storage_class"})
	store.objectReqErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "object_request_errors",
		Help: "failed requests to object store",
	})
	store.objectDataBytes = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "object_request_data_bytes",
		Help: "Object requests size in bytes.",
	}, []string{"method", "storage_class"})
	store.stageBlockDelay = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "staging_block_delay_seconds",
		Help: "Total seconds of delay for staging blocks",
	})
	store.stageBlockErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "staging_block_errors",
		Help: "Total errors when staging blocks",
	})
}

func (store *cachedStore) regMetrics(reg prometheus.Registerer) {
	if reg == nil {
		return
	}
	reg.MustRegister(store.cacheHits)
	reg.MustRegister(store.cacheHitBytes)
	reg.MustRegister(store.cacheMiss)
	reg.MustRegister(store.cacheMissBytes)
	reg.MustRegister(store.cacheReadHist)
	reg.MustRegister(store.objectReqsHistogram)
	reg.MustRegister(store.objectReqErrors)
	reg.MustRegister(store.objectDataBytes)
	reg.MustRegister(store.stageBlockDelay)
	reg.MustRegister(store.stageBlockErrors)
	reg.MustRegister(prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Name: "blockcache_blocks",
			Help: "number of cached blocks",
		},
		func() float64 {
			cnt, _ := store.bcache.stats()
			return float64(cnt)
		}))
	reg.MustRegister(prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Name: "blockcache_bytes",
			Help: "number of cached bytes",
		},
		func() float64 {
			_, used := store.bcache.stats()
			return float64(used)
		}))
	reg.MustRegister(prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Name: "object_request_uploading",
			Help: "number of uploading requests",
		},
		func() float64 {
			return float64(len(store.currentUpload))
		}))
}

func (store *cachedStore) shouldCache(size int) bool {
	return store.conf.CacheFullBlock || size < store.conf.BlockSize
}

func parseObjOrigSize(key string) int {
	p := strings.LastIndexByte(key, '_')
	l, _ := strconv.Atoi(key[p+1:])
	return l
}

func (store *cachedStore) uploadStagingFile(key string, stagingPath string) {
	store.currentUpload <- struct{}{}
	defer func() {
		<-store.currentUpload
	}()

	store.pendingMutex.Lock()
	item, ok := store.pendingKeys[key]
	store.pendingMutex.Unlock()
	if !ok {
		logger.Debugf("Key %s is not needed, drop it", key)
		return
	}
	defer func() {
		item.uploading.Store(false)
	}()

	if !store.canUpload() {
		return
	}

	blen := parseObjOrigSize(key)
	f, err := openCacheFile(stagingPath, blen, store.conf.CacheChecksum)
	if err != nil {
		if store.isPendingValid(key) {
			logger.Errorf("Open staging file %s: %s", stagingPath, err)
		} else {
			logger.Debugf("Key %s is not needed, drop it", key)
		}
		return
	}
	block := NewOffPage(blen)
	_, err = f.ReadAt(block.Data, 0)
	_ = f.Close()
	if err != nil {
		block.Release()
		logger.Errorf("Read staging file %s: %s", stagingPath, err)
		return
	}
	if !store.isPendingValid(key) {
		block.Release()
		logger.Debugf("Key %s is not needed, drop it", key)
		return
	}

	store.stageBlockDelay.Add(time.Since(item.ts).Seconds())
	if err = store.upload(key, block, nil); err == nil {
		if !store.isPendingValid(key) { // Delete leaked objects if it's already deleted by other goroutines
			err := store.delete(key)
			logger.Infof("Key %s is not needed, abandoned, err: %v", key, err)
		} else {
			store.bcache.uploaded(key, blen)
			store.removePending(key)
			if err := store.bcache.removeStage(key); err != nil {
				logger.Warnf("failed to remove stage %s, in upload staging file", stagingPath)
			}
		}
	}
}

func (store *cachedStore) addDelayedStaging(key, stagingPath string, added time.Time, force bool) bool {
	store.pendingMutex.Lock()
	item := store.pendingKeys[key]
	if item == nil {
		item = &pendingItem{key, stagingPath, added, atomic.Bool{}}
		store.pendingKeys[key] = item
	}
	store.pendingMutex.Unlock()
	if force || store.canUpload() && time.Since(added) > store.conf.UploadDelay {
		if item.uploading.CompareAndSwap(false, true) {
			select {
			case store.pendingCh <- item:
				return true
			default:
				item.uploading.Store(false)
			}
		} else {
			return true
		}
	}
	return false
}

func (store *cachedStore) removePending(key string) {
	store.pendingMutex.Lock()
	delete(store.pendingKeys, key)
	store.pendingMutex.Unlock()
}

func (store *cachedStore) isPendingValid(key string) bool {
	store.pendingMutex.Lock()
	defer store.pendingMutex.Unlock()
	_, ok := store.pendingKeys[key]
	return ok
}

func (store *cachedStore) scanDelayedStaging() {
	if !store.canUpload() {
		return
	}
	cutoff := time.Now().Add(-store.conf.UploadDelay)
	store.pendingMutex.Lock()
	defer store.pendingMutex.Unlock()
	for _, item := range store.pendingKeys {
		store.pendingMutex.Unlock()
		if item.ts.Before(cutoff) && item.uploading.CompareAndSwap(false, true) {
			store.pendingCh <- item
		}
		store.pendingMutex.Lock()
	}
}

func (store *cachedStore) uploader() {
	for it := range store.pendingCh {
		store.uploadStagingFile(it.key, it.fpath)
	}
}

func (store *cachedStore) canUpload() bool {
	if store.startHour == store.endHour {
		return true
	}
	h := time.Now().Hour()
	return store.startHour < store.endHour && h >= store.startHour && h < store.endHour ||
		store.startHour > store.endHour && (h >= store.startHour || h < store.endHour)
}

func (store *cachedStore) NewReader(id uint64, length int) Reader {
	return sliceForRead(id, length, store)
}

func (store *cachedStore) NewWriter(id uint64) Writer {
	return sliceForWrite(id, store)
}

func (store *cachedStore) Remove(id uint64, length int) error {
	r := sliceForRead(id, length, store)
	return r.Remove()
}

func (store *cachedStore) FillCache(id uint64, length uint32) error {
	r := sliceForRead(id, int(length), store)
	keys := r.keys()
	var err error
	for _, k := range keys {
		if _, existed := store.bcache.exist(k); existed { // already cached
			continue
		}
		size := parseObjOrigSize(k)
		if size == 0 || size > store.conf.BlockSize {
			logger.Warnf("Invalid size: %s %d", k, size)
			continue
		}
		p := NewOffPage(size)
		if e := store.load(context.TODO(), k, p, true, true); e != nil {
			logger.Warnf("Failed to load key: %s %s", k, e)
			err = e
		}
		p.Release()
	}
	return err
}

func (store *cachedStore) EvictCache(id uint64, length uint32) error {
	r := sliceForRead(id, int(length), store)
	keys := r.keys()
	for _, k := range keys {
		store.bcache.remove(k, false)
	}
	return nil
}

func (store *cachedStore) CheckCache(id uint64, length uint32, handler func(exists bool, loc string, size int)) error {
	r := sliceForRead(id, int(length), store)
	keys := r.keys()
	var loc string
	var existed bool
	for i, k := range keys {
		loc, existed = store.bcache.exist(k)
		if handler != nil {
			handler(existed, loc, r.blockSize(i))
		}
	}
	return nil
}

func (store *cachedStore) UsedMemory() int64 {
	return store.bcache.usedMemory()
}

func (store *cachedStore) UpdateLimit(upload, download int64) {
	if upload = upload * 1e6 / 8; upload != store.conf.UploadLimit {
		logger.Infof("Upload limit changed from %d to %d", store.conf.UploadLimit, upload)
		store.conf.UploadLimit = upload
		if upload > 0 {
			store.upLimit = ratelimit.NewBucketWithRate(float64(upload)*0.85, upload/10)
		} else {
			store.upLimit = nil
		}
	}
	if download = download * 1e6 / 8; download != store.conf.DownloadLimit {
		logger.Infof("Download limit changed from %d to %d", store.conf.DownloadLimit, download)
		store.conf.DownloadLimit = download
		if download > 0 {
			store.downLimit = ratelimit.NewBucketWithRate(float64(download)*0.85, download/10)
		} else {
			store.downLimit = nil
		}
	}
}

var _ ChunkStore = (*cachedStore)(nil)


================================================
FILE: pkg/chunk/cached_store_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//nolint:errcheck
package chunk

import (
	"bytes"
	"context"
	"errors"
	"io"
	"os"
	"path/filepath"
	"sync/atomic"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func forgetSlice(store ChunkStore, sliceId uint64, size int) error {
	w := store.NewWriter(sliceId)
	buf := bytes.Repeat([]byte{0x41}, size)
	if _, err := w.WriteAt(buf, 0); err != nil {
		return err
	}
	return w.Finish(size)
}

func testStore(t *testing.T, store ChunkStore) {
	writer := store.NewWriter(1)
	data := []byte("hello world")
	if n, err := writer.WriteAt(data, 0); n != 11 || err != nil {
		t.Fatalf("write fail: %d %s", n, err)
	}
	offset := defaultConf.BlockSize - 3
	if n, err := writer.WriteAt(data, int64(offset)); err != nil || n != 11 {
		t.Fatalf("write fail: %d %s", n, err)
	}
	if err := writer.FlushTo(defaultConf.BlockSize + 3); err != nil {
		t.Fatalf("flush fail: %s", err)
	}
	size := offset + len(data)
	if err := writer.Finish(size); err != nil {
		t.Fatalf("finish fail: %s", err)
	}
	defer store.Remove(1, size)

	reader := store.NewReader(1, size)
	p := NewPage(make([]byte, 5))
	if n, err := reader.ReadAt(context.Background(), p, 6); n != 5 || err != nil {
		t.Fatalf("read failed: %d %s", n, err)
	} else if string(p.Data[:n]) != "world" {
		t.Fatalf("not expected: %s", string(p.Data[:n]))
	}
	p = NewPage(make([]byte, 5))
	if n, err := reader.ReadAt(context.Background(), p, 0); n != 5 || err != nil {
		t.Fatalf("read failed: %d %s", n, err)
	} else if string(p.Data[:n]) != "hello" {
		t.Fatalf("not expected: %s", string(p.Data[:n]))
	}
	p = NewPage(make([]byte, 20))
	if n, err := reader.ReadAt(context.Background(), p, offset); n != 11 || err != nil && err != io.EOF {
		t.Fatalf("read failed: %d %s", n, err)
	} else if string(p.Data[:n]) != "hello world" {
		t.Fatalf("not expected: %s", string(p.Data[:n]))
	}

	bsize := defaultConf.BlockSize / 2
	errs := make(chan error, 3)
	for i := 2; i < 5; i++ {
		go func(sliceId uint64) {
			if err := forgetSlice(store, sliceId, bsize); err != nil {
				errs <- err
				return
			}
			time.Sleep(time.Millisecond * 100) // waiting for flush
			errs <- store.Remove(sliceId, bsize)
		}(uint64(i))
	}
	for i := 0; i < 3; i++ {
		if err := <-errs; err != nil {
			t.Fatalf("test concurrent write failed: %s", err)
		}
	}
}

var defaultConf = Config{
	BlockSize:         1 << 20,
	CacheDir:          filepath.Join(os.TempDir(), "diskCache"),
	CacheMode:         0600,
	CacheSize:         10 << 20,
	CacheChecksum:     CsNone,
	CacheScanInterval: time.Second * 300,
	MaxUpload:         1,
	MaxDownload:       200,
	MaxRetries:        10,
	PutTimeout:        time.Second,
	GetTimeout:        time.Second * 2,
	AutoCreate:        true,
	BufferSize:        10 << 20,
}

var ctx = context.Background()

func TestStoreDefault(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	_ = os.RemoveAll(defaultConf.CacheDir)
	store := NewCachedStore(mem, defaultConf, nil)
	testStore(t, store)
	if used := store.UsedMemory(); used != 0 {
		t.Fatalf("used memory %d != expect 0", used)
	}
	if cnt, used := store.(*cachedStore).bcache.stats(); cnt != 0 || used != 0 {
		t.Fatalf("cache cnt %d used %d, expect both 0", cnt, used)
	}
}

func TestStoreMemCache(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.CacheDir = "memory"
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
	if used := store.UsedMemory(); used != 0 {
		t.Fatalf("used memory %d != expect 0", used)
	}
	if cnt, used := store.(*cachedStore).bcache.stats(); cnt != 0 || used != 0 {
		t.Fatalf("cache cnt %d used %d, expect both 0", cnt, used)
	}
}
func TestStoreCompressed(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.Compress = "lz4"
	conf.AutoCreate = false
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
}

func TestStoreLimited(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.UploadLimit = 1e6
	conf.DownloadLimit = 1e6
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
}

func TestStoreFull(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.FreeSpace = 0.9999
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
}

func TestStoreSmallBuffer(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.BufferSize = 1 << 20
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
}

func TestStoreAsync(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.Writeback = true
	p := filepath.Join(conf.CacheDir, stagingDir, "chunks/0/0/123_0_4")
	os.MkdirAll(filepath.Dir(p), 0744)
	f, _ := os.Create(p)
	f.WriteString("good")
	f.Close()
	store := NewCachedStore(mem, conf, nil)
	time.Sleep(time.Millisecond * 50) // wait for scan to finish
	in, err := mem.Get(ctx, "chunks/0/0/123_0_4", 0, -1)
	if err != nil {
		t.Fatalf("staging object should be upload")
	}
	data, _ := io.ReadAll(in)
	if string(data) != "good" {
		t.Fatalf("data %s != expect good", data)
	}
	testStore(t, store)
}

func TestForceUpload(t *testing.T) {
	blob, _ := object.CreateStorage("mem", "", "", "", "")
	config := defaultConf
	_ = os.RemoveAll(config.CacheDir)
	config.Writeback = true
	config.WritebackThresholdSize = config.BlockSize + 1
	config.UploadDelay = time.Hour
	config.BlockSize = 4 << 20
	store := NewCachedStore(blob, config, nil)
	cleanCache := func() {
		rSlice := sliceForRead(1, 1024, store.(*cachedStore))
		keys := rSlice.keys()
		for _, k := range keys {
			store.(*cachedStore).bcache.remove(k, true)
		}
	}
	readSlice := func(id uint64, length int) error {
		p := NewPage(make([]byte, length))
		r := store.NewReader(id, length)
		_, err := r.ReadAt(context.Background(), p, 0)
		return err
	}

	// write to cache
	w := store.NewWriter(1)
	if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil {
		t.Fatalf("write fail: %s", err)
	}
	if err := w.Finish(1024); err != nil {
		t.Fatalf("write fail: %s", err)
	}
	cleanCache()
	if readSlice(1, 1024) == nil {
		t.Fatalf("read slice 1 should fail")
	}

	// write to os
	w = store.NewWriter(2)
	w.SetWriteback(false)
	if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil {
		t.Fatalf("write fail: %s", err)
	}
	if err := w.Finish(1024); err != nil {
		t.Fatalf("write fail: %s", err)
	}
	cleanCache()
	if readSlice(2, 1024) != nil {
		t.Fatalf("check slice 2 should success")
	}
}

func TestStoreDelayed(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.Writeback = true
	conf.UploadDelay = time.Millisecond * 200
	store := NewCachedStore(mem, conf, nil)
	time.Sleep(time.Second) // waiting for cache scanned
	testStore(t, store)
	if err := forgetSlice(store, 10, 1024); err != nil {
		t.Fatalf("forge slice 10 1024: %s", err)
	}
	defer store.Remove(10, 1024)
	time.Sleep(time.Second) // waiting for upload
	if _, err := mem.Head(ctx, "chunks/0/0/10_0_1024"); err != nil {
		t.Fatalf("head object 10_0_1024: %s", err)
	}
}

func TestStoreMultiBuckets(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.HashPrefix = true
	store := NewCachedStore(mem, conf, nil)
	testStore(t, store)
}

func TestFillCache(t *testing.T) {
	mem, _ := object.CreateStorage("mem", "", "", "", "")
	conf := defaultConf
	conf.CacheSize = 10 << 20
	conf.FreeSpace = 0.01
	_ = os.RemoveAll(conf.CacheDir)
	store := NewCachedStore(mem, conf, nil)
	if err := forgetSlice(store, 10, 1024); err != nil {
		t.Fatalf("forge slice 10 1024: %s", err)
	}
	defer store.Remove(10, 1024)
	bsize := conf.BlockSize
	if err := forgetSlice(store, 11, bsize); err != nil {
		t.Fatalf("forge slice 11 %d: %s", bsize, err)
	}
	defer store.Remove(11, bsize)

	time.Sleep(time.Millisecond * 100) // waiting for flush
	bcache := store.(*cachedStore).bcache
	if cnt, used := bcache.stats(); cnt != 1 || used != 1024+4096 { // only chunk 10 cached
		t.Fatalf("cache cnt %d used %d, expect cnt 1 used 5120", cnt, used)
	}
	if err := store.FillCache(10, 1024); err != nil {
		t.Fatalf("fill cache 10 1024: %s", err)
	}
	if err := store.FillCache(11, uint32(bsize)); err != nil {
		t.Fatalf("fill cache 11 %d: %s", bsize, err)
	}
	time.Sleep(time.Second)
	expect := int64(1024 + 4096 + bsize + 4096)
	if cnt, used := bcache.stats(); cnt != 2 || used != expect {
		t.Fatalf("cache cnt %d used %d, expect cnt 2 used %d", cnt, used, expect)
	}

	var missBytes uint64
	handler := func(exists bool, loc string, size int) {
		if !exists {
			missBytes += uint64(size)
		}
	}
	// check
	err := store.CheckCache(10, 1024, handler)
	assert.Nil(t, err)
	assert.Equal(t, uint64(0), missBytes)

	missBytes = 0
	err = store.CheckCache(11, uint32(bsize), handler)
	assert.Nil(t, err)
	assert.Equal(t, uint64(0), missBytes)

	// evict slice 11
	err = store.EvictCache(11, uint32(bsize))
	assert.Nil(t, err)

	// stat
	if cnt, used := bcache.stats(); cnt != 1 || used != 1024+4096 { // only chunk 10 cached
		t.Fatalf("cache cnt %d used %d, expect cnt 1 used 5120", cnt, used)
	}

	// check again
	missBytes = 0
	err = store.CheckCache(11, uint32(bsize), handler)
	assert.Nil(t, err)
	assert.Equal(t, uint64(bsize), missBytes)
}

func BenchmarkCachedRead(b *testing.B) {
	blob, _ := object.CreateStorage("mem", "", "", "", "")
	config := defaultConf
	config.BlockSize = 4 << 20
	store := NewCachedStore(blob, config, nil)
	w := store.NewWriter(1)
	if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil {
		b.Fatalf("write fail: %s", err)
	}
	if err := w.Finish(1024); err != nil {
		b.Fatalf("write fail: %s", err)
	}
	time.Sleep(time.Millisecond * 100)
	p := NewPage(make([]byte, 1024))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := store.NewReader(1, 1024)
		if n, err := r.ReadAt(context.Background(), p, 0); err != nil || n != 1024 {
			b.FailNow()
		}
	}
}

func BenchmarkUncachedRead(b *testing.B) {
	blob, _ := object.CreateStorage("mem", "", "", "", "")
	config := defaultConf
	config.BlockSize = 4 << 20
	config.CacheSize = 0
	store := NewCachedStore(blob, config, nil)
	w := store.NewWriter(2)
	if _, err := w.WriteAt(make([]byte, 1024), 0); err != nil {
		b.Fatalf("write fail: %s", err)
	}
	if err := w.Finish(1024); err != nil {
		b.Fatalf("write fail: %s", err)
	}
	p := NewPage(make([]byte, 1024))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := store.NewReader(2, 1024)
		if n, err := r.ReadAt(context.Background(), p, 0); err != nil || n != 1024 {
			b.FailNow()
		}
	}
}

type dStore struct {
	object.ObjectStorage
	cnt int32
}

func (s *dStore) Get(ctx context.Context, key string, off, limit int64, getters ...object.AttrGetter) (io.ReadCloser, error) {
	atomic.AddInt32(&s.cnt, 1)
	return nil, errors.New("not found")
}

func TestStoreRetry(t *testing.T) {
	s := &dStore{}
	cs := NewCachedStore(s, defaultConf, nil)
	p := NewPage(nil)
	defer p.Release()
	cs.(*cachedStore).load(context.TODO(), "non", p, false, false) // wont retry
	require.Equal(t, int32(1), s.cnt)
}


================================================
FILE: pkg/chunk/chunk.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"context"
	"io"
)

type Reader interface {
	ReadAt(ctx context.Context, p *Page, off int) (int, error)
}

type Writer interface {
	io.WriterAt
	ID() uint64
	SetID(id uint64)
	SetWriteback(enabled bool)
	FlushTo(offset int) error
	Finish(length int) error
	Abort()
}

type ChunkStore interface {
	NewReader(id uint64, length int) Reader
	NewWriter(id uint64) Writer
	Remove(id uint64, length int) error
	FillCache(id uint64, length uint32) error
	EvictCache(id uint64, length uint32) error
	CheckCache(id uint64, length uint32, handler func(exists bool, loc string, size int)) error
	UsedMemory() int64
	UpdateLimit(upload, download int64)
}


================================================
FILE: pkg/chunk/disk_cache.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"errors"
	"fmt"
	"hash/crc32"
	"hash/fnv"
	"io"
	"io/fs"
	"math"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/charlievieth/fastwalk"
	"github.com/davies/groupcache/consistenthash"
	"github.com/dustin/go-humanize"
	"github.com/google/uuid"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/twmb/murmur3"
)

var (
	stagingDir          = "rawstaging"
	cacheDir            = "raw"
	maxIODur            = time.Second * 30
	stagingBlocks       atomic.Int64
	errNotCached        = errors.New("not cached")
	errStageFull        = errors.New("space not enough on device")
	errStageConcurrency = errors.New("concurrent staging limit reached")
)

type cacheKey struct {
	id   uint64
	indx uint32
	size uint32
}

func (k cacheKey) String() string { return fmt.Sprintf("%d_%d_%d", k.id, k.indx, k.size) }

type pendingFile struct {
	key       string
	page      *Page
	dropCache bool
}

type cacheStore struct {
	id         string
	totalPages int64
	sync.Mutex
	dir           string
	mode          os.FileMode
	maxStageWrite int
	capacity      int64
	maxItems      int64
	freeRatio     float32
	hashPrefix    bool
	scanInterval  time.Duration
	cacheExpire   time.Duration
	pending       chan pendingFile
	pages         map[string]*Page
	m             *cacheManagerMetrics

	used      int64
	keys      KeyIndex
	scanned   bool
	stageFull bool
	rawFull   bool
	checksum  string // checksum level
	uploader  func(key, path string, force bool) bool

	opTs map[time.Duration]func() error
	opMu sync.Mutex

	state     dcState
	stateLock sync.Mutex

	// newBlockCooldown reduces the initial access time for newly cached staged blocks.
	// This helps prevent a surge of writes from evicting active read blocks.
	stagedBlockCooldown time.Duration
}

func newCacheStore(m *cacheManagerMetrics, dir string, cacheSize, maxItems int64, pendingPages int, config *Config, uploader func(key, path string, force bool) bool) *cacheStore {
	if config.CacheMode == 0 {
		config.CacheMode = 0600 // only owner can read/write cache
	}
	if config.FreeSpace == 0.0 {
		config.FreeSpace = 0.1 // 10%
	}
	keyIndex, err := NewKeyIndex(config)
	if err != nil {
		logger.Warnf("%s, fallback to %s", err, Eviction2Random)
		config.CacheEviction = Eviction2Random
		keyIndex, _ = NewKeyIndex(config)
	}
	c := &cacheStore{
		m:                   m,
		dir:                 dir,
		mode:                config.CacheMode,
		capacity:            cacheSize,
		maxItems:            maxItems,
		maxStageWrite:       config.MaxStageWrite,
		freeRatio:           config.FreeSpace,
		checksum:            config.CacheChecksum,
		hashPrefix:          config.HashPrefix,
		scanInterval:        config.CacheScanInterval,
		cacheExpire:         config.CacheExpire,
		keys:                keyIndex,
		pending:             make(chan pendingFile, pendingPages),
		pages:               make(map[string]*Page),
		uploader:            uploader,
		opTs:                make(map[time.Duration]func() error),
		stagedBlockCooldown: config.CacheExpire / 2,
	}
	c.stateLock = sync.Mutex{}
	if config.Writeback {
		c.state = newDCState(dcUnchanged, c)
	} else {
		c.state = newDCState(dcNormal, c)
	}

	c.createDir(c.dir)
	usage := c.curFreeRatio()
	if usage.br < c.freeRatio || usage.fr < c.freeRatio {
		logger.Warnf("not enough space (%d%%) or inodes (%d%%) for caching in %s: free ratio should be >= %d%%", int(usage.br*100), int(usage.fr*100), c.dir, int(c.freeRatio*100))
	}
	logger.Infof("Disk cache (%s): used ratio - [space %s%%, inode %s%%]",
		c.dir, humanize.FtoaWithDigits(float64((1-usage.br)*100), 1), humanize.FtoaWithDigits(float64((1-usage.fr)*100), 1))

	c.setLimitByFreeRatio(usage, c.freeRatio)

	c.createLockFile()
	go c.checkLockFile()
	go c.flush()
	go c.checkFreeSpace()
	if c.cacheExpire > 0 {
		go c.cleanupExpire()
	}
	go c.refreshCacheKeys()
	go c.scanStaging()
	go c.checkTimeout()
	return c
}

func (cache *cacheStore) setLimitByFreeRatio(usage DiskFreeRatio, freeRatio float32) {
	sizeLimit := int64(float64(1-freeRatio) * float64(usage.spaceCap))
	if sizeLimit < cache.capacity {
		limit := cache.capacity
		cache.capacity = sizeLimit
		logger.Infof("Adjusted cache capacity based on freeratio: from %d to %d bytes", limit, cache.capacity)
	}
	if usage.inodeCap <= 0 {
		return
	}
	inodeLimit := int64(float64(1-freeRatio) * float64(usage.inodeCap))
	if inodeLimit < cache.maxItems || cache.maxItems == 0 {
		limit := cache.maxItems
		cache.maxItems = inodeLimit

		maxItems := "unlimited"
		if cache.maxItems != 0 {
			maxItems = strconv.FormatInt(cache.maxItems, 10)
		}
		logger.Infof("Adjusted max items based on freeratio: from %d to %s items", limit, maxItems)
	}
}

func (cache *cacheStore) lockFilePath() string {
	return filepath.Join(cache.dir, ".lock")
}

func (cache *cacheStore) createLockFile() {
	lockfile := cache.lockFilePath()
	err := cache.checkErr(func() error {
		f, err := os.OpenFile(lockfile, os.O_CREATE|os.O_RDWR, 0666)
		if err != nil {
			return fmt.Errorf("open lock file %s: %w", lockfile, err)
		}
		defer f.Close()
		rawId, err := io.ReadAll(f)
		if err != nil {
			return fmt.Errorf("read lock file %s: %w", lockfile, err)
		}
		if len(rawId) > 0 {
			cache.id = string(rawId)
		} else {
			cache.id = uuid.New().String()
			_, err = f.Write([]byte(cache.id))
			if err != nil {
				return fmt.Errorf("write lock file %s: %w", lockfile, err)
			}
		}
		return nil
	})
	if err != nil {
		logger.Warnf("create lock file %s: %s", lockfile, err)
	}
}

func (cache *cacheStore) checkLockFile() {
	lockfile := cache.lockFilePath()
	for cache.available() {
		time.Sleep(time.Second * 10)
		if err := cache.statFile(lockfile); err != nil && os.IsNotExist(err) {
			logger.Infof("lockfile %s is lost, cache device maybe broken", lockfile)
			if inRootVolume(cache.dir) && cache.freeRatio < 0.2 {
				logger.Infof("cache directory %s is in root volume, keep 20%% space free", cache.dir)
				cache.freeRatio = 0.2
			}
		}
	}
}

func (c *cacheStore) available() bool {
	return c.state.state() != dcDown
}

func (c *cacheStore) enabled() bool {
	return c.capacity > 0
}

func (c *cacheStore) full() bool {
	return c.used > c.capacity || (c.maxItems != 0 && int64(c.keys.len()) > c.maxItems)
}

func (cache *cacheStore) checkErr(f func() error) error {
	if !cache.available() {
		return errCacheDown
	}
	cache.state.beforeCacheOp()
	defer cache.state.afterCacheOp()
	if err := cache.state.checkCacheOp(); err != nil {
		return err
	}

	start := utils.Clock()
	cache.opMu.Lock()
	cache.opTs[start] = f
	cache.opMu.Unlock()
	err := f()
	cache.opMu.Lock()
	delete(cache.opTs, start)
	cache.opMu.Unlock()

	if err != nil {
		if errors.Is(err, syscall.EIO) || errors.Is(err, utils.ErrFuncTimeout) {
			logger.Errorf("cache store is unavailable: %s", err)
			cache.state.onIOErr()
		}
	} else {
		cache.state.onIOSucc()
	}
	return err
}

func getFunctionName(f interface{}) string {
	return runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name()
}

func (c *cacheStore) checkTimeout() {
	for c.available() {
		now := utils.Clock()
		cutOff := now - maxIODur
		c.opMu.Lock()
		for ts := range c.opTs {
			if ts < cutOff {
				logger.Warnf("IO operation %s on %s is timeout after %s, ", getFunctionName(c.opTs[ts]), c.dir, now-ts)
				c.state.onIOErr()
				delete(c.opTs, ts)
			}
		}
		c.opMu.Unlock()
		time.Sleep(time.Second)
	}
}

func (c *cacheStore) statFile(path string) error {
	return c.checkErr(func() error {
		_, err := os.Stat(path)
		return err
	})
}

func (cache *cacheStore) removeFile(path string) error {
	return cache.checkErr(func() error {
		return os.Remove(path)
	})
}

func (cache *cacheStore) renameFile(oldpath, newpath string) error {
	return cache.checkErr(func() error {
		return os.Rename(oldpath, newpath)
	})
}

func (cache *cacheStore) writeFile(f *os.File, data []byte) error {
	return cache.checkErr(func() error {
		_, err := f.Write(data)
		return err
	})
}

func (cache *cacheStore) closeFile(f *os.File) error {
	return cache.checkErr(func() error {
		return f.Close()
	})
}

func (cache *cacheStore) usedMemory() int64 {
	return atomic.LoadInt64(&cache.totalPages)
}

func (cache *cacheStore) stats() (int64, int64) {
	cache.Lock()
	defer cache.Unlock()
	return int64(len(cache.pages) + cache.keys.len()), cache.used + cache.usedMemory()
}

func (cache *cacheStore) checkFreeSpace() {
	for cache.available() {
		usage := cache.curFreeRatio()
		cache.stageFull = usage.br < cache.freeRatio/2 || (usage.inodeCap > 0 && usage.fr < cache.freeRatio/2)
		cache.rawFull = usage.br < cache.freeRatio || (usage.inodeCap > 0 && usage.fr < cache.freeRatio)
		if cache.rawFull && cache.keys.name() != EvictionNone {
			logger.Tracef("Cleanup cache when check free space (%s): free ratio (%d%%), space usage (%d%%), inodes usage (%d%%)", cache.dir, int(cache.freeRatio*100), int(usage.br*100), int(usage.fr*100))
			cache.Lock()
			cache.cleanupFull()
			cache.Unlock()
			usage = cache.curFreeRatio()
			cache.rawFull = usage.br < cache.freeRatio || (usage.inodeCap > 0 && usage.fr < cache.freeRatio)
		}
		if cache.rawFull {
			cache.uploadStaging()
		}
		time.Sleep(time.Second)
	}
	logger.Infof("stop checkFreeSpace at %s", cache.dir)
}

func (cache *cacheStore) cleanupExpire() {
	var todel []cacheKey
	var interval = time.Minute
	if cache.cacheExpire < time.Minute {
		interval = cache.cacheExpire
	}
	for {
		var freed int64
		var cnt, deleted int
		var cutoff = uint32(time.Now().Unix()) - uint32(cache.cacheExpire/time.Second)
		cache.Lock()
		for k, v := range cache.keys.randomIter() {
			cnt++
			if cnt > 1e3 {
				break
			}
			if v.size < 0 {
				continue // staging
			}
			if v.atime < cutoff {
				if cache.keys.remove(k, false) != nil {
					deleted++
					freed += int64(v.size + 4096)
					cache.used -= int64(v.size + 4096)
					todel = append(todel, k)
					cache.m.cacheEvicts.Add(1)
				}
			}
		}
		if len(todel) > 0 {
			logger.Debugf("cleanup expired cache (%s): %d blocks (%s), expired %d blocks (%s)", cache.dir, cache.keys.len(), humanize.IBytes(uint64(cache.used)), len(todel), humanize.IBytes(uint64(freed)))
		}
		cache.Unlock()
		for _, k := range todel {
			if !cache.available() {
				break
			}
			_ = cache.removeFile(cache.cachePath(cache.getPathFromKey(k)))
		}
		todel = todel[:0]
		time.Sleep(interval / 1000 * time.Duration((cnt+1-deleted)*1000/(cnt+1)))
	}
}

func (cache *cacheStore) refreshCacheKeys() {
	if cache.scanInterval < 0 {
		return
	}
	cache.scanCached()
	if cache.scanInterval > 0 {
		for {
			time.Sleep(cache.scanInterval)
			cache.scanCached()
		}
	}
}

func (cache *cacheStore) removeStage(key string) error {
	var err error
	if err = cache.removeFile(cache.stagePath(key)); err == nil {
		cache.m.stageBlocks.Sub(1)
		cache.m.stageBlockBytes.Sub(float64(parseObjOrigSize(key)))
	}
	// ignore ENOENT error
	if err != nil && os.IsNotExist(err) {
		return nil
	}
	return err
}

func (cache *cacheStore) cache(key string, p *Page, force, dropCache bool) {
	if !cache.enabled() {
		return
	}
	if cache.rawFull && cache.keys.name() == EvictionNone {
		logger.Debugf("Caching directory is full (%s), drop %s (%d bytes)", cache.dir, key, len(p.Data))
		cache.m.cacheDrops.Add(1)
		return
	}
	cache.Lock()
	defer cache.Unlock()
	if _, ok := cache.pages[key]; ok {
		return
	}
	k := cache.getCacheKey(key)
	if cache.keys.get(k) != nil {
		return
	}
	p.Acquire()
	cache.pages[key] = p
	atomic.AddInt64(&cache.totalPages, int64(cap(p.Data)))
	select {
	case cache.pending <- pendingFile{key, p, dropCache}:
	default:
		if force {
			cache.Unlock()
			cache.pending <- pendingFile{key, p, dropCache}
			cache.Lock()
		} else {
			// does not have enough bandwidth to write it into disk, discard it
			logger.Debugf("Caching queue is full (%s), drop %s (%d bytes)", cache.dir, key, len(p.Data))
			cache.m.cacheDrops.Add(1)
			delete(cache.pages, key)
			atomic.AddInt64(&cache.totalPages, -int64(cap(p.Data)))
			p.Release()
		}
	}
}

type DiskFreeRatio struct {
	br       float32
	fr       float32
	spaceCap uint64
	inodeCap uint64
}

// caller should not hold cache lock
func (cache *cacheStore) curFreeRatio() DiskFreeRatio {
	var total, free, files, ffree uint64
	_ = cache.checkErr(func() error {
		total, free, files, ffree = getDiskUsage(cache.dir)
		return nil
	})
	usage := DiskFreeRatio{
		spaceCap: total,
		inodeCap: files,
	}
	if total != 0 {
		usage.br = float32(free) / float32(total)
	}
	if files != 0 {
		usage.fr = float32(ffree) / float32(files)
	}
	return usage
}

func (cache *cacheStore) flushPage(path string, data []byte, dropCache bool) (err error) {
	if !cache.available() {
		return errCacheDown
	}

	start := time.Now()
	cache.m.cacheWrites.Add(1)
	cache.m.cacheWriteBytes.Add(float64(len(data)))
	defer func() {
		cache.m.cacheWriteHist.Observe(time.Since(start).Seconds())
	}()
	cache.createDir(filepath.Dir(path))
	tmp := path + ".tmp"

	var f *os.File
	err = cache.checkErr(func() error {
		f, err = os.OpenFile(tmp, os.O_WRONLY|os.O_CREATE, cache.mode)
		return err
	})
	if err != nil {
		logger.Warnf("Can't create cache file %s: %s", tmp, err)
		return err
	}

	defer func() {
		if err != nil {
			_ = cache.removeFile(tmp)
		}
	}()

	if err = cache.writeFile(f, data); err != nil {
		logger.Warnf("Write to cache file %s failed: %s", tmp, err)
		_ = f.Close()
		return
	}
	if cache.checksum != CsNone {
		if err = cache.writeFile(f, checksum(data)); err != nil {
			logger.Warnf("Write checksum to cache file %s failed: %s", tmp, err)
			_ = f.Close()
			return
		}
	}
	if dropCache {
		dropOSCache(f)
	}
	if err = cache.closeFile(f); err != nil {
		logger.Warnf("Close cache file %s failed: %s", tmp, err)
		return
	}
	if err = cache.renameFile(tmp, path); err != nil {
		logger.Warnf("Rename cache file %s -> %s failed: %s", tmp, path, err)
	}
	return
}

func (cache *cacheStore) createDir(dir string) {
	// who can read the cache, should be able to access the directories and add new file.
	_ = cache.checkErr(func() error {
		readmode := cache.mode & 0444
		mode := cache.mode | (readmode >> 2) | (readmode >> 1)
		var st os.FileInfo
		var err error
		dir = filepath.Clean(dir) // `CacheManager` appends "/" to dir, remove it so that following `filepath.Dir` returns the parent dir
		if st, err = os.Stat(dir); os.IsNotExist(err) {
			if filepath.Dir(dir) != dir {
				cache.createDir(filepath.Dir(dir))
			}
			_ = os.Mkdir(dir, mode)
			// umask may remove some permissions
			return os.Chmod(dir, mode)
		} else if strings.HasPrefix(dir, cache.dir) && err == nil && st.Mode().Perm() != mode.Perm() { // check permission only
			changeMode(dir, st, mode)
		}
		return err
	})
}

func (cache *cacheStore) getCacheKey(key string) cacheKey {
	p := strings.LastIndexByte(key, '/')
	p++
	var k cacheKey
	l := len(key)
	for p < l {
		if key[p] == '_' {
			p++
			break
		}
		k.id *= 10
		k.id += uint64(key[p] - '0')
		p++
	}
	for p < l {
		if key[p] == '_' {
			p++
			break
		}
		k.indx *= 10
		k.indx += uint32(key[p] - '0')
		p++
	}
	for p < l {
		k.size *= 10
		k.size += uint32(key[p] - '0')
		p++
	}
	return k
}

func (cache *cacheStore) getPathFromKey(k cacheKey) string {
	if cache.hashPrefix {
		return fmt.Sprintf("chunks/%02X/%v/%v_%v_%v", k.id%256, k.id/1000/1000, k.id, k.indx, k.size)
	} else {
		return fmt.Sprintf("chunks/%v/%v/%v_%v_%v", k.id/1000/1000, k.id/1000, k.id, k.indx, k.size)
	}
}

func (cache *cacheStore) remove(key string, staging bool) {
	cache.Lock()
	delete(cache.pages, key)
	path := cache.cachePath(key)
	k := cache.getCacheKey(key)
	if it := cache.keys.remove(k, staging); it != nil {
		if it.size > 0 {
			cache.used -= int64(it.size + 4096)
		}
	} else if cache.scanned || !staging {
		path = "" // not existed or staging block
	}
	cache.Unlock()

	if path != "" {
		if err := cache.removeFile(path); err != nil && !os.IsNotExist(err) {
			logger.Warnf("remove %s failed: %s", path, err)
		}
		if staging {
			if err := cache.removeStage(key); err != nil && !os.IsNotExist(err) {
				logger.Warnf("remove stage %s failed: %s", cache.stagePath(key), err)
			}
		}
	}
}

func (cache *cacheStore) load(key string) (ReadCloser, error) {
	cache.Lock()
	defer cache.Unlock()
	if p, ok := cache.pages[key]; ok {
		return NewPageReader(p), nil
	}
	k := cache.getCacheKey(key)
	if cache.scanned && cache.keys.get(k) == nil {
		return nil, errNotCached
	}
	cache.Unlock()

	var f *cacheFile
	var err error
	err = cache.checkErr(func() error {
		f, err = openCacheFile(cache.cachePath(key), parseObjOrigSize(key), cache.checksum)
		if err != nil && !os.IsNotExist(err) {
			logger.Warnf("Open cache file %s failed: %s", cache.cachePath(key), err)
		}
		return err
	})

	cache.Lock()
	if err != nil {
		if it := cache.keys.remove(k, false); it != nil {
			cache.used -= int64(it.size + 4096)
		}
	}
	return f, err
}

func (cache *cacheStore) exist(key string) (bool, error) {
	cache.Lock()
	defer cache.Unlock()
	if _, ok := cache.pages[key]; ok {
		return true, nil
	}
	k := cache.getCacheKey(key)
	if cache.scanned && cache.keys.get(k) == nil {
		return false, errNotCached
	}
	cache.Unlock()
	var err error
	err = cache.checkErr(func() error {
		_, err = os.Stat(cache.cachePath(key))
		if err != nil && !os.IsNotExist(err) {
			logger.Warnf("Stat %s failed: %s", cache.cachePath(key), err)
		}
		return err
	})

	cache.Lock()
	if err == nil {
		return true, nil
	} else if it := cache.keys.remove(k, false); it != nil {
		cache.used -= int64(it.size + 4096)
	}
	return false, err
}

func (cache *cacheStore) cachePath(key string) string {
	return filepath.Join(cache.dir, cacheDir, key)
}

func (cache *cacheStore) stagePath(key string) string {
	return filepath.Join(cache.dir, stagingDir, key)
}

// flush cached block into disk
func (cache *cacheStore) flush() {
	for {
		w := <-cache.pending
		path := cache.cachePath(w.key)
		if cache.enabled() && cache.flushPage(path, w.page.Data, w.dropCache) == nil {
			cache.add(w.key, int32(len(w.page.Data)), uint32(time.Now().Unix()))
		}
		cache.Lock()
		_, ok := cache.pages[w.key]
		delete(cache.pages, w.key)
		atomic.AddInt64(&cache.totalPages, -int64(cap(w.page.Data)))
		cache.Unlock()
		w.page.Release()
		if !ok {
			cache.remove(w.key, false)
		}
	}
}

func (cache *cacheStore) add(key string, size int32, atime uint32) {
	if size == 0 {
		logger.Warnf("Cache add %s with size 0, atime %d", key, atime) // should not happen
		return
	}
	k := cache.getCacheKey(key)
	cache.Lock()
	defer cache.Unlock()
	iter := cache.keys.get(k)
	if iter == nil {
		iter = &cacheItem{size: size, atime: atime}
	} else {
		if iter.size > 0 {
			cache.used -= int64(iter.size + 4096)
		}
		iter.size = size
	}
	cache.keys.add(k, *iter) // add or update
	if size > 0 {
		cache.used += int64(size + 4096)
	}
	if cache.full() && cache.keys.name() != EvictionNone {
		logger.Debugf("Cleanup cache when add new data (%s): %d blocks (%s)", cache.dir, cache.keys.len(), humanize.IBytes(uint64(cache.used)))
		cache.cleanupFull()
	}
}

func (cache *cacheStore) stage(key string, data []byte) (string, error) {
	stagingPath := cache.stagePath(key)
	if cache.stageFull {
		return stagingPath, errStageFull
	}
	if cache.maxStageWrite != 0 && stagingBlocks.Load() > int64(cache.maxStageWrite) {
		return stagingPath, errStageConcurrency
	}
	stagingBlocks.Add(1)
	defer stagingBlocks.Add(-1)
	err := cache.flushPage(stagingPath, data, false)
	if err == nil {
		cache.m.stageBlocks.Add(1)
		cache.m.stageBlockBytes.Add(float64(len(data)))
		cache.m.stageWriteBytes.Add(float64(len(data)))
		if cache.enabled() {
			path := cache.cachePath(key)
			cache.createDir(filepath.Dir(path))
			if err = os.Link(stagingPath, path); err == nil {
				cache.add(key, -int32(len(data)), uint32(time.Now().Add(-cache.stagedBlockCooldown).Unix()))
			} else {
				logger.Warnf("link %s to %s failed: %s", stagingPath, path, err)
			}
		}
	}
	return stagingPath, err
}

func (cache *cacheStore) uploaded(key string, size int) {
	cache.add(key, int32(size), 0)
}

// locked
func (cache *cacheStore) cleanupFull() {
	if !cache.available() {
		return
	}

	goal := cache.capacity * 95 / 100
	num := int64(cache.keys.len()) * 99 / 100
	if cache.maxItems != 0 && num > cache.maxItems*99/100 {
		num = cache.maxItems * 99 / 100
	}
	cache.Unlock()
	// make sure we have enough free space after cleanup
	usage := cache.curFreeRatio()
	cache.Lock()
	if usage.br < cache.freeRatio {
		toFree := int64(float32(usage.spaceCap) * (cache.freeRatio - usage.br))
		if toFree > cache.used {
			goal = 0
		} else if cache.used-toFree < goal {
			goal = (cache.used - toFree) * 95 / 100
		}
	}
	if usage.fr < cache.freeRatio {
		toFree := int(float32(usage.inodeCap) * (cache.freeRatio - usage.fr))
		if toFree > cache.keys.len() {
			num = 0
		} else {
			num = int64(cache.keys.len()-toFree) * 99 / 100
		}
	}
	if int64(cache.keys.len()) <= num && cache.used <= goal {
		return // some other thread has done the cleanup
	}

	var todel []cacheKey
	var freed int64
	var now = uint32(time.Now().Unix())

	for k, item := range cache.keys.evictionIter() {
		freed += int64(item.size + 4096)
		cache.used -= int64(item.size + 4096)
		todel = append(todel, k)

		logger.Debugf("remove %s from cache, age: %ds", k, now-item.atime)
		cache.m.cacheEvicts.Add(1)

		if int64(cache.keys.len()) <= num && cache.used <= goal {
			break
		}
	}
	if len(todel) > 0 {
		logger.Debugf("cleanup cache (%s) using %s eviction: %d blocks (%s), freed %d blocks (%s)", cache.dir, cache.keys.name(), cache.keys.len(), humanize.IBytes(uint64(cache.used)), len(todel), humanize.IBytes(uint64(freed)))
	}
	cache.Unlock()
	for _, k := range todel {
		if !cache.available() {
			break
		}
		_ = cache.removeFile(cache.cachePath(cache.getPathFromKey(k)))
	}
	cache.Lock()
}

func (cache *cacheStore) uploadStaging() {
	if !cache.scanned || cache.uploader == nil {
		return
	}
	var toFree int64
	usage := cache.curFreeRatio()
	if usage.br < cache.freeRatio || usage.fr < cache.freeRatio {
		toFree = int64(float64(usage.spaceCap)*float64(cache.freeRatio) - math.Min(float64(usage.br), float64(usage.fr)))
	}
	cache.Lock()
	defer cache.Unlock()
	var cnt int
	var lastK cacheKey
	var lastValue cacheItem
	// for each two random keys, then compare the access time, upload the older one
	for k, value := range cache.keys.randomIter() {
		if value.size > 0 {
			continue // read cache
		}

		// pick the bigger one if they were accessed within the same minute
		if cnt == 0 || lastValue.atime/60 > value.atime/60 ||
			lastValue.atime/60 == value.atime/60 && lastValue.size > value.size { // both size are < 0
			lastK = k
			lastValue = value
		}
		cnt++
		if cnt > 1 {
			cache.Unlock()
			key := cache.getPathFromKey(lastK)
			if !cache.uploader(key, cache.stagePath(key), true) {
				logger.Warnf("Upload list is too full")
				cache.Lock()
				return
			}
			logger.Debugf("upload %s, age: %d", key, uint32(time.Now().Unix())-lastValue.atime)
			cache.Lock()
			// the size in keys should be updated
			toFree -= int64(-lastValue.size + 4096)
			cnt = 0
		}

		if toFree < 0 {
			break
		}
	}
	if cnt > 0 {
		cache.Unlock()
		key := cache.getPathFromKey(lastK)
		if cache.uploader(key, cache.stagePath(key), true) {
			logger.Debugf("upload %s, age: %d", key, uint32(time.Now().Unix())-lastValue.atime)
		}
		cache.Lock()
	}
}

func (cache *cacheStore) scanCached() {
	cache.Lock()
	cache.used = 0
	// atime in memory is more accurate than on disk, inherit it for the next round
	lastSnap := cache.keys.reset()
	cache.scanned = false
	cache.Unlock()

	var start = time.Now()
	var oneMinAgo = start.Add(-time.Minute)

	cachePrefix := filepath.Join(cache.dir, cacheDir)
	logger.Debugf("Scan %s to find cached blocks", cachePrefix)
	_ = fastwalk.Walk(nil, cachePrefix, func(path string, d fs.DirEntry, err error) error {
		// this func should be concurrent safe
		if err != nil {
			return nil
		}
		fi, _ := d.Info()
		if fi != nil {
			if fi.IsDir() || strings.HasSuffix(path, ".tmp") {
				if fi.ModTime().Before(oneMinAgo) {
					// try to remove empty directory
					if cache.removeFile(path) == nil {
						logger.Debugf("Remove empty directory: %s", path)
					}
				}
			} else {
				key := path[len(cachePrefix)+1:]
				if runtime.GOOS == "windows" {
					key = strings.ReplaceAll(key, "\\", "/")
				}
				atime := uint32(getAtime(fi).Unix())
				if lastAtime := lastSnap.peekAtime(cache.getCacheKey(key)); lastAtime > atime {
					atime = lastAtime
				}
				size := parseObjOrigSize(key) // track logical size
				if size == 0 {
					logger.Warnf("Ignore file with unknown size: %s", path)
					return nil
				}
				if getNlink(fi) > 1 {
					cache.add(key, -int32(size), atime)
				} else {
					cache.add(key, int32(size), atime)
				}
			}
		}
		return nil
	})
	cache.Lock()
	cache.scanned = true
	logger.Debugf("Found %s cached blocks (%s) in %s with %s", humanize.Comma(int64(cache.keys.len())), humanize.IBytes(uint64(cache.used)), cache.dir, time.Since(start))
	cache.Unlock()
}

var pathReg, _ = regexp.Compile(`^chunks/((\d+)|([0-9a-fA-F]{2}))/\d+/\d+_\d+_\d+$`)

func (cache *cacheStore) scanStaging() {
	if cache.uploader == nil {
		return
	}

	var start = time.Now()
	var oneMinAgo = start.Add(-time.Minute)
	var count, usage uint64
	stagingPrefix := filepath.Join(cache.dir, stagingDir)
	logger.Debugf("Scan %s to find staging blocks", stagingPrefix)
	_ = fastwalk.Walk(nil, stagingPrefix, func(path string, d fs.DirEntry, err error) error {
		// this func should be concurrent safe
		if err != nil {
			return nil // ignore it
		}
		if d.IsDir() || strings.HasSuffix(path, ".tmp") {
			fi, err := d.Info()
			if err != nil {
				return nil
			}
			if fi.ModTime().Before(oneMinAgo) {
				// try to remove empty directory
				if cache.removeFile(path) == nil {
					logger.Debugf("Remove empty directory: %s", path)
				}
			}
		} else {
			key := path[len(stagingPrefix)+1:]
			if runtime.GOOS == "windows" {
				key = strings.ReplaceAll(key, "\\", "/")
			}
			if !pathReg.MatchString(key) {
				logger.Warnf("Ignore invalid file in staging: %s", path)
				return nil
			}
			origSize := parseObjOrigSize(key)
			if origSize == 0 {
				logger.Warnf("Ignore file with zero size: %s", path)
				return nil
			}
			logger.Debugf("Found staging block: %s", path)
			cache.m.stageBlocks.Add(1)
			cache.m.stageBlockBytes.Add(float64(origSize))
			cache.uploader(key, path, false)
			atomic.AddUint64(&count, 1)
			atomic.AddUint64(&usage, uint64(origSize))
		}
		return nil
	})
	if count > 0 {
		logger.Infof("Found %d staging blocks (%s) in %s with %s", count, humanize.IBytes(usage), cache.dir, time.Since(start))
	}
}

type cacheManager struct {
	sync.Mutex
	consistentMap *consistenthash.Map
	storeMap      map[string]*cacheStore
	stores        []*cacheStore
	metrics       *cacheManagerMetrics
}

func legacyKeyHash(s string) uint32 {
	hash := fnv.New32()
	_, _ = hash.Write([]byte(s))
	return hash.Sum32()
}

// hasMeta reports whether path contains any of the magic characters
// recognized by Match.
func hasMeta(path string) bool {
	magicChars := `*?[`
	if runtime.GOOS != "windows" {
		magicChars = `*?[\`
	}
	return strings.ContainsAny(path, magicChars)
}

var osPathSeparator = string([]byte{os.PathSeparator})

func expandDir(pattern string) []string {
	pattern = strings.TrimRight(pattern, "/")
	if runtime.GOOS == "windows" {
		pattern = strings.TrimRight(pattern, osPathSeparator)
	}
	if pattern == "" {
		return []string{"/"}
	}
	if !hasMeta(pattern) {
		return []string{pattern}
	}
	dir, f := filepath.Split(pattern)
	if hasMeta(f) {
		matched, err := filepath.Glob(pattern)
		if err != nil {
			logger.Errorf("glob %s: %s", pattern, err)
			return []string{pattern}
		}
		return matched
	}
	var rs []string
	for _, p := range expandDir(dir) {
		rs = append(rs, filepath.Join(p, f))
	}
	return rs
}

type CacheManager interface {
	cache(key string, p *Page, force, dropCache bool)
	remove(key string, staging bool)
	load(key string) (ReadCloser, error)
	exist(key string) (string, bool)
	uploaded(key string, size int)
	stage(key string, data []byte) (string, error)
	removeStage(key string) error
	stats() (int64, int64)
	usedMemory() int64
	isEmpty() bool
	getMetrics() *cacheManagerMetrics
}

func newCacheManager(config *Config, reg prometheus.Registerer, uploader func(key, path string, force bool) bool) CacheManager {
	getEnvs()
	metrics := newCacheManagerMetrics(reg)
	if config.CacheDir == "memory" || !config.CacheEnabled() {
		return newMemStore(config, metrics)
	}
	var dirs []string
	for _, d := range utils.SplitDir(config.CacheDir) {
		dd := expandDir(d)
		if config.AutoCreate {
			dirs = append(dirs, dd...)
		} else {
			for _, d := range dd {
				if fi, err := os.Stat(d); err == nil && fi.IsDir() {
					dirs = append(dirs, d)
				}
			}
		}
	}
	if len(dirs) == 0 {
		config.CacheSize = 100 << 20
		logger.Warnf("No cache dir existed, use memory cache instead, cache size: 100 MiB")
		return newMemStore(config, metrics)
	}
	sort.Strings(dirs)
	dirCacheSize := int64(config.CacheSize) / int64(len(dirs))
	dirCacheItems := config.CacheItems / int64(len(dirs))
	m := &cacheManager{
		consistentMap: consistenthash.New(100, murmur3.Sum32),
		storeMap:      make(map[string]*cacheStore, len(dirs)),
		stores:        make([]*cacheStore, len(dirs)),
		metrics:       metrics,
	}

	// 20% of buffer could be used for pending pages
	pendingPages := int(config.BufferSize) * 2 / 10 / config.BlockSize / len(dirs)
	for i, d := range dirs {
		store := newCacheStore(metrics, strings.TrimSpace(d)+string(filepath.Separator), dirCacheSize, dirCacheItems, pendingPages, config, uploader)
		m.stores[i] = store
		m.storeMap[store.id] = store
		m.consistentMap.Add(store.id)
	}
	go m.cleanup()
	return m
}

func (m *cacheManager) getMetrics() *cacheManagerMetrics {
	return m.metrics
}

func (m *cacheManager) cleanup() {
	for !m.isEmpty() {
		var ids []string
		m.Lock()
		for id, s := range m.storeMap {
			if s == nil || !s.available() {
				ids = append(ids, id)
			}
		}
		m.Unlock()
		for _, id := range ids {
			m.removeStore(id)
		}
		time.Sleep(time.Second)
	}
}

func (m *cacheManager) isEmpty() bool {
	return m.length() == 0
}

func (m *cacheManager) length() int {
	m.Lock()
	defer m.Unlock()
	return len(m.storeMap)
}

func (m *cacheManager) removeStore(id string) {
	m.Lock()
	m.consistentMap.Remove(id)
	var dir string
	if s := m.storeMap[id]; s != nil {
		dir = s.dir
	}
	delete(m.storeMap, id)
	for i, c := range m.stores {
		if c != nil && c.id == id {
			m.stores[i] = nil
		}
	}
	m.Unlock()
	logger.Errorf("cache dir `%s`(%s) is unavailable, removed", dir, id)
}

func (m *cacheManager) getStore(key string) *cacheStore {
	for {
		m.Lock()
		id := m.consistentMap.Get(key)
		s := m.storeMap[id]
		m.Unlock()
		if s == nil || s.available() {
			return s
		}
		m.removeStore(id)
	}
}

func (m *cacheManager) removeStage(key string) error {
	if s := m.getStore(key); s == nil {
		return errCacheDown
	} else {
		return s.removeStage(key)
	}
}

// Deprecated: use getStore instead
func (m *cacheManager) getStoreLegacy(key string) *cacheStore {
	return m.stores[legacyKeyHash(key)%uint32(len(m.stores))]
}

func (m *cacheManager) usedMemory() int64 {
	var used int64
	for _, s := range m.stores {
		if s != nil {
			used += s.usedMemory()
		}
	}
	return used
}

func (m *cacheManager) stats() (int64, int64) {
	var cnt, used int64
	for _, s := range m.stores {
		if s != nil {
			c, u := s.stats()
			cnt += c
			used += u
		}
	}
	return cnt, used
}

func (m *cacheManager) cache(key string, p *Page, force, dropCache bool) {
	store := m.getStore(key)
	if store != nil {
		store.cache(key, p, force, dropCache)
	}
}

type ReadCloser interface {
	// io.Reader
	io.ReaderAt
	io.Closer
}

func (m *cacheManager) load(key string) (ReadCloser, error) {
	store := m.getStore(key)
	if store == nil {
		return nil, errors.New("no available cache dir")
	}
	r, err := store.load(key)
	if err == errNotCached {
		legacy := m.getStoreLegacy(key)
		if legacy != store && legacy != nil {
			r, err = legacy.load(key)
		}
	}
	return r, err
}

func (m *cacheManager) exist(key string) (string, bool) {
	store := m.getStore(key)
	if store == nil {
		return "", false
	}
	loc := store.dir
	existed, err := m.getStore(key).exist(key)
	if err == errNotCached {
		legacy := m.getStoreLegacy(key)
		if legacy != store && legacy != nil {
			existed, _ = legacy.exist(key)
			loc = legacy.dir
		}
	}
	return loc, existed
}

func (m *cacheManager) remove(key string, staging bool) {
	store := m.getStore(key)
	if store != nil {
		store.remove(key, staging)
	}
}

func (m *cacheManager) stage(key string, data []byte) (string, error) {
	store := m.getStore(key)
	if store != nil {
		return store.stage(key, data)
	}
	return "", errors.New("no available cache dir")
}

func (m *cacheManager) uploaded(key string, size int) {
	store := m.getStore(key)
	if store != nil {
		store.uploaded(key, size)
	}
}

/* --- Checksum --- */
const (
	CsNone   = "none"
	CsFull   = "full"
	CsShrink = "shrink"
	CsExtend = "extend"

	csBlock = 32 << 10
)

var crc32c = crc32.MakeTable(crc32.Castagnoli)

type cacheFile struct {
	*os.File
	length  int // length of data
	csLevel string
}

// Calculate 32-bits checksum for every 32 KiB data, so 512 Bytes for 4 MiB in total
func checksum(data []byte) []byte {
	length := len(data)
	buf := utils.NewBuffer(uint32((length-1)/csBlock+1) * 4)
	for start, end := 0, 0; start < length; start = end {
		end = start + csBlock
		if end > length {
			end = length
		}
		sum := crc32.Checksum(data[start:end], crc32c)
		buf.Put32(sum)
	}
	return buf.Bytes()
}

func openCacheFile(name string, length int, level string) (*cacheFile, error) {
	fp, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	fi, err := fp.Stat()
	if err != nil {
		_ = fp.Close()
		return nil, err
	}
	checksumLength := ((length-1)/csBlock + 1) * 4
	switch fi.Size() - int64(length) {
	case 0:
		return &cacheFile{fp, length, CsNone}, nil
	case int64(checksumLength):
		return &cacheFile{fp, length, level}, nil
	default:
		_ = fp.Close()
		return nil, fmt.Errorf("invalid file size %d, data length %d", fi.Size(), length)
	}
}

func (cf *cacheFile) ReadAt(b []byte, off int64) (n int, err error) {
	logger.Tracef("CacheFile length %d level %s, readat off %d buffer size %d", cf.length, cf.csLevel, off, len(b))
	defer func() {
		logger.Tracef("CacheFile readat returns n %d err %s", n, err)
	}()
	if cf.csLevel == CsNone || cf.csLevel == CsFull && (off != 0 || len(b) != cf.length) {
		return cf.File.ReadAt(b, off)
	}
	var rb = b     // read buffer
	var roff = off // read offset
	if cf.csLevel == CsExtend {
		roff = off / csBlock * csBlock
		rend := int(off) + len(b)
		if rend%csBlock != 0 {
			rend = (rend/csBlock + 1) * csBlock
			if rend > cf.length {
				rend = cf.length
			}
		}
		if size := rend - int(roff); size != len(b) {
			p := NewOffPage(size)
			rb = p.Data
			defer func() {
				if err == nil {
					n = copy(b, rb[off-roff:])
				} else {
					n = 0
				}
				p.Release()
			}()
		}
	}
	if n, err = cf.File.ReadAt(rb, roff); err != nil {
		return
	}

	ioff := int(roff) / csBlock // index offset
	if cf.csLevel == CsShrink {
		if roff%csBlock != 0 {
			if o := csBlock - int(roff)%csBlock; len(rb) <= o {
				return
			} else {
				rb = rb[o:]
				ioff += 1
			}
		}
		if end := int(roff) + n; end != cf.length && end%csBlock != 0 {
			if len(rb) <= end%csBlock {
				return
			}
			rb = rb[:len(rb)-end%csBlock]
		}
	}
	// now rb contains the data to check
	length := len(rb)
	buf := utils.NewBuffer(uint32((length-1)/csBlock+1) * 4)
	if _, err = cf.File.ReadAt(buf.Bytes(), int64(cf.length+ioff*4)); err != nil {
		logger.Warnf("Read checksum of data length %d checksum offset %d: %s", length, cf.length+ioff*4, err)
		return
	}
	for start, end := 0, 0; start < length; start = end {
		end = start + csBlock
		if end > length {
			end = length
		}
		sum := crc32.Checksum(rb[start:end], crc32c)
		expect := buf.Get32()
		logger.Debugf("Cache file read data start %d end %d checksum %d, expected %d", start, end, sum, expect)
		if sum != expect {
			err = fmt.Errorf("data checksum %d != expect %d", sum, expect)
			break
		}
	}
	return
}


================================================
FILE: pkg/chunk/disk_cache_state.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"errors"
	"fmt"
	"os"
	"strconv"
	"sync/atomic"
	"time"
)

var (
	numIOErrToUnstable         uint32  = 3                // from normal to unstable
	minIOSuccToNormal          uint32  = 60               // from unstable to normal
	maxIOErrPercentageToNormal float64 = 0                // from unstable to normal
	maxDurToDown                       = 30 * time.Minute // from unstable to down
	maxConcurrencyForUnstable  int64   = 10
	tickDurForNormal                   = 1 * time.Minute
	tickDurForUnstable                 = 1 * time.Minute

	probeDur  = 500 * time.Millisecond
	probeDir  = "probe"
	probeData = []byte{1, 2, 3}
	probeBuff = make([]byte, 3)
)

var (
	errCacheDown       = errors.New("cache down")
	errUnstableCoLimit = fmt.Errorf("exceed concurrency %d limit for unstable disk cache", maxConcurrencyForUnstable)
)

var diskStateNames = map[int]string{
	dcUnknown:   "unknown",
	dcNormal:    "normal",
	dcUnstable:  "unstable",
	dcDown:      "down",
	dcUnchanged: "unchanged",
}

const (
	dcUnknown = iota
	dcNormal
	dcUnstable
	dcDown
	dcUnchanged
)

const (
	eventUnknown = iota
	eventToNormal
	eventToUnstable
	eventToDown
)

// dcState disk cache state
type dcState interface {
	init(cs *cacheStore)
	tick()
	stop()
	state() int
	checkCacheOp() error
	beforeCacheOp()
	afterCacheOp()
	onIOErr()
	onIOSucc()
}

type baseDC struct {
	cache  *cacheStore
	stopCh chan struct{}
}

func newDCState(state int, cs *cacheStore) dcState {
	var s dcState
	switch state {
	case dcNormal:
		s = &normalDC{}
	case dcUnstable:
		s = &unstableDC{}
	case dcDown:
		s = &downDC{}
	case dcUnchanged:
		s = &unchangedDC{}
	}
	s.init(cs)
	s.tick()
	return s
}

func (dc *baseDC) init(cs *cacheStore) {
	dc.cache = cs
	dc.stopCh = make(chan struct{})
}

func (dc *baseDC) stop() {
	close(dc.stopCh)
}
func (dc *baseDC) onIOErr()            {}
func (dc *baseDC) onIOSucc()           {}
func (dc *baseDC) state() int          { return dcUnknown }
func (dc *baseDC) tick()               {}
func (dc *baseDC) checkCacheOp() error { return nil }
func (dc *baseDC) beforeCacheOp()      {}
func (dc *baseDC) afterCacheOp()       {}

type unchangedDC struct {
	baseDC
}

func (dc *unchangedDC) state() int { return dcUnchanged }

type normalDC struct {
	baseDC
	ioErrCnt uint32
}

func (dc *normalDC) state() int { return dcNormal }

func (dc *normalDC) init(cs *cacheStore) {
	dc.baseDC.init(cs)
	_ = os.RemoveAll(dc.cache.cachePath(probeDir))
}

func (dc *normalDC) tick() {
	go func() {
		for {
			select {
			case <-dc.stopCh:
				return
			case <-time.After(tickDurForNormal):
				atomic.StoreUint32(&dc.ioErrCnt, 0)
			}
		}
	}()
}

func (dc *normalDC) onIOErr() {
	cnt := atomic.AddUint32(&dc.ioErrCnt, 1)
	if cnt >= uint32(numIOErrToUnstable) {
		dc.cache.event(eventToUnstable)
	}
}

type unstableDC struct {
	baseDC
	startTime time.Time
	ioErrCnt  uint32
	ioCnt     uint32

	concurrency atomic.Int64
}

func (dc *unstableDC) state() int { return dcUnstable }

func (dc *unstableDC) init(cs *cacheStore) {
	dc.baseDC.init(cs)
	dc.startTime = time.Now()
}

func (dc *unstableDC) onIOErr() {
	atomic.AddUint32(&dc.ioCnt, 1)
	atomic.AddUint32(&dc.ioErrCnt, 1)
}

func (dc *unstableDC) onIOSucc() {
	atomic.AddUint32(&dc.ioCnt, 1)
}

func probeCacheKey(id, size int) string {
	return fmt.Sprintf("%s/%02X/%v/%v_%v_%v", probeDir, id%256, id/1000/1000, id, 0, size)
}

func (dc *unstableDC) tick() {
	go dc.probe()
	go func() {
		ticker := time.NewTicker(tickDurForUnstable)
		defer ticker.Stop()

		for {
			select {
			case <-dc.stopCh:
				return
			case <-ticker.C:
				errCnt, ioCnt := atomic.LoadUint32(&dc.ioErrCnt), atomic.LoadUint32(&dc.ioCnt)
				if ioCnt >= minIOSuccToNormal && float64(errCnt)/float64(ioCnt) <= maxIOErrPercentageToNormal {
					dc.cache.event(eventToNormal)
				} else if time.Since(dc.startTime) >= maxDurToDown {
					dc.cache.event(eventToDown)
				} else {
					atomic.StoreUint32(&dc.ioErrCnt, 0)
					atomic.StoreUint32(&dc.ioCnt, 0)
				}
			}
		}
	}()
}

func (dc *unstableDC) probe() {
	page := NewPage(probeData)
	defer page.Release()
	cnt := 0

	for {
		select {
		case <-dc.stopCh:
			return
		default:
			cnt++
			start := time.Now()
			dc.doProbe(probeCacheKey(cnt, len(probeData)), page)
			diff := probeDur - time.Since(start)
			if diff > 0 {
				time.Sleep(diff)
			}
		}
	}
}

func (dc *unstableDC) doProbe(key string, page *Page) {
	dc.cache.cache(key, page, true, false)
	reader, err := dc.cache.load(key)
	if err != nil {
		return
	}
	defer reader.Close()
	_, _ = reader.ReadAt(probeBuff, 0)
	dc.cache.remove(key, false)
}

func (dc *unstableDC) beforeCacheOp() { dc.concurrency.Add(1) }
func (dc *unstableDC) afterCacheOp()  { dc.concurrency.Add(-1) }

func (dc *unstableDC) checkCacheOp() error {
	if dc.concurrency.Load() >= maxConcurrencyForUnstable {
		return errUnstableCoLimit
	}
	return nil
}

type downDC struct {
	baseDC
}

func (dc *downDC) state() int          { return dcDown }
func (dc *downDC) checkCacheOp() error { return errCacheDown }

func (cache *cacheStore) event(eventType int) {
	cache.stateLock.Lock()
	defer cache.stateLock.Unlock()
	state := cache.state.state()
	switch state {
	case dcNormal:
		if eventType == eventToUnstable {
			cache.state.stop()
			cache.state = newDCState(dcUnstable, cache)
		}
	case dcUnstable:
		switch eventType {
		case eventToNormal:
			cache.state.stop()
			cache.state = newDCState(dcNormal, cache)
		case eventToDown:
			cache.state.stop()
			cache.state = newDCState(dcDown, cache)
		}
	}
	logger.Infof("disk cache %s state change from %s to %s", cache.dir, diskStateNames[state], diskStateNames[cache.state.state()])
}

func getEnvs() {
	if os.Getenv("JFS_MAX_IO_DURATION") != "" {
		dur, err := time.ParseDuration(os.Getenv("JFS_MAX_IO_DURATION"))
		if err != nil {
			logger.Errorf("parse JFS_MAX_IO_DURATION error: %v", err)
		} else {
			maxIODur = dur
		}
		logger.Infof("set maxIODur to %v", maxIODur)
	}
	if os.Getenv("JFS_MAX_IO_ERR_PERCENTAGE") != "" {
		percentage, err := strconv.ParseFloat(os.Getenv("JFS_MAX_IO_ERR_PERCENTAGE"), 64)
		if err != nil {
			logger.Errorf("parse JFS_MAX_IO_ERR_PERCENTAGE error: %v", err)
		} else {
			maxIOErrPercentageToNormal = percentage
		}
		logger.Infof("set maxIOErrPercentageToNormal to %f", maxIOErrPercentageToNormal)
	}
	if os.Getenv("JFS_MAX_DURATION_TO_DOWN") != "" {
		dur, err := time.ParseDuration(os.Getenv("JFS_MAX_DURATION_TO_DOWN"))
		if err != nil {
			logger.Errorf("parse JFS_MAX_DURATION_TO_DOWN error: %v", err)
		} else {
			maxDurToDown = dur
		}
		logger.Infof("set maxDurToDown to %v", maxDurToDown)
	}
	if os.Getenv("JFS_MAX_CONCURRENCY_FOR_UNSTABLE") != "" {
		co, err := strconv.ParseInt(os.Getenv("JFS_MAX_CONCURRENCY_FOR_UNSTABLE"), 10, 64)
		if err != nil {
			logger.Errorf("parse JFS_MAX_CONCURRENCY_FOR_UNSTABLE error: %v", err)
		} else {
			maxConcurrencyForUnstable = co
		}
		logger.Infof("set maxConcurrencyForUnstable to %d", maxConcurrencyForUnstable)
	}
}


================================================
FILE: pkg/chunk/disk_cache_state_test.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"fmt"
	"os"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func setState(s *cacheStore, state int) {
	s.stateLock.Lock()
	defer s.stateLock.Unlock()
	s.state.stop()
	s.state = newDCState(state, s)
}

func testDiskCacheState(t *testing.T, cacheNum int) {
	oriTickDurForUnstable, oriMinIOSuccToNormal, oriMaxDurToDown := tickDurForUnstable, minIOSuccToNormal, maxDurToDown
	defer func() {
		tickDurForUnstable, minIOSuccToNormal, maxDurToDown = oriTickDurForUnstable, oriMinIOSuccToNormal, oriMaxDurToDown
	}()

	genDirs := func(num int) []string {
		dirs := make([]string, 0, num)
		for i := 0; i < num; i++ {
			dirs = append(dirs, fmt.Sprintf("/tmp/diskCache%d", i))
		}
		return dirs
	}

	conf := defaultConf
	dirs := genDirs(cacheNum)
	conf.CacheDir = strings.Join(dirs, ":")
	conf.AutoCreate = true
	defer func() {
		for _, dir := range dirs {
			_ = os.RemoveAll(dir)
		}
	}()

	manager := newCacheManager(&conf, nil, nil)
	require.False(t, manager.isEmpty())

	m, ok := manager.(*cacheManager)
	require.True(t, ok)
	require.Equal(t, cacheNum, m.length())

	// case: cache
	data := []byte{1, 2, 3}
	page := NewPage(data)
	defer page.Release()
	k1 := probeCacheKey(0, len(data))
	m.cache(k1, page, true, false)
	time.Sleep(time.Second)

	// case: normal -> unstable
	s1 := m.getStore(k1)
	for i := 0; i <= int(numIOErrToUnstable); i++ {
		s1.state.onIOErr()
	}
	require.Equal(t, dcUnstable, s1.state.state())

	// case: probe in unstable
	time.Sleep(time.Second)
	require.GreaterOrEqual(t, atomic.LoadUint32(&s1.state.(*unstableDC).ioCnt), uint32(1))

	// case: unstable concurrency limit
	for i := 0; i < int(maxConcurrencyForUnstable); i++ {
		s1.state.beforeCacheOp()
	}
	_, err := m.load(k1)
	assert.Equal(t, errUnstableCoLimit, err)
	for i := 0; i < int(maxConcurrencyForUnstable); i++ {
		s1.state.afterCacheOp()
	}

	// case: unstable -> normal
	tickDurForUnstable = time.Second
	minIOSuccToNormal = 1
	setState(s1, dcUnstable)
	s1.state.(*unstableDC).doProbe(k1, page)
	time.Sleep(2 * time.Second)
	require.Equal(t, dcNormal, s1.state.state())

	// case: unstable -> down
	tickDurForUnstable = time.Second
	maxDurToDown = 1
	minIOSuccToNormal = 5 * 60
	setState(s1, dcUnstable)
	time.Sleep(2 * time.Second)
	require.Equal(t, dcDown, s1.state.state())
}

func TestDiskCacheState(t *testing.T) {
	testDiskCacheState(t, 1)
	testDiskCacheState(t, 10)
}


================================================
FILE: pkg/chunk/disk_cache_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/stretchr/testify/require"

	. "github.com/bytedance/mockey"
	. "github.com/smartystreets/goconvey/convey"
)

// Copy from https://github.com/prometheus/client_golang/blob/v1.14.0/prometheus/testutil/testutil.go
func toFloat64(c prometheus.Collector) float64 {
	var (
		m      prometheus.Metric
		mCount int
		mChan  = make(chan prometheus.Metric)
		done   = make(chan struct{})
	)

	go func() {
		for m = range mChan {
			mCount++
		}
		close(done)
	}()

	c.Collect(mChan)
	close(mChan)
	<-done

	if mCount != 1 {
		panic(fmt.Errorf("collected %d metrics instead of exactly 1", mCount))
	}

	pb := &dto.Metric{}
	if err := m.Write(pb); err != nil {
		panic(fmt.Errorf("error happened while collecting metrics: %w", err))
	}
	if pb.Gauge != nil {
		return pb.Gauge.GetValue()
	}
	if pb.Counter != nil {
		return pb.Counter.GetValue()
	}
	if pb.Untyped != nil {
		return pb.Untyped.GetValue()
	}
	panic(fmt.Errorf("collected a non-gauge/counter/untyped metric: %s", pb))
}

func testConf() Config {
	conf := defaultConf
	conf.CacheDir = filepath.Join(conf.CacheDir, fmt.Sprintf("%d", time.Now().UnixNano()))
	return conf
}

func TestNewCacheStore(t *testing.T) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	s := newCacheStore(nil, conf.CacheDir, 1<<30, conf.CacheItems, 1, &conf, nil)
	if s == nil {
		t.Fatalf("Create new cache store failed")
	}
}

func TestMetrics(t *testing.T) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	m := newCacheManager(&conf, nil, nil)
	metrics := m.(*cacheManager).metrics
	s := m.(*cacheManager).stores[0]
	content := []byte("helloworld")
	p := NewPage(content)
	s.cache("test", p, true, false)
	// Waiting for the cache to be flushed
	time.Sleep(time.Millisecond * 100)
	if toFloat64(metrics.cacheWrites) != 1.0 {
		t.Fatalf("expect the cacheWrites is 1")
	}

	if toFloat64(metrics.cacheWriteBytes) != float64(len(content)) {
		t.Fatalf("expect the cacheWriteBytes is %d", len(content))
	}

	if toFloat64(metrics.stageBlocks) != 0.0 {
		t.Fatalf("expect the stageBlocks is %d", len(content))
	}

	if toFloat64(metrics.stageBlockBytes) != 0.0 {
		t.Fatalf("expect the stageBlockBytes is %d", len(content))
	}
	key := fmt.Sprintf("chunks/0/5/5000_2_%d", len(content))
	stagingPath, err := m.stage(key, content)
	if err != nil {
		t.Fatalf("stage failed: %s", err)
	}
	if toFloat64(metrics.stageBlocks) != 1.0 {
		t.Fatalf("expect the stageBlocks is %d", len(content))
	}

	if toFloat64(metrics.stageBlockBytes) != float64(len(content)) {
		t.Fatalf("expect the stageBlockBytes is %d", len(content))
	}
	err = m.removeStage(key)
	if err != nil {
		t.Fatalf("faild to remove stage")
	}

	if toFloat64(metrics.stageBlocks) != 0.0 {
		t.Fatalf("expect the stageBlocks is %d", len(content))
	}

	if toFloat64(metrics.stageBlockBytes) != 0.0 {
		t.Fatalf("expect the stageBlockBytes is %d", len(content))
	}

	if _, err := os.Stat(stagingPath); err != nil && !os.IsNotExist(err) {
		t.Fatalf("expect the stageingPath %s not exists", stagingPath)
	}
}

func TestScanCached(t *testing.T) {
	var err error
	cfg := defaultConf
	cfg.CacheEviction = EvictionNone
	cache := &cacheStore{
		opTs: make(map[time.Duration]func() error),
	}
	cache.state = newDCState(dcUnchanged, cache)
	cache.keys, err = NewKeyIndex(&cfg)
	require.NoError(t, err)
	cache.dir = "/tmp/jfstest_scan"
	rawDir := filepath.Join(cache.dir, cacheDir)
	if err := os.MkdirAll(rawDir, 0755); err != nil {
		t.Fatalf("mkdir %s: %s", rawDir, err)
	}
	num := 10
	for i := 0; i < num; i++ {
		if f, err := os.Create(filepath.Join(rawDir, fmt.Sprintf("test%d_1024", i))); err == nil {
			_ = f.Close()
		}
	}
	defer os.RemoveAll(rawDir)
	cache.scanCached()
	require.Equal(t, num, cache.keys.len())
}

func TestChecksum(t *testing.T) {
	conf := testConf()
	conf.FreeSpace = 0.01
	conf.CacheEviction = EvictionNone
	defer os.RemoveAll(conf.CacheDir)
	m := new(cacheManagerMetrics)
	m.initMetrics()
	s := newCacheStore(m, conf.CacheDir, 1<<30, conf.CacheItems, 1, &conf, nil)
	k1 := "0_0_10" // no checksum
	k2 := "1_0_10"
	k3 := "2_1_102400"
	k4 := "3_5_102400" // corrupt data
	k5 := "4_8_1048576"

	p := NewPage([]byte("helloworld"))
	defer p.Release()
	s.cache(k1, p, true, false)

	s.checksum = CsFull
	s.cache(k2, p, true, false)

	buf := make([]byte, 102400)
	utils.RandRead(buf)
	s.cache(k3, NewPage(buf), true, false)

	fpath := s.cachePath(k4)
	dir := filepath.Dir(fpath)
	if err := os.MkdirAll(dir, 0755); err != nil {
		t.Fatalf("mkdir parent dir %s: %s", dir, err)
	}
	f, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE, s.mode)
	if err != nil {
		t.Fatalf("Create cache file %s: %s", fpath, err)
	}
	if _, err = f.Write(buf); err != nil {
		_ = f.Close()
		t.Fatalf("Write cache file %s: %s", fpath, err)
	}
	corrupt := make([]byte, 102400)
	copy(corrupt, buf)
	for i := 98304; i < 102400; i++ { // reset 96K ~ 100K
		corrupt[i] = 0
	}
	if _, err = f.Write(checksum(corrupt)); err != nil {
		_ = f.Close()
		t.Fatalf("Write checksum to cache file %s: %s", fpath, err)
	}
	_ = f.Close()
	s.add(k4, 102400, uint32(time.Now().Unix()))

	buf = make([]byte, 1048576)
	utils.RandRead(buf)
	s.cache(k5, NewPage(buf), true, false)
	time.Sleep(time.Second * 5) // wait for cache file flushed

	check := func(key string, off int64, size int) error {
		rc, err := s.load(key)
		if err != nil {
			t.Logf("CacheStore files in %s:", s.dir)
			filepath.Walk(s.dir, func(path string, info os.FileInfo, err error) error {
				if err != nil {
					t.Logf("error accessing %s: %v", path, err)
					return nil
				}
				t.Logf("cache file: %s", path)
				return nil
			})
			t.Fatalf("CacheStore load key %s: %s", key, err)
		}
		defer rc.Close()
		buf := make([]byte, size)
		_, err = rc.ReadAt(buf, off)
		return err
	}
	cases := []struct {
		key    string
		off    int64
		size   int
		expect bool
	}{
		{k1, 0, 10, true},
		{k1, 3, 5, true},
		{k2, 0, 10, true},
		{k2, 3, 5, true},
		{k3, 0, 102400, true},
		{k3, 8192, 92160, true}, // 8K ~ 98K
		{k4, 0, 102400, true},
		{k4, 8192, 92160, true}, // only CsExtend can detect the error
		{k5, 0, 1048576, true},
		{k5, 131072, 131072, true},
		{k5, 102400, 512000, true},
	}
	for _, l := range []string{CsNone, CsFull, CsShrink, CsExtend} {
		s.checksum = l
		if l != CsNone {
			cases[6].expect = false
		}
		if l == CsExtend {
			cases[7].expect = false
		}
		for _, c := range cases {
			if err = check(c.key, c.off, c.size); (err == nil) != c.expect {
				t.Fatalf("CacheStore check level %s case %+v: %s", l, c, err)
			}
		}
	}
}

func TestExpand(t *testing.T) {
	rs := expandDir("/not/exists/jfsCache")
	if len(rs) != 1 || rs[0] != "/not/exists/jfsCache" {
		t.Errorf("expand: %v", rs)
		t.FailNow()
	}

	dir := t.TempDir()
	_ = os.Mkdir(filepath.Join(dir, "aaa1"), 0755)
	_ = os.Mkdir(filepath.Join(dir, "aaa2"), 0755)
	_ = os.Mkdir(filepath.Join(dir, "aaa3"), 0755)
	_ = os.Mkdir(filepath.Join(dir, "aaa3", "jfscache"), 0755)
	_ = os.Mkdir(filepath.Join(dir, "aaa3", "jfscache", "jfs"), 0755)

	rs = expandDir(filepath.Join(dir, "aaa*", "jfscache", "jfs"))
	if len(rs) != 3 || rs[0] != filepath.Join(dir, "aaa1", "jfscache", "jfs") {
		t.Errorf("expand: %v", rs)
		t.FailNow()
	}
}

func BenchmarkLoadCached(b *testing.B) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	s := newCacheStore(nil, conf.CacheDir, 1<<30, conf.CacheItems, 1, &conf, nil)
	p := NewPage(make([]byte, 1024))
	key := "/chunks/1_1024"
	s.cache(key, p, false, false)
	time.Sleep(time.Millisecond * 100)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if f, e := s.load(key); e == nil {
			_ = f.Close()
		} else {
			b.FailNow()
		}
	}
}

func BenchmarkLoadUncached(b *testing.B) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	s := newCacheStore(nil, conf.CacheDir, 1<<30, conf.CacheItems, 1, &conf, nil)
	key := "chunks/222_1024"
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if f, e := s.load(key); e == nil {
			_ = f.Close()
		}
	}
}

func TestCheckPath(t *testing.T) {
	cases := []struct {
		path     string
		expected bool
	}{
		// unix path style
		{path: "chunks/111/222/3333_3333_3333", expected: true},
		{path: "chunks/111/222/3333_3333_0", expected: true},
		{path: "chunks/0/0/0_0_0", expected: true},
		{path: "chunks/01/10/0_01_0", expected: true},
		{path: "achunks/111/222/3333_3333_3333", expected: false},
		{path: "chunksa/111/222/3333_3333_3333", expected: false},
		{path: "chunksa", expected: false},
		{path: "chunks/111", expected: false},
		{path: "chunks/111/2222", expected: false},
		{path: "chunks/111/2222/3", expected: false},
		{path: "chunks/111/2222/3333_3333", expected: false},
		{path: "chunks/111/2222/3333_3333_3333_4444", expected: false},
		{path: "chunks/111/2222/3333_3333_3333/4444", expected: false},
		{path: "chunks/111_/2222/3333_3333_3333", expected: false},
		{path: "chunks/111/22_22/3333_3333_3333", expected: false},
		{path: "chunks/111/22_22/3333_3333_3333", expected: false},
		{path: "chunks/dd/222/3333_3333_0", expected: true}, // hash prefix
		{path: "chunks/FF/222/3333_3333_0", expected: true}, // hash prefix
		{path: "chunks/5D/222/3333_3333_0", expected: true}, // hash prefix
		{path: "chunks/D1/222/3333_3333_0", expected: true}, // hash prefix
		{path: "chunks/5DD/222/3333_3333_0", expected: false},
		{path: "chunks/111D/222/3333_3333_0", expected: false},
	}
	for _, c := range cases {
		if res := pathReg.MatchString(c.path); res != c.expected {
			t.Fatalf("check path %s expected %v but got %v", c.path, c.expected, res)
		}
	}
}

func shutdownStore(s *cacheStore) {
	s.stateLock.Lock()
	defer s.stateLock.Unlock()
	s.state.stop()
	s.state = newDCState(dcDown, s)
}

func TestCacheManager(t *testing.T) {
	conf := defaultConf
	conf.CacheDir = "/tmp/diskCache0:/tmp/diskCache1:/tmp/diskCache2"
	conf.AutoCreate = true
	defer os.RemoveAll("/tmp/diskCache0")
	defer os.RemoveAll("/tmp/diskCache1")
	defer os.RemoveAll("/tmp/diskCache2")
	manager := newCacheManager(&conf, nil, nil)
	require.True(t, !manager.isEmpty())

	m, ok := manager.(*cacheManager)
	require.True(t, ok)
	require.Equal(t, 3, m.length())

	// case: key rehash after store removal
	k1 := "k1"
	p1 := NewPage([]byte{1, 2, 3})
	defer p1.Release()
	m.cache(k1, p1, true, false)

	s1 := m.getStore(k1)
	require.NotNil(t, s1)

	PatchConvey("test getDiskUsage", t, func() {
		Mock(getDiskUsage).To(func(path string) (uint64, uint64, uint64, uint64) {
			time.Sleep(time.Second * 10)
			return 1, 1, 1, 1
		}).Build()
		var wg sync.WaitGroup
		wg.Add(1)
		go func() {
			s1.Lock()
			wg.Done()
			s1.cleanupFull()
			s1.Unlock()
		}()

		wg.Wait()
		start := time.Now()
		s1.load(k1)
		So(time.Since(start), ShouldBeLessThan, time.Second*3)
	})

	m.Lock()
	shutdownStore(s1)
	m.Unlock()
	time.Sleep(3 * time.Second)

	rc, _ := m.load(k1)
	require.Nil(t, rc)
	_, exist := m.exist(k1)
	require.False(t, exist)

	s2 := m.getStore(k1)
	require.NotNil(t, s2)

	// case: remove all store
	m.Lock()
	for _, s := range m.storeMap {
		shutdownStore(s)
	}
	m.Unlock()
	time.Sleep(3 * time.Second)
	require.True(t, m.isEmpty())
}

func TestAtimeNotLost(t *testing.T) {
	for _, eviction := range []string{EvictionNone, Eviction2Random, EvictionLRU} {
		cfg := defaultConf
		cfg.CacheEviction = eviction
		cfg.FreeSpace = 0.03
		m := newCacheManager(&cfg, nil, nil)
		key := "0_0_10"

		p := NewPage([]byte("helloworld"))
		defer p.Release()
		m.cache(key, p, true, false)
		time.Sleep(3 * time.Second)

		_, exist := m.exist(key) // touch atime
		if !exist {
			t.Fatalf("CacheStore key %s not exist", key)
		}
		s := m.(*cacheManager).stores[0]
		atimeMem := s.keys.peekAtime(s.getCacheKey(key))
		if atimeMem == 0 {
			t.Fatalf("CacheStore key %s atime lost", key)
		}
		s.scanCached() // should use atime from memory
		atimeAfterScan := s.keys.peekAtime(s.getCacheKey(key))
		if atimeAfterScan != atimeMem {
			t.Fatalf("CacheStore key %s atime lost after scan, before: %d, after: %d", key, atimeMem, atimeAfterScan)
		}
	}
}
func TestSetlimitByFreeRatio(t *testing.T) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	cache := newCacheStore(nil, conf.CacheDir, 1<<30, 1000, 1, &conf, nil)

	usage := DiskFreeRatio{
		spaceCap: 1 << 30,
		inodeCap: 1000,
	}
	freeRatio := float32(0.2)
	cache.setLimitByFreeRatio(usage, 0.2)

	expectedSizeLimit := int64((1 - freeRatio + 0.05) * float32(usage.spaceCap))
	if cache.capacity > expectedSizeLimit {
		t.Fatalf("Expected capacity <= %d, but got %d", expectedSizeLimit, cache.capacity)
	}
	expectedInodeLimit := int64((1 - freeRatio + 0.05) * float32(usage.inodeCap))
	if cache.maxItems > expectedInodeLimit && cache.maxItems != 0 {
		t.Fatalf("Expected maxItems <= %d, but got %d", expectedInodeLimit, cache.maxItems)
	}
}

func TestSetLimitByFreeRatioUnknownInodesKeepExplicitMaxItems(t *testing.T) {
	conf := testConf()
	defer os.RemoveAll(conf.CacheDir)
	cache := newCacheStore(nil, conf.CacheDir, 1<<30, 1000, 1, &conf, nil)

	usage := DiskFreeRatio{
		spaceCap: 1 << 30,
		inodeCap: 0,
	}
	cache.setLimitByFreeRatio(usage, 0.2)
	require.Equal(t, int64(1000), cache.maxItems)
}

func TestUnknownInodeStatsShouldNotMarkCacheAsRawFull(t *testing.T) {
	PatchConvey("unknown inode stats should not trigger rawFull", t, func() {
		Mock(getDiskUsage).To(func(path string) (uint64, uint64, uint64, uint64) {
			return 1 << 30, 1 << 30, 0, 0
		}).Build()

		conf := defaultConf
		conf.CacheDir = t.TempDir()
		m := new(cacheManagerMetrics)
		m.initMetrics()
		s := newCacheStore(m, conf.CacheDir, 1<<30, conf.CacheItems, 1, &conf, nil)
		defer shutdownStore(s)

		require.Never(t, func() bool {
			s.Lock()
			defer s.Unlock()
			return s.rawFull
		}, 1500*time.Millisecond, 100*time.Millisecond)
	})
}

func Test2RandomEviction(t *testing.T) {
	Convey("Test2RandomEviction-CacheFull", t, func() {
		dir := t.TempDir()
		defer os.RemoveAll(dir)
		conf := defaultConf
		conf.FreeSpace = 0.00001
		conf.CacheScanInterval = -1 // Disable periodic scan
		conf.CacheSize = 1 << 30
		conf.CacheItems = 10 // Max 10 items to easily trigger eviction

		m := new(cacheManagerMetrics)
		m.initMetrics()
		s := newCacheStore(m, filepath.Join(dir, "diskCache"), int64(conf.CacheSize), conf.CacheItems, 1, &conf, nil)
		require.NotNil(t, s)
		if _, ok := s.keys.(*randomEviction); !ok {
			t.Fatalf("Expected randomEviction, but got %T", s.keys)
		}

		// Add items with distinct atimes
		for i := 1; i <= 20; i++ {
			key := fmt.Sprintf("%d_%d_1024", i, i)
			s.add(key, 1024, uint32(time.Now().Add(time.Duration(i)*time.Second).Unix())) // New items have larger atime
			require.LessOrEqual(t, int64(s.keys.len()), conf.CacheItems, "Cache should not exceed max items limit during addition")
			require.Greater(t, s.keys.len(), 0, "Cache should always have items after addition")
		}
	})
}

func TestLruEviction(t *testing.T) {
	Convey("TestLruEviction-CacheFull", t, func() {
		dir := t.TempDir()
		defer os.RemoveAll(dir)
		conf := defaultConf
		conf.CacheEviction = EvictionLRU
		conf.FreeSpace = 0.00001
		conf.CacheScanInterval = -1 // Disable periodic scan
		conf.CacheSize = 1 << 30
		conf.CacheItems = 10 // Max 10 items to easily trigger eviction

		m := new(cacheManagerMetrics)
		m.initMetrics()
		s := newCacheStore(m, filepath.Join(dir, "diskCache"), int64(conf.CacheSize), conf.CacheItems, 1, &conf, nil)
		require.NotNil(t, s)
		le := s.keys.(*lruEviction)

		// Add items with distinct atimes
		for i := 1; i <= 20; i++ {
			key := fmt.Sprintf("%d_%d_1024", i, i)
			s.add(key, 1024, uint32(time.Now().Add(time.Duration(i)*time.Second).Unix())) // New items have larger atime
			require.True(t, le.verifyHeap())
			require.LessOrEqual(t, int64(s.keys.len()), conf.CacheItems, "Cache should not exceed max items limit during addition")
			require.Greater(t, s.keys.len(), 0, "Cache should always have items after addition")
		}

		cutIndex := 20 - conf.CacheItems
		expectedKeys := make(map[string]bool)
		// After eviction, the cache should only contain the newest items.
		for i := cutIndex + 1; i <= 20; i++ {
			key := fmt.Sprintf("%d_%d_1024", i, i)
			expectedKeys[key] = true
		}

		require.Equal(t, le.lruHeap.Len(), len(le.keys), "Heap length should match keys length after insertion")
		require.Equal(t, len(expectedKeys), len(le.keys), "Number of items in cache after eviction mismatch")
		require.Equal(t, len(expectedKeys), le.lruHeap.Len(), "Number of items in heap after eviction mismatch")

		// Verify the heap also contains the expected keys
		tempHeap := make(atimeHeap, le.lruHeap.Len())
		copy(tempHeap, le.lruHeap)
		for tempHeap.Len() > 0 {
			item := tempHeap.Pop().(heapItem)
			require.Contains(t, expectedKeys, item.key.String(), "Unexpected key found in heap: %s", item.key.String())
		}

		// Verify all evicted keys are no longer in the cache
		for i := int64(1); i <= cutIndex; i++ {
			key := fmt.Sprintf("%d_%d_1024", i, i)
			_, ok := le.keys[s.getCacheKey(key)]
			require.False(t, ok, "Evicted key %s still found in cache", key)
		}
	})

	Convey("TestLruEviction-WriteBack", t, func() {
		dir := t.TempDir()
		defer os.RemoveAll(dir)
		conf := defaultConf
		conf.CacheEviction = EvictionLRU
		conf.Writeback = true
		conf.FreeSpace = 0.00001
		conf.CacheScanInterval = -1 // Disable periodic scan
		conf.CacheSize = 1 << 30
		conf.CacheItems = 10 // Max 10 items to easily trigger eviction

		// TODO: delete me
		m := new(cacheManagerMetrics)
		m.initMetrics()
		s := newCacheStore(m, filepath.Join(dir, "diskCache"), int64(conf.CacheSize), conf.CacheItems, 1, &conf, nil)
		require.NotNil(t, s)
		le := s.keys.(*lruEviction)

		// Add items with distinct atimes
		blockPlaceHolder := []byte("test data")
		for i := 1; i <= 20; i++ {
			key := fmt.Sprintf("%d_%d_9", i, i)
			_, err := s.stage(key, blockPlaceHolder)
			require.True(t, le.verifyHeap())
			require.NoError(t, err, "Failed to stage data for key %s", key)
		}
		require.Equal(t, 20, len(le.keys), "Cache should contain 20 staged items even if full")
		require.Equal(t, 0, len(le.lruHeap), "Staged items should not be in the LRU heap")

		s.Lock()
		s.cleanupFull()
		s.Unlock()
		for i := 1; i <= 20; i++ {
			key := fmt.Sprintf("%d_%d_9", i, i)
			s.uploaded(key, len(blockPlaceHolder))
		}
		require.Equal(t, len(le.keys), le.lruHeap.Len(), "Heap length should match keys length after staged items are uploaded")

		s.maxItems = 1
		s.Lock()
		s.cleanupFull()
		s.Unlock()
		require.Equal(t, 0, len(le.keys), "Cache should be empty by cleanupFull after setting maxItems to 1")
		require.Equal(t, 0, len(le.lruHeap), "LRU heap should be empty by cleanupFull after setting maxItems to 1")
	})
}

func TestCooldownAtimeOnWriteFixedOnLoad(t *testing.T) {
	dir := t.TempDir()
	conf := defaultConf
	conf.CacheExpire = time.Hour
	conf.CacheEviction = EvictionNone
	m := new(cacheManagerMetrics)
	m.initMetrics()
	cache := newCacheStore(m, dir, 1<<30, 1000, 1, &conf, nil)
	key := "0_0_4"

	PatchConvey("mock time.Now to avoid drift", t, func() {
		fixedTime := time.Date(2025, 1, 28, 12, 0, 0, 0, time.UTC)
		Mock(time.Now).Return(fixedTime).Build()
		path, err := cache.stage(key, []byte("test"))
		require.NoError(t, err)
		require.NotEmpty(t, path)
		expectedCooldownAtime := uint32(fixedTime.Add(-conf.CacheExpire / 2).Unix())
		require.Equal(t, expectedCooldownAtime, cache.keys.peekAtime(cache.getCacheKey(key)))
		rc, err := cache.load(key)
		require.NoError(t, err)
		require.NotNil(t, rc)
		defer rc.Close()
		require.Equal(t, uint32(fixedTime.Unix()), cache.keys.peekAtime(cache.getCacheKey(key)))
	})
}


================================================
FILE: pkg/chunk/mem_cache.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"errors"
	"runtime"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
)

type memItem struct {
	atime time.Time
	page  *Page
}

type memcache struct {
	sync.Mutex
	capacity    int64
	maxItems    int64
	used        int64
	pages       map[string]memItem
	eviction    string
	cacheExpire time.Duration

	metrics *cacheManagerMetrics
}

func newMemStore(config *Config, metrics *cacheManagerMetrics) *memcache {
	c := &memcache{
		capacity:    int64(config.CacheSize),
		maxItems:    config.CacheItems,
		pages:       make(map[string]memItem),
		eviction:    config.CacheEviction,
		cacheExpire: config.CacheExpire,
		metrics:     metrics,
	}
	runtime.SetFinalizer(c, func(c *memcache) {
		for _, p := range c.pages {
			p.page.Release()
		}
		c.pages = nil
	})
	if c.cacheExpire > 0 {
		go c.cleanupExpire()
	}
	return c
}

func (c *memcache) removeStage(key string) error {
	return nil
}

func (c *memcache) usedMemory() int64 {
	c.Lock()
	defer c.Unlock()
	return c.used
}

func (c *memcache) stats() (int64, int64) {
	c.Lock()
	defer c.Unlock()
	return int64(len(c.pages)), c.used
}

func (c *memcache) cache(key string, p *Page, force, dropCache bool) {
	if !c.enabled() {
		return
	}
	c.Lock()
	defer c.Unlock()
	if c.full() && c.eviction == EvictionNone {
		logger.Debugf("Caching is full, drop %s (%d bytes)", key, len(p.Data))
		c.metrics.cacheDrops.Add(1)
		return
	}
	if _, ok := c.pages[key]; ok {
		return
	}
	size := int64(cap(p.Data))
	c.metrics.cacheWrites.Add(1)
	c.metrics.cacheWriteBytes.Add(float64(size))
	p.Acquire()
	c.pages[key] = memItem{time.Now(), p}
	c.used += size
	if c.full() && c.eviction != EvictionNone {
		c.cleanup()
	}
}

func (c *memcache) delete(key string, p *Page) {
	size := int64(cap(p.Data))
	c.used -= size
	p.Release()
	delete(c.pages, key)
}

func (c *memcache) remove(key string, staging bool) {
	c.Lock()
	defer c.Unlock()
	if item, ok := c.pages[key]; ok {
		c.delete(key, item.page)
		logger.Debugf("remove %s from cache", key)
	}
}

func (c *memcache) load(key string) (ReadCloser, error) {
	c.Lock()
	defer c.Unlock()
	if item, ok := c.pages[key]; ok {
		c.pages[key] = memItem{time.Now(), item.page}
		return NewPageReader(item.page), nil
	}
	return nil, errNotCached
}

func (c *memcache) exist(key string) (string, bool) {
	if !c.enabled() {
		return "", false
	}
	c.Lock()
	defer c.Unlock()
	if item, ok := c.pages[key]; ok {
		c.pages[key] = memItem{time.Now(), item.page}
		return "memory", true
	}
	return "", false
}

// locked
func (c *memcache) cleanup() {
	var cnt int
	var lastKey string
	var lastValue memItem
	var now = time.Now()
	// for each two random keys, then compare the access time, evict the older one
	for k, v := range c.pages {
		if cnt == 0 || lastValue.atime.After(v.atime) {
			lastKey = k
			lastValue = v
		}
		cnt++
		if cnt > 1 {
			logger.Debugf("remove %s from cache, age: %d", lastKey, now.Sub(lastValue.atime))
			c.metrics.cacheEvicts.Add(1)
			c.delete(lastKey, lastValue.page)
			cnt = 0
			if !c.full() {
				break
			}
		}
	}
}

func (c *memcache) enabled() bool {
	return c.capacity > 0
}

func (c *memcache) full() bool {
	return c.used > c.capacity || (c.maxItems != 0 && int64(len(c.pages)) > c.maxItems)
}

func (c *memcache) cleanupExpire() {
	var interval = time.Minute
	if c.cacheExpire < time.Minute {
		interval = c.cacheExpire
	}
	for {
		var freed int64
		var cnt, deleted int
		c.Lock()
		cutoff := time.Now().Add(-c.cacheExpire)
		for k, v := range c.pages {
			cnt++
			if cnt > 1e3 {
				break
			}
			if v.atime.Before(cutoff) {
				deleted++
				freed += int64(cap(v.page.Data))
				c.metrics.cacheEvicts.Add(1)
				c.delete(k, v.page)
			}
		}
		c.Unlock()
		if deleted > 0 {
			logger.Debugf("Expired cache blocks: %d blocks (%s), remaining: %d blocks (%s)", deleted, humanize.IBytes(uint64(freed)), len(c.pages), humanize.IBytes(uint64(c.used)))
		}
		time.Sleep(interval / 1000 * time.Duration((cnt+1-deleted)*1000/(cnt+1)))
	}
}

func (c *memcache) stage(key string, data []byte) (string, error) {
	return "", errors.New("not supported")
}
func (c *memcache) uploaded(key string, size int)    {}
func (c *memcache) isEmpty() bool                    { return false }
func (c *memcache) getMetrics() *cacheManagerMetrics { return c.metrics }


================================================
FILE: pkg/chunk/metrics.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"github.com/prometheus/client_golang/prometheus"
)

// CacheManager Metrics
type cacheManagerMetrics struct {
	cacheDrops      prometheus.Counter
	cacheWrites     prometheus.Counter
	cacheEvicts     prometheus.Counter
	cacheWriteBytes prometheus.Counter
	cacheWriteHist  prometheus.Histogram
	stageBlocks     prometheus.Gauge
	stageBlockBytes prometheus.Gauge
	stageWriteBytes prometheus.Counter
}

func newCacheManagerMetrics(reg prometheus.Registerer) *cacheManagerMetrics {
	metrics := &cacheManagerMetrics{}
	metrics.initMetrics()
	metrics.registerMetrics(reg)
	return metrics
}

func (c *cacheManagerMetrics) registerMetrics(reg prometheus.Registerer) {
	if reg != nil {
		reg.MustRegister(c.cacheDrops)
		reg.MustRegister(c.cacheWrites)
		reg.MustRegister(c.cacheEvicts)
		reg.MustRegister(c.cacheWriteHist)
		reg.MustRegister(c.cacheWriteBytes)
		reg.MustRegister(c.stageBlocks)
		reg.MustRegister(c.stageBlockBytes)
		reg.MustRegister(c.stageWriteBytes)
		reg.MustRegister(prometheus.NewGaugeFunc(prometheus.GaugeOpts{
			Name: "staging_writing_blocks",
			Help: "Number of writing blocks in staging.",
		}, func() float64 {
			return float64(stagingBlocks.Load())
		}))
	}
}

func (c *cacheManagerMetrics) initMetrics() {
	c.cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_drops",
		Help: "dropped block",
	})
	c.cacheWrites = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_writes",
		Help: "written cached block",
	})
	c.cacheEvicts = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_evicts",
		Help: "evicted cache blocks",
	})
	c.cacheWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "blockcache_write_bytes",
		Help: "write bytes of cached block",
	})
	c.cacheWriteHist = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "blockcache_write_hist_seconds",
		Help:    "write cached block latency distribution",
		Buckets: prometheus.ExponentialBuckets(0.00001, 2, 20),
	})
	c.stageBlocks = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "staging_blocks",
		Help: "Number of blocks in the staging path.",
	})
	c.stageBlockBytes = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "staging_block_bytes",
		Help: "Total bytes of blocks in the staging path.",
	})
	c.stageWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "staging_write_bytes",
		Help: "write bytes of blocks in the staging path.",
	})
}


================================================
FILE: pkg/chunk/page.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"errors"
	"io"
	"os"
	"runtime"
	"runtime/debug"
	"sync/atomic"

	"github.com/juicedata/juicefs/pkg/utils"
)

var pageStack = os.Getenv("JFS_PAGE_STACK") != ""

// Page is a page with refcount
type Page struct {
	refs    int32
	offheap bool
	dep     *Page
	Data    []byte
	stack   []byte
}

// NewPage create a new page.
func NewPage(data []byte) *Page {
	return &Page{refs: 1, Data: data}
}

func NewOffPage(size int) *Page {
	if size <= 0 {
		panic("size of page should > 0")
	}
	p := utils.Alloc(size)
	page := &Page{refs: 1, offheap: true, Data: p}
	if pageStack {
		page.stack = debug.Stack()
	}
	runtime.SetFinalizer(page, func(p *Page) {
		refcnt := atomic.LoadInt32(&p.refs)
		if refcnt != 0 {
			logger.Errorf("refcount of page %p (%d bytes) is not zero: %d, created by: %s", p, cap(p.Data), refcnt, string(p.stack))
			if refcnt > 0 {
				p.Release()
			}
		}
	})
	return page
}

func (p *Page) Slice(off, len int) *Page {
	p.Acquire()
	np := NewPage(p.Data[off : off+len])
	np.dep = p
	return np
}

// Acquire increase the refcount
func (p *Page) Acquire() {
	if pageStack {
		p.stack = append(p.stack, debug.Stack()...)
	}
	atomic.AddInt32(&p.refs, 1)
}

// Release decrease the refcount
func (p *Page) Release() {
	if pageStack {
		p.stack = append(p.stack, debug.Stack()...)
	}
	if atomic.AddInt32(&p.refs, -1) == 0 {
		if p.offheap {
			utils.Free(p.Data)
		}
		if p.dep != nil {
			p.dep.Release()
			p.dep = nil
		}
		p.Data = nil
	}
}

type pageReader struct {
	p   *Page
	off int
}

func NewPageReader(p *Page) *pageReader {
	p.Acquire()
	return &pageReader{p, 0}
}

func (r *pageReader) Read(buf []byte) (int, error) {
	n, err := r.ReadAt(buf, int64(r.off))
	r.off += n
	return n, err
}

func (r *pageReader) ReadAt(buf []byte, off int64) (int, error) {
	if len(buf) == 0 {
		return 0, nil
	}
	if r.p == nil {
		return 0, errors.New("page is already released")
	}
	if int(off) == len(r.p.Data) {
		return 0, io.EOF
	}
	n := copy(buf, r.p.Data[off:])
	if n < len(buf) {
		return n, io.EOF
	}
	return n, nil
}

func (r *pageReader) Close() error {
	if r.p != nil {
		r.p.Release()
		r.p = nil
	}
	return nil
}


================================================
FILE: pkg/chunk/page_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"io"
	"testing"
)

func TestPage(t *testing.T) {
	p1 := NewOffPage(1)
	if len(p1.Data) != 1 {
		t.Fail()
	}
	if cap(p1.Data) != 1 {
		t.Fail()
	}
	p1.Acquire()
	p1.Release()
	if p1.Data == nil {
		t.Fail()
	}

	p2 := p1.Slice(0, 1)
	p1.Release()
	if p1.Data == nil {
		t.Fail()
	}

	p2.Release()
	if p2.Data != nil {
		t.Fail()
	}
	if p1.Data != nil {
		t.Fail()
	}
}

func TestPageReader(t *testing.T) {
	data := []byte("hello")
	p := NewPage(data)
	r := NewPageReader(p)

	if n, err := r.Read(nil); n != 0 || err != nil {
		t.Fatalf("read should return 0")
	}
	buf := make([]byte, 3)
	if n, err := r.Read(buf); n != 3 || err != nil {
		t.Fatalf("read should return 3 but got %d", n)
	}
	if n, err := r.Read(buf); n != 2 || (err != nil && err != io.EOF) {
		t.Fatalf("read should return 2 but got %d", n)
	}
	if n, err := r.Read(buf); n != 0 || err != io.EOF {
		t.Fatalf("read should return 0")
	}
	if n, err := r.ReadAt(buf, 4); n != 1 || (err != nil && err != io.EOF) {
		t.Fatalf("read should return 1")
	}
	if n, err := r.ReadAt(buf, 5); n != 0 || err != io.EOF {
		t.Fatalf("read should return 0")
	}
	_ = r.Close()
	if n, err := r.ReadAt(buf, 5); n != 0 || err == nil {
		t.Fatalf("read should fail after close")
	}
}


================================================
FILE: pkg/chunk/prefetch.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"sync"
)

type prefetcher struct {
	sync.Mutex
	pending chan string
	busy    map[string]bool
	op      func(key string)
}

func newPrefetcher(parallel int, fetch func(string)) *prefetcher {
	p := &prefetcher{
		pending: make(chan string, 10),
		busy:    make(map[string]bool),
		op:      fetch,
	}
	for i := 0; i < parallel; i++ {
		go p.do()
	}
	return p
}

func (p *prefetcher) do() {
	for key := range p.pending {
		p.op(key)

		p.Lock()
		delete(p.busy, key)
		p.Unlock()
	}
}

func (p *prefetcher) fetch(key string) {
	p.Lock()
	defer p.Unlock()
	if _, ok := p.busy[key]; ok {
		return
	}
	select {
	case p.pending <- key:
		p.busy[key] = true
	default:
	}
}


================================================
FILE: pkg/chunk/prefetch_test.go
================================================
package chunk

import (
	"sync/atomic"
	"testing"
	"time"
)

func TestPrefetcher(t *testing.T) {
	t.Run("should fetch given keys", func(t *testing.T) {
		keys := []string{"source/1", "source/2", "source/3", "source/4"}
		chRes := make(chan string, len(keys))
		defer close(chRes)
		f := newPrefetcher(2, func(k string) {
			chRes <- k + "Done"
		})
		for _, k := range keys {
			f.fetch(k)
		}
		res := make(map[string]bool, len(keys))
		for range keys {
			res[<-chRes] = true
		}
		if len(res) != len(keys) {
			t.Errorf("Incorrect number of keys fetched, expect: %d, got: %d", len(keys), len(res))
		}
		for _, k := range keys {
			if !res[k+"Done"] {
				t.Errorf("Key not fetched: %s", k)
			}
		}
	})
	t.Run("should ignore duplicate keys", func(t *testing.T) {
		var counter int32
		f := newPrefetcher(4, func(k string) {
			// Introduce a little latency to mimic a slower fetch operation
			// so that our few duplicate keys can reach the prefetcher in the time period
			time.Sleep(time.Millisecond)
			atomic.AddInt32(&counter, 1)
		})
		for i := 0; i < 5; i++ {
			f.fetch("a")
		}
		if atomic.LoadInt32(&counter) > 1 {
			t.Errorf("Duplicate keys  fetched")
		}
	})

	t.Run("should drop keys when pending queue is full", func(t *testing.T) {
		const maxPending = 10
		var counter int32

		f := newPrefetcher(1, func(k string) {
			atomic.AddInt32(&counter, 1)
			time.Sleep(10 * time.Millisecond)
		})

		for i := 0; i < maxPending+1; i++ {
			f.fetch(string(rune('a' + i)))
		}

		time.Sleep(50 * time.Millisecond)

		finalCount := atomic.LoadInt32(&counter)
		if finalCount > maxPending {
			t.Errorf("Processed count %d exceeds queue capacity %d", finalCount, maxPending)
		}
	})
}


================================================
FILE: pkg/chunk/singleflight.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import "sync"

type request struct {
	wg   sync.WaitGroup
	val  *Page
	dups int
	err  error
}

type Controller struct {
	sync.Mutex
	rs map[string]*request
}

func NewController() *Controller {
	return &Controller{
		rs: make(map[string]*request),
	}
}

func (con *Controller) Execute(key string, fn func() (*Page, error)) (*Page, error) {
	con.Lock()
	if c, ok := con.rs[key]; ok {
		c.dups++
		con.Unlock()
		c.wg.Wait()
		return c.val, c.err
	}
	c := new(request)
	c.wg.Add(1)
	con.rs[key] = c
	con.Unlock()

	c.val, c.err = fn()

	con.Lock()
	for i := 0; i < c.dups; i++ {
		// Acquire for the pending Execute
		c.val.Acquire()
	}
	delete(con.rs, key)
	con.Unlock()

	c.wg.Done()

	return c.val, c.err
}

func (con *Controller) TryPiggyback(key string) (*Page, error) {
	con.Lock()
	if c, ok := con.rs[key]; ok {
		c.dups++
		con.Unlock()
		c.wg.Wait()
		return c.val, c.err
	}
	con.Unlock()
	return nil, nil
}


================================================
FILE: pkg/chunk/singleflight_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"bytes"
	"fmt"
	"strconv"
	"sync"
	"sync/atomic"
	"testing"
	"time"
)

func TestSingleFlight(t *testing.T) {
	g := NewController()
	gp := &sync.WaitGroup{}
	var cache sync.Map
	var n int32
	var piggyback atomic.Int64
	iters := 100000
	errCh := make(chan error, iters)

	for i := 0; i < iters; i++ {
		gp.Add(2)
		go func(k int) {
			p, _ := g.Execute(strconv.Itoa(k/100), func() (*Page, error) {
				time.Sleep(time.Microsecond * 500000) // In most cases 500ms is enough to run 100 goroutines
				atomic.AddInt32(&n, 1)
				page := NewOffPage(100)
				copy(page.Data, make([]byte, 100)) // zeroed
				copy(page.Data, strconv.Itoa(k/100))
				return page, nil
			})
			p.Release()
			cache.LoadOrStore(strconv.Itoa(k/100), p)
			gp.Done()
		}(i)
		go func(k int) {
			defer gp.Done()
			page, _ := g.TryPiggyback(strconv.Itoa(k / 100))
			if page != nil {
				expected := make([]byte, 100)
				copy(expected, strconv.Itoa(k/100))
				if bytes.Compare(page.Data, expected) != 0 {
					errCh <- fmt.Errorf("got %x, want %x, key: %d", page.Data, expected, k/100)
				}
				page.Release()
				piggyback.Add(1)
			}
		}(i)
	}
	gp.Wait()
	close(errCh)

	for err := range errCh {
		t.Fatalf("Test failed: %v", err)
	}

	nv := int(atomic.LoadInt32(&n))
	if nv != iters/100 {
		t.Fatalf("singleflight doesn't take effect: %v", nv)
	}
	if piggyback.Load() == 0 {
		t.Fatal("never piggybacked?")
	}

	// verify the ref
	cache.Range(func(key any, value any) bool {
		if value.(*Page).refs != 0 {
			t.Fatalf("refs of page is not 0, got: %d, key: %s", value.(*Page).refs, key)
		}
		return true
	})
}


================================================
FILE: pkg/chunk/utils_darwin.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"os"
	"syscall"
	"time"
)

func getAtime(fi os.FileInfo) time.Time {
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return time.Unix(sst.Atimespec.Unix())
	} else {
		return fi.ModTime()
	}
}

func dropOSCache(r ReadCloser) {}


================================================
FILE: pkg/chunk/utils_linux.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"os"
	"syscall"
	"time"

	"golang.org/x/sys/unix"
)

func getAtime(fi os.FileInfo) time.Time {
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return time.Unix(sst.Atim.Unix())
	}
	return fi.ModTime()
}

func dropOSCache(r ReadCloser) {
	if cf, ok := r.(*cacheFile); ok {
		_ = unix.Fadvise(int(cf.Fd()), 0, 0, unix.FADV_DONTNEED)
	} else if f, ok := r.(*os.File); ok {
		_ = unix.Fadvise(int(f.Fd()), 0, 0, unix.FADV_DONTNEED)
	}
}


================================================
FILE: pkg/chunk/utils_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"os"
	"syscall"
)

func getNlink(fi os.FileInfo) int {
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return int(sst.Nlink)
	}
	return 1
}

func getDiskUsage(path string) (uint64, uint64, uint64, uint64) {
	var stat syscall.Statfs_t
	if err := syscall.Statfs(path, &stat); err == nil {
		return stat.Blocks * uint64(stat.Bsize), stat.Bavail * uint64(stat.Bsize), stat.Files, stat.Ffree
	} else {
		logger.Warnf("statfs %s: %s", path, err)
		return 1, 1, 1, 1
	}
}

func changeMode(dir string, st os.FileInfo, mode os.FileMode) {
	sst := st.Sys().(*syscall.Stat_t)
	if os.Getuid() == int(sst.Uid) {
		_ = os.Chmod(dir, mode)
	}
}

func inRootVolume(dir string) bool {
	dstat, err := os.Stat(dir)
	if err != nil {
		logger.Warnf("stat `%s`: %s", dir, err.Error())
		return false
	}
	rstat, err := os.Stat("/")
	if err != nil {
		logger.Warnf("stat `/`: %s", err.Error())
		return false
	}
	return dstat.Sys().(*syscall.Stat_t).Dev == rstat.Sys().(*syscall.Stat_t).Dev
}


================================================
FILE: pkg/chunk/utils_unix_test.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"os"
	"runtime"
	"testing"
)

func TestInRootVolume(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.SkipNow()
	}
	if !inRootVolume("/") {
		t.Fatal("`/` is in root volume")
	}
	if inRootVolume(".") {
		err := os.MkdirAll("./__test__", 0755)
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll("./__test__")
		if !inRootVolume("./__test__") {
			t.Fatal("`./__test__` is in root volume")
		}
	}
	if !inRootVolume("/tmp") {
		err := os.MkdirAll("/tmp/__jfs_test__", 0755)
		if err != nil {
			t.Fatal(err)
		}
		defer os.RemoveAll("/tmp/__jfs_test__")
		if inRootVolume("/tmp/__jfs_test__") {
			t.Fatal("`/tmp/__jfs_test__` is not in root volume")
		}
	}
}


================================================
FILE: pkg/chunk/utils_windows.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package chunk

import (
	"os"
	"syscall"
	"time"

	sys "golang.org/x/sys/windows"
)

func getAtime(fi os.FileInfo) time.Time {
	stat, ok := fi.Sys().(*syscall.Win32FileAttributeData)
	if ok {
		return time.Unix(0, stat.LastAccessTime.Nanoseconds())
	} else {
		return time.Unix(0, 0)
	}
}

func dropOSCache(r ReadCloser) {}

func getNlink(fi os.FileInfo) int {
	return 1
}

func getDiskUsage(path string) (uint64, uint64, uint64, uint64) {
	var freeBytes, total, totalFree uint64
	err := sys.GetDiskFreeSpaceEx(sys.StringToUTF16Ptr(path), &freeBytes, &total, &totalFree)
	if err != nil {
		logger.Errorf("GetDiskFreeSpaceEx %s: %s", path, err.Error())
		return 1, 1, 1, 1
	}
	return total, freeBytes, 1, 1
}

func changeMode(dir string, st os.FileInfo, mode os.FileMode) {}

func inRootVolume(dir string) bool { return false }


================================================
FILE: pkg/compress/compress.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package compress

import (
	"fmt"
	"strings"

	"github.com/DataDog/zstd"
	"github.com/hungys/go-lz4"
)

// ZSTD_LEVEL compression level used by Zstd
const ZSTD_LEVEL = 1 // fastest

// Compressor interface to be implemented by a compression algo
type Compressor interface {
	Name() string
	CompressBound(int) int
	Compress(dst, src []byte) (int, error)
	Decompress(dst, src []byte) (int, error)
}

// NewCompressor returns a struct implementing Compressor interface
func NewCompressor(algr string) Compressor {
	algr = strings.ToLower(algr)
	if algr == "zstd" {
		return ZStandard{ZSTD_LEVEL}
	} else if algr == "lz4" {
		return LZ4{}
	} else if algr == "none" || algr == "" {
		return noOp{}
	}
	return nil
}

type noOp struct{}

func (n noOp) Name() string            { return "Noop" }
func (n noOp) CompressBound(l int) int { return l }
func (n noOp) Compress(dst, src []byte) (int, error) {
	if len(dst) < len(src) {
		return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(src))
	}
	copy(dst, src)
	return len(src), nil
}
func (n noOp) Decompress(dst, src []byte) (int, error) {
	if len(dst) < len(src) {
		return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(src))
	}
	copy(dst, src)
	return len(src), nil
}

// ZStandard implements Compressor interface using zstd library
type ZStandard struct {
	level int
}

// Name returns name of the algorithm Zstd
func (n ZStandard) Name() string { return "Zstd" }

// CompressBound max size of compressed data
func (n ZStandard) CompressBound(l int) int { return zstd.CompressBound(l) }

// Compress using Zstd
func (n ZStandard) Compress(dst, src []byte) (int, error) {
	d, err := zstd.CompressLevel(dst, src, n.level)
	if err != nil {
		return 0, err
	}
	if len(d) > 0 && len(dst) > 0 && &d[0] != &dst[0] {
		return 0, fmt.Errorf("buffer too short: %d < %d", cap(dst), cap(d))
	}
	return len(d), err
}

// Decompress using Zstd
func (n ZStandard) Decompress(dst, src []byte) (int, error) {
	d, err := zstd.Decompress(dst, src)
	if err != nil {
		return 0, err
	}
	if len(d) > 0 && len(dst) > 0 && &d[0] != &dst[0] {
		return 0, fmt.Errorf("buffer too short: %d < %d", len(dst), len(d))
	}
	return len(d), err
}

// LZ4 implements Compressor using LZ4 library
type LZ4 struct{}

// Name returns name of the algorithm LZ4
func (l LZ4) Name() string { return "LZ4" }

// CompressBound max size of compressed data
func (l LZ4) CompressBound(size int) int { return lz4.CompressBound(size) }

// Compress using LZ4 algorithm
func (l LZ4) Compress(dst, src []byte) (int, error) {
	return lz4.CompressDefault(src, dst)
}

// Decompress using LZ4 algorithm
func (l LZ4) Decompress(dst, src []byte) (int, error) {
	if len(src) == 0 {
		return 0, fmt.Errorf("decompress an empty input")
	}
	return lz4.DecompressSafe(src, dst)
}


================================================
FILE: pkg/compress/compress_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package compress

import (
	"io"
	"os"
	"testing"
)

func testCompress(t *testing.T, c Compressor) {
	src := []byte(c.Name())
	testIt := func(src []byte) {
		if len(src) > 1 {
			_, err := c.Compress(make([]byte, 1), src)
			if err == nil {
				t.Fatal("expect short buffer error, but got nil ")
			}
		}
		dst := make([]byte, c.CompressBound(len(src)))
		n, err := c.Compress(dst, src)
		if err != nil {
			t.Fatalf("compress: %s", err)
		}
		if len(src) > 1 {
			_, err = c.Decompress(make([]byte, 1), dst[:n])
			if err == nil {
				t.Fatalf("expect short buffer error, but got nil")
			}
		}
		src2 := make([]byte, len(src))
		n, err = c.Decompress(src2, dst[:n])
		if err != nil {
			t.Fatalf("decompress: %s", err)
		}
		if string(src2[:n]) != string(src) {
			t.Fatalf("expect %s but got %s", string(src), string(src2))
		}
	}

	testIt(src)
	testIt(nil)

	if c.CompressBound(0) > 0 {
		n, err := c.Decompress(make([]byte, 100), src[:0])
		if err == nil || n > 0 {
			t.Fatalf("decompress should fail, but got %d", n)
		}
	}
}

func TestUncompressed(t *testing.T) {
	testCompress(t, NewCompressor("none"))
}

func TestZstd(t *testing.T) {
	testCompress(t, NewCompressor("zstd"))
}

func TestLZ4(t *testing.T) {
	testCompress(t, NewCompressor("lz4"))
}

func benchmarkDecompress(b *testing.B, comp Compressor) {
	f, _ := os.Open(os.Getenv("PAYLOAD"))
	var c = make([]byte, 5<<20)
	var d = make([]byte, 4<<20)
	n, err := io.ReadFull(f, d)
	f.Close()
	if err != nil {
		b.Skip()
		return
	}
	d = d[:n]
	n, err = comp.Compress(c[:4<<20], d)
	if err != nil {
		b.Errorf("compress: %s", err)
		b.FailNow()
	}
	c = c[:n]
	// println("compres", comp.Name(), len(c), len(d))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		n, err := comp.Decompress(d, c)
		if err != nil {
			b.Errorf("decompress %d %s", n, err)
			b.FailNow()
		}
		b.SetBytes(int64(len(d)))
	}
}

func BenchmarkDecompressZstd(b *testing.B) {
	benchmarkDecompress(b, NewCompressor("zstd"))
}

func BenchmarkDecompressLZ4(b *testing.B) {
	benchmarkDecompress(b, LZ4{})
}

func BenchmarkDecompressNone(b *testing.B) {
	benchmarkDecompress(b, NewCompressor("none"))
}

func benchmarkCompress(b *testing.B, comp Compressor) {
	f, _ := os.Open(os.Getenv("PAYLOAD"))
	var d = make([]byte, 4<<20)
	n, err := io.ReadFull(f, d)
	f.Close()
	if err != nil {
		b.Skip()
		return
	}
	d = d[:n]
	var c = make([]byte, 5<<20)
	// println("compres", comp.Name(), len(c), len(d))
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		n, err := comp.Compress(c, d)
		if err != nil {
			b.Errorf("compress %d %s", n, err)
			b.FailNow()
		}
		b.SetBytes(int64(len(d)))
	}
}

func BenchmarkCompressZstd(b *testing.B) {
	benchmarkCompress(b, NewCompressor("Zstd"))
}

func BenchmarkCompressCLZ4(b *testing.B) {
	benchmarkCompress(b, LZ4{})
}
func BenchmarkCompressNone(b *testing.B) {
	benchmarkCompress(b, NewCompressor("none"))
}


================================================
FILE: pkg/fs/fs.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"path"
	"runtime/trace"
	"sort"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/acl"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/prometheus/client_golang/prometheus"
)

var logger = utils.GetLogger("juicefs")

type Ino = meta.Ino
type Attr = meta.Attr
type LogContext = vfs.LogContext

func IsExist(err error) bool {
	return err == syscall.EEXIST || err == syscall.EACCES || err == syscall.EPERM
}

func IsNotExist(err error) bool {
	return err == syscall.ENOENT
}

func IsNotEmpty(err error) bool {
	return err == syscall.ENOTEMPTY
}

func errstr(e error) string {
	if e == nil {
		return "OK"
	}
	if eno, ok := e.(syscall.Errno); ok && eno == 0 {
		return "OK"
	}
	return e.Error()
}

type FileStat struct {
	name  string
	inode Ino
	attr  *Attr
}

func (fs *FileStat) Inode() Ino   { return fs.inode }
func (fs *FileStat) Name() string { return fs.name }
func (fs *FileStat) Size() int64  { return int64(fs.attr.Length) }
func (fs *FileStat) Mode() os.FileMode {
	attr := fs.attr
	mode := os.FileMode(attr.Mode & 0777)
	if attr.Mode&04000 != 0 {
		mode |= os.ModeSetuid
	}
	if attr.Mode&02000 != 0 {
		mode |= os.ModeSetgid
	}
	if attr.Mode&01000 != 0 {
		mode |= os.ModeSticky
	}
	if attr.AccessACL+attr.DefaultACL > 0 {
		mode |= 1 << 18
	}
	switch attr.Typ {
	case meta.TypeDirectory:
		mode |= os.ModeDir
	case meta.TypeSymlink:
		mode |= os.ModeSymlink
	case meta.TypeFile:
	default:
	}
	return mode
}
func (fs *FileStat) ModTime() time.Time {
	return time.Unix(fs.attr.Mtime, int64(fs.attr.Mtimensec))
}
func (fs *FileStat) IsDir() bool      { return fs.attr.Typ == meta.TypeDirectory }
func (fs *FileStat) IsSymlink() bool  { return fs.attr.Typ == meta.TypeSymlink }
func (fs *FileStat) Sys() interface{} { return fs.attr }
func (fs *FileStat) Uid() int         { return int(fs.attr.Uid) }
func (fs *FileStat) Gid() int         { return int(fs.attr.Gid) }

func (fs *FileStat) Atime() int64 { return fs.attr.Atime*1000 + int64(fs.attr.Atimensec/1e6) }
func (fs *FileStat) Mtime() int64 { return fs.attr.Mtime*1000 + int64(fs.attr.Mtimensec/1e6) }

func (fs *FileStat) Attr() *Attr { return fs.attr }

func AttrToFileInfo(inode Ino, attr *Attr) *FileStat {
	return &FileStat{inode: inode, attr: attr}
}

type entryCache struct {
	inode  Ino
	typ    uint8
	expire time.Time
}

type attrCache struct {
	attr   Attr
	expire time.Time
}

type FileSystem struct {
	conf        *vfs.Config
	reader      vfs.DataReader
	writer      vfs.DataWriter
	m           meta.Meta
	store       chunk.ChunkStore
	cacheFiller *vfs.CacheFiller

	Superuser  string
	Supergroup string

	cacheM          sync.Mutex
	entries         map[Ino]map[string]*entryCache
	attrs           map[Ino]*attrCache
	checkAccessFile time.Duration
	rotateAccessLog int64
	logBuffer       chan string

	readSizeHistogram     prometheus.Histogram
	writtenSizeHistogram  prometheus.Histogram
	opsDurationsHistogram prometheus.Histogram

	registry *prometheus.Registry

	// Pre-parsed subdir prefixes for fast path checking
	subdirPrefixes []string
}

type File struct {
	path  string
	inode Ino
	info  *FileStat
	fs    *FileSystem

	sync.Mutex
	flags    uint32
	offset   int64
	rdata    vfs.FileReader
	wdata    vfs.FileWriter
	dircache []os.FileInfo
	entries  []*meta.Entry
	data     []byte
}

func NewFileSystem(conf *vfs.Config, m meta.Meta, d chunk.ChunkStore, registry *prometheus.Registry) (*FileSystem, error) {
	reader := vfs.NewDataReader(conf, m, d)
	fs := &FileSystem{
		m:               m,
		store:           d,
		conf:            conf,
		cacheFiller:     vfs.NewCacheFiller(conf, m, d),
		reader:          reader,
		writer:          vfs.NewDataWriter(conf, m, d, reader),
		entries:         make(map[meta.Ino]map[string]*entryCache),
		attrs:           make(map[meta.Ino]*attrCache),
		checkAccessFile: time.Minute,
		rotateAccessLog: 300 << 20, // 300 MiB

		readSizeHistogram: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "sdk_read_size_bytes",
			Help:    "size of read distributions.",
			Buckets: prometheus.LinearBuckets(4096, 4096, 32),
		}),
		writtenSizeHistogram: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "sdk_written_size_bytes",
			Help:    "size of write distributions.",
			Buckets: prometheus.LinearBuckets(4096, 4096, 32),
		}),
		opsDurationsHistogram: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "sdk_ops_durations_histogram_seconds",
			Help:    "Operations latency distributions.",
			Buckets: prometheus.ExponentialBuckets(0.00001, 1.8, 29),
		}),
		registry: registry,
	}

	// Pre-parse subdir prefixes for fast path checking
	if conf.Subdir != "" {
		subdirs := strings.Split(conf.Subdir, ",")
		fs.subdirPrefixes = make([]string, 0, len(subdirs))
		for _, prefix := range subdirs {
			prefix = strings.TrimSpace(prefix)
			if prefix != "" {
				fs.subdirPrefixes = append(fs.subdirPrefixes, prefix)
			}
		}
	}

	go fs.cleanupCache()
	if conf.AccessLog != "" {
		f, err := os.OpenFile(conf.AccessLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			logger.Errorf("Open access log %s: %s", conf.AccessLog, err)
		} else {
			_ = os.Chmod(conf.AccessLog, 0666)
			fs.logBuffer = make(chan string, 1024)
			go fs.flushLog(f, fs.logBuffer, conf.AccessLog)
		}
	}
	return fs, nil
}

func (fs *FileSystem) InitMetrics(reg prometheus.Registerer) {
	if reg != nil {
		reg.MustRegister(fs.readSizeHistogram)
		reg.MustRegister(fs.writtenSizeHistogram)
		reg.MustRegister(fs.opsDurationsHistogram)
		vfs.InitMemoryBufferMetrics(fs.writer, fs.reader, reg)
	}
}

func (fs *FileSystem) cleanupCache() {
	for {
		fs.cacheM.Lock()
		now := time.Now()
		var cnt int
		for inode, it := range fs.attrs {
			if now.After(it.expire) {
				delete(fs.attrs, inode)
			}
			cnt++
			if cnt > 1000 {
				break
			}
		}
		cnt = 0
	OUTER:
		for inode, es := range fs.entries {
			for n, e := range es {
				if now.After(e.expire) {
					delete(es, n)
					if len(es) == 0 {
						delete(fs.entries, inode)
					}
				}
				cnt++
				if cnt > 1000 {
					break OUTER
				}
			}
		}
		fs.cacheM.Unlock()
		time.Sleep(time.Second)
	}
}

func (fs *FileSystem) InvalidateEntry(parent Ino, name string) {
	fs.cacheM.Lock()
	defer fs.cacheM.Unlock()
	es, ok := fs.entries[parent]
	if ok {
		delete(es, name)
		if len(es) == 0 {
			delete(fs.entries, parent)
		}
	}
}

func (fs *FileSystem) InvalidateAttr(ino Ino) {
	fs.cacheM.Lock()
	defer fs.cacheM.Unlock()
	delete(fs.attrs, ino)
}

func (fs *FileSystem) log(ctx LogContext, format string, args ...interface{}) {
	used := ctx.Duration()
	fs.opsDurationsHistogram.Observe(used.Seconds())
	if fs.logBuffer == nil {
		return
	}
	now := utils.Now()
	cmd := fmt.Sprintf(format, args...)
	ts := now.Format("2006.01.02 15:04:05.000000")
	cmd += fmt.Sprintf(" <%.6f>", used.Seconds())
	line := fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd)
	select {
	case fs.logBuffer <- line:
	default:
		logger.Debugf("log dropped: %s", line[:len(line)-1])
	}
}

func (fs *FileSystem) flushLog(f *os.File, logBuffer chan string, path string) {
	buf := make([]byte, 0, 128<<10)
	var lastcheck = time.Now()
	for {
		line := <-logBuffer
		buf = append(buf[:0], []byte(line)...)
	LOOP:
		for len(buf) < (128 << 10) {
			select {
			case line = <-logBuffer:
				buf = append(buf, []byte(line)...)
			default:
				break LOOP
			}
		}
		_, err := f.Write(buf)
		if err != nil {
			logger.Errorf("write access log: %s", err)
			break
		}
		if lastcheck.Add(fs.checkAccessFile).After(time.Now()) {
			continue
		}
		lastcheck = time.Now()
		var fi os.FileInfo
		fi, err = f.Stat()
		if err == nil && fi.Size() > fs.rotateAccessLog {
			_ = f.Close()
			fi, err = os.Stat(path)
			if err == nil && fi.Size() > fs.rotateAccessLog {
				tmp := fmt.Sprintf("%s.%p", path, fs)
				if os.Rename(path, tmp) == nil {
					for i := 6; i > 0; i-- {
						_ = os.Rename(path+"."+strconv.Itoa(i), path+"."+strconv.Itoa(i+1))
					}
					_ = os.Rename(tmp, path+".1")
				} else {
					fi, err = os.Stat(path)
					if err == nil && fi.Size() > fs.rotateAccessLog*7 {
						logger.Infof("can't rename %s, truncate it", path)
						_ = os.Truncate(path, 0)
					}
				}
			}
			f, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
			if err != nil {
				logger.Errorf("open %s: %s", path, err)
				break
			}
			_ = os.Chmod(path, 0666)
		}
	}
}

func (fs *FileSystem) Meta() meta.Meta {
	return fs.m
}

func (fs *FileSystem) StatFS(ctx meta.Context) (totalspace uint64, availspace uint64) {
	defer trace.StartRegion(context.TODO(), "fs.StatFS").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "StatFS (): (%d,%d)", totalspace, availspace) }()
	var iused, iavail uint64
	_ = fs.m.StatFS(ctx, meta.RootInode, &totalspace, &availspace, &iused, &iavail)
	return
}

// open file without following symlink
func (fs *FileSystem) Lopen(ctx meta.Context, path string, flags uint32) (f *File, err syscall.Errno) {
	return fs.open(ctx, path, flags, false)
}

func (fs *FileSystem) Open(ctx meta.Context, path string, flags uint32) (*File, syscall.Errno) {
	return fs.open(ctx, path, flags, true)
}

func (fs *FileSystem) open(ctx meta.Context, path string, flags uint32, followLink bool) (f *File, err syscall.Errno) {
	_, task := trace.NewTask(context.TODO(), "Open")
	defer task.End()
	l := vfs.NewLogContext(ctx)
	if flags != 0 {
		defer func() { fs.log(l, "Open (%s,%d): %s", path, flags, errstr(err)) }()
	} else {
		defer func() { fs.log(l, "Lookup (%s): %s", path, errstr(err)) }()
	}
	var fi *FileStat
	fi, err = fs.resolve(ctx, path, followLink)
	if err != 0 {
		return
	}

	if flags != 0 && !fi.IsDir() {
		var oflags uint32 = syscall.O_RDONLY
		if flags == vfs.MODE_MASK_W {
			oflags = syscall.O_WRONLY
		} else if flags&vfs.MODE_MASK_W != 0 {
			oflags = syscall.O_RDWR
		}
		err = fs.m.Open(ctx, fi.inode, oflags, fi.attr)
		if err != 0 {
			return
		}
	}

	f = &File{}
	f.path = path
	f.inode = fi.inode
	f.info = fi
	f.fs = fs
	f.flags = flags
	switch fi.inode {
	case vfs.ConfigInode:
		fs.conf.Format = fs.Meta().GetFormat()
		fs.conf.Format.RemoveSecret()
		f.data, _ = json.MarshalIndent(fs.conf, "", " ")
		f.info.attr.Length = uint64(len(f.data))
	case vfs.StatsInode:
		f.data = vfs.CollectMetrics(fs.registry)
		f.info.attr.Length = uint64(len(f.data))
	}
	return
}

func (fs *FileSystem) Access(ctx meta.Context, path string, flags int) (err syscall.Errno) {
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Access (%s): %s", path, errstr(err)) }()
	var fi *FileStat
	fi, err = fs.resolve(ctx, path, true)
	if err != 0 {
		return
	}

	if ctx.Uid() != 0 && flags != 0 {
		err = fs.m.Access(ctx, fi.inode, uint8(flags), fi.attr)
	}
	return
}

func (fs *FileSystem) Stat(ctx meta.Context, path string) (fi *FileStat, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Stat").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Stat (%s): %s", path, errstr(err)) }()
	return fs.resolve(ctx, path, true)
}

func (fs *FileSystem) Lstat(ctx meta.Context, path string) (fi *FileStat, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Lstat").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Lstat (%s): %s", path, errstr(err)) }()
	return fs.resolve(ctx, path, false)
}

// parentDir returns parent of /foo/bar/ as /foo
func parentDir(p string) string {
	return path.Dir(strings.TrimRight(p, "/"))
}

func (fs *FileSystem) Mkdir(ctx meta.Context, p string, mode uint16, umask uint16) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Mkdir").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Mkdir (%s, %o): %s", p, mode, errstr(err)) }()
	if p == "/" {
		return syscall.EEXIST
	}
	fi, err := fs.resolve(ctx, parentDir(p), true)
	if err != 0 {
		return err
	}
	var inode Ino
	err = fs.m.Mkdir(ctx, fi.inode, path.Base(p), mode, umask, 0, &inode, nil)
	if err == syscall.ENOENT && fi.inode != 1 {
		// parent be moved into trash, try again
		if fs.conf.DirEntryTimeout > 0 {
			parent := parentDir(p)
			if fi, err := fs.resolve(ctx, parentDir(parent), true); err == 0 {
				fs.InvalidateEntry(fi.inode, path.Base(parent))
			}
		}
		if fi2, e := fs.resolve(ctx, parentDir(p), true); e != 0 {
			return e
		} else if fi2.inode != fi.inode {
			err = fs.m.Mkdir(ctx, fi2.inode, path.Base(p), mode, umask, 0, &inode, nil)
		}
	}
	fs.InvalidateEntry(fi.inode, path.Base(p))
	return
}

func (fs *FileSystem) MkdirAll(ctx meta.Context, p string, mode uint16, umask uint16) (err syscall.Errno) {
	return fs.MkdirAll0(ctx, p, mode, umask, true)
}

func (fs *FileSystem) MkdirAll0(ctx meta.Context, p string, mode uint16, umask uint16, existOK bool) (err syscall.Errno) {
	err = fs.Mkdir(ctx, p, mode, umask)
	if err == syscall.ENOENT {
		err = fs.MkdirAll(ctx, parentDir(p), mode, umask)
		if err == 0 {
			err = fs.Mkdir(ctx, p, mode, umask)
		}
	}
	if existOK && err == syscall.EEXIST {
		err = 0
	}
	return err
}

func (fs *FileSystem) Unlink(ctx meta.Context, p string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Unlink").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Unlink (%s): %s", p, errstr(err)) }()
	return fs.Delete0(ctx, p, true)
}

func (fs *FileSystem) Delete(ctx meta.Context, p string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Delete").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Delete (%s): %s", p, errstr(err)) }()
	return fs.Delete0(ctx, p, false)
}

func (fs *FileSystem) BatchDeleteEntries(ctx meta.Context, parent string, ps []string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.BatchDeleteEntries").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "BatchDeleteEntries : %s", errstr(err)) }()
	parentInfo, errno := fs.Stat(ctx, parent)
	if errno != 0 {
		return errno
	}
	var entries []*meta.Entry
	for _, p := range ps {
		fi, e := fs.Stat(ctx, p)
		if errors.Is(e, syscall.ENOENT) {
			continue
		}
		if e != 0 {
			return e
		}
		entries = append(entries, &meta.Entry{Inode: fi.Inode(), Name: []byte(fi.Name()), Attr: fi.Attr()})
	}
	if len(entries) == 0 {
		return 0
	}
	eno := fs.m.BatchUnlink(ctx, parentInfo.inode, entries, nil, false)
	for _, p := range ps {
		fs.InvalidateEntry(parentInfo.inode, path.Base(p))
	}
	return eno
}

func (fs *FileSystem) Delete0(ctx meta.Context, p string, callByUnlink bool) (err syscall.Errno) {
	parent, err := fs.resolve(ctx, parentDir(p), true)
	if err != 0 {
		return
	}
	fi, err := fs.resolve(ctx, p, false)
	if err != 0 {
		return
	}
	if fi.IsDir() {
		if callByUnlink {
			err = syscall.EISDIR
			return
		}
		err = fs.m.Rmdir(ctx, parent.inode, path.Base(p))
	} else {
		err = fs.m.Unlink(ctx, parent.inode, path.Base(p))
	}
	fs.InvalidateEntry(parent.inode, path.Base(p))
	return
}

func (fs *FileSystem) Rmdir(ctx meta.Context, p string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Rmdir").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Rmdir (%s): %s", p, errstr(err)) }()
	parent, err := fs.resolve(ctx, parentDir(p), true)
	if err != 0 {
		return
	}
	err = fs.m.Rmdir(ctx, parent.inode, path.Base(p))
	fs.InvalidateEntry(parent.inode, path.Base(p))
	return
}

func (fs *FileSystem) Rmr(ctx meta.Context, p string, skipTrash bool, numthreads int) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Rmr").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Rmr (%s): %s", p, errstr(err)) }()
	parent, err := fs.resolve(ctx, parentDir(p), true)
	if err != 0 {
		return
	}
	err = fs.m.Remove(ctx, parent.inode, path.Base(p), skipTrash, numthreads, nil)
	fs.InvalidateEntry(parent.inode, path.Base(p))
	return
}

func trimDotsForRename(paths []string) (res []string) {
	for i, p := range paths {
		if p == "." {
			paths[i] = ""
		} else if p == ".." {
			if i > 0 {
				paths[i] = ""
				paths[i-1] = ""
			}
		}
	}
	for _, p := range paths {
		if p != "" {
			res = append(res, p)
		}
	}

	return
}

func (fs *FileSystem) Rename(ctx meta.Context, oldpath string, newpath string, flags uint32) (err syscall.Errno) {
	oss := trimDotsForRename(strings.Split(oldpath, "/"))
	nss := trimDotsForRename(strings.Split(newpath, "/"))
	var err0 syscall.Errno

	// check if oldpath is ancestor of newpath
	for i := 0; i < len(oss); {
		if i >= len(nss) || oss[i] != nss[i] {
			break
		} else { // oss[i] == nss[i]
			i++
			if i == len(oss) && i == len(nss) {
				break
			} else if i == len(oss) {
				err0 = syscall.EINVAL
				break
			}
		}
	}

	defer trace.StartRegion(context.TODO(), "fs.Rename").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Rename (%s,%s,%d): %s", oldpath, newpath, flags, errstr(err)) }()
	oldfi, err := fs.resolve(ctx, parentDir(oldpath), true)
	if err != 0 {
		return
	}
	newfi, err := fs.resolve(ctx, parentDir(newpath), true)
	if err != 0 {
		return
	}
	if err0 != 0 {
		return err0
	}
	err = fs.m.Rename(ctx, oldfi.inode, path.Base(oldpath), newfi.inode, path.Base(newpath), flags, nil, nil)
	fs.InvalidateEntry(oldfi.inode, path.Base(oldpath))
	fs.InvalidateEntry(newfi.inode, path.Base(newpath))
	return
}

func (fs *FileSystem) Link(ctx meta.Context, src string, dst string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Link").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Link (%s,%s): %s", src, dst, errstr(err)) }()

	fi, err := fs.resolve(ctx, src, false)
	if err != 0 {
		return
	}
	pi, err := fs.resolve(ctx, parentDir(dst), true)
	if err != 0 {
		return
	}
	err = fs.m.Link(ctx, fi.inode, pi.inode, path.Base(dst), nil)
	fs.InvalidateEntry(pi.inode, path.Base(dst))
	return
}

func (fs *FileSystem) Symlink(ctx meta.Context, target string, link string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Symlink").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Symlink (%s,%s): %s", target, link, errstr(err)) }()
	if strings.HasSuffix(link, "/") {
		return syscall.EINVAL
	}
	fi, err := fs.resolve(ctx, parentDir(link), true)
	if err != 0 {
		return
	}
	err = fs.m.Symlink(ctx, fi.inode, path.Base(link), target, nil, nil)
	fs.InvalidateEntry(fi.inode, path.Base(link))
	return
}

func (fs *FileSystem) Readlink(ctx meta.Context, link string) (path []byte, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Readlink").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Readlink (%s): %s (%d)", link, errstr(err), len(path)) }()
	fi, err := fs.resolve(ctx, link, false)
	if err != 0 {
		return
	}
	err = fs.m.ReadLink(ctx, fi.inode, &path)
	return
}

func (fs *FileSystem) Truncate(ctx meta.Context, path string, length uint64) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Truncate").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Truncate (%s,%d): %s", path, length, errstr(err)) }()
	fi, err := fs.resolve(ctx, path, true)
	if err != 0 {
		return
	}
	if fi.IsDir() {
		return syscall.EISDIR
	}
	err = fs.m.Truncate(ctx, fi.inode, 0, length, nil, false)
	return
}

func (fs *FileSystem) CopyFileRange(ctx meta.Context, src string, soff uint64, dst string, doff uint64, size uint64) (written uint64, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.CopyFileRange").End()
	l := vfs.NewLogContext(ctx)
	defer func() {
		fs.log(l, "CopyFileRange (%s,%d,%s,%d,%d): (%d,%s)", dst, doff, src, soff, size, written, errstr(err))
	}()
	var dfi, sfi *FileStat
	dfi, err = fs.resolve(ctx, dst, true)
	if err != 0 {
		return
	}
	sfi, err = fs.resolve(ctx, src, true)
	if err != 0 {
		return
	}
	err = fs.m.CopyFileRange(ctx, sfi.inode, soff, dfi.inode, doff, size, 0, &written, nil)
	return
}

func (fs *FileSystem) SetXattr(ctx meta.Context, p string, name string, value []byte, flags uint32) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.SetXattr").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "SetXAttr (%s,%s,%d,%d): %s", p, name, len(value), flags, errstr(err)) }()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	err = fs.m.SetXattr(ctx, fi.inode, name, value, flags)
	return
}

func (fs *FileSystem) GetXattr(ctx meta.Context, p string, name string) (result []byte, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.GetXattr").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "GetXattr (%s,%s): (%d,%s)", p, name, len(result), errstr(err)) }()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	err = fs.m.GetXattr(ctx, fi.inode, name, &result)
	return
}

func (fs *FileSystem) ListXattr(ctx meta.Context, p string) (names []byte, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.ListXattr").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "ListXattr (%s): (%d,%s)", p, len(names), errstr(err)) }()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	err = fs.m.ListXattr(ctx, fi.inode, &names)
	return
}

func (fs *FileSystem) RemoveXattr(ctx meta.Context, p string, name string) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.RemoveXattr").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "RemoveXattr (%s,%s): %s", p, name, errstr(err)) }()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	err = fs.m.RemoveXattr(ctx, fi.inode, name)
	return
}

func (fs *FileSystem) GetFacl(ctx meta.Context, p string, acltype uint8, rule *acl.Rule) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.GetFacl").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "GetFacl (%s,%d): %s", p, acltype, errstr(err)) }()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	err = fs.m.GetFacl(ctx, fi.inode, acltype, rule)
	return
}

func (fs *FileSystem) SetFacl(ctx meta.Context, p string, acltype uint8, rule *acl.Rule) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.SetFacl").End()
	l := vfs.NewLogContext(ctx)
	defer func() {
		fs.log(l, "SetFacl (%s,%d,%v): %s", p, acltype, rule, errstr(err))
	}()
	fi, err := fs.resolve(ctx, p, true)
	if err != 0 {
		return
	}
	if acltype == acl.TypeDefault && fi.Mode().IsRegular() {
		if rule.IsEmpty() {
			return
		} else {
			return syscall.ENOTSUP
		}
	}
	if rule.IsEmpty() {
		oldRule := acl.EmptyRule()
		if err = fs.m.GetFacl(ctx, fi.inode, acltype, oldRule); err != 0 {
			return err
		}
		rule.Owner = oldRule.Owner
		rule.Other = oldRule.Other
		rule.Group = oldRule.Group & oldRule.Mask
	}
	err = fs.m.SetFacl(ctx, fi.inode, acltype, rule)
	return
}

func (fs *FileSystem) lookup(ctx meta.Context, parent Ino, name string, inode *Ino, attr *Attr) (err syscall.Errno) {
	now := time.Now()
	if fs.conf.DirEntryTimeout > 0 || fs.conf.EntryTimeout > 0 {
		fs.cacheM.Lock()
		es, ok := fs.entries[parent]
		if ok {
			e, ok := es[name]
			if ok {
				if now.Before(e.expire) {
					ac := fs.attrs[e.inode]
					fs.cacheM.Unlock()
					*inode = e.inode
					if ac == nil || now.After(ac.expire) {
						err = fs.m.GetAttr(ctx, e.inode, attr)
						if err == 0 && fs.conf.AttrTimeout > 0 {
							fs.cacheM.Lock()
							fs.attrs[e.inode] = &attrCache{*attr, now.Add(fs.conf.AttrTimeout)}
							fs.cacheM.Unlock()
						}
					} else {
						*attr = ac.attr
					}
					return err
				}
				delete(es, name)
				if len(es) == 0 {
					delete(fs.entries, parent)
				}
			}
		}
		fs.cacheM.Unlock()
	}

	err = fs.m.Lookup(ctx, parent, name, inode, attr, false)
	if err == 0 && (fs.conf.DirEntryTimeout > 0 && attr.Typ == meta.TypeDirectory || fs.conf.EntryTimeout > 0 && attr.Typ != meta.TypeDirectory) {
		fs.cacheM.Lock()
		if fs.conf.AttrTimeout > 0 {
			fs.attrs[*inode] = &attrCache{*attr, now.Add(fs.conf.AttrTimeout)}
		}
		es, ok := fs.entries[parent]
		if !ok {
			es = make(map[string]*entryCache)
			fs.entries[parent] = es
		}
		var expire time.Time
		if attr.Typ == meta.TypeDirectory {
			expire = now.Add(fs.conf.DirEntryTimeout)
		} else {
			expire = now.Add(fs.conf.EntryTimeout)
		}
		es[name] = &entryCache{*inode, attr.Typ, expire}
		fs.cacheM.Unlock()
	}
	// TODO: support for `negative_dentry_cache`?
	return err
}

func (fs *FileSystem) resolve(ctx meta.Context, p string, followLastSymlink bool) (fi *FileStat, err syscall.Errno) {
	return fs.doResolve(ctx, p, followLastSymlink, make(map[Ino]struct{}))
}

func (fs *FileSystem) doResolve(ctx meta.Context, p string, followLastSymlink bool, visited map[Ino]struct{}) (fi *FileStat, err syscall.Errno) {
	p = path.Clean(p)

	// Check if path is allowed by any of the configured subdirs
	if len(fs.subdirPrefixes) > 0 {
		allowed := false
		plen := len(p)
		for _, prefix := range fs.subdirPrefixes {
			prefixLen := len(prefix)
			// Fast path: check length first to avoid string comparison if possible
			if prefixLen > plen {
				continue
			}
			// Check if path starts with prefix and is either the prefix itself or has '/' after prefix
			// This prevents matching "/test" with "/testfile" (should match "/test" or "/test/...")
			if strings.HasPrefix(p, prefix) && (prefixLen == plen || p[prefixLen] == '/') {
				allowed = true
				break
			}
		}
		if !allowed {
			return nil, syscall.EACCES
		}
	}
	var inode Ino
	var attr = &Attr{}

	if fs.conf.FastResolve {
		err = fs.m.Resolve(ctx, 1, p, &inode, attr)
		if err == 0 {
			fi = AttrToFileInfo(inode, attr)
			p = strings.TrimRight(p, "/")
			ss := strings.Split(p, "/")
			fi.name = ss[len(ss)-1]
			if fi.IsSymlink() && followLastSymlink {
				// fast resolve can't follow symlink
				err = syscall.ENOTSUP
			}
		}
		if err != syscall.ENOTSUP {
			return
		}
	}

	// Fallback to the default implementation that calls `fs.m.Lookup` for each directory along the path.
	// It might be slower for deep directories, but it works for every meta that implements `Lookup`.
	parent := Ino(1)
	ss := strings.Split(p, "/")
	for i, name := range ss {
		if len(name) == 0 {
			continue
		}
		if parent == meta.RootInode && i == len(ss)-1 && vfs.IsSpecialName(name) {
			inode, attr := vfs.GetInternalNodeByName(name)
			fi = AttrToFileInfo(inode, attr)
			parent = inode
			break
		}
		if parent > 1 {
			if (name == "." || name == "..") && attr.Typ != meta.TypeDirectory {
				return nil, syscall.ENOTDIR
			}
			if err := fs.m.Access(ctx, parent, meta.MODE_MASK_X, attr); err != 0 {
				return nil, err
			}
		}

		var inode Ino
		var resolved bool

		err = fs.lookup(ctx, parent, name, &inode, attr)
		if i == len(ss)-1 {
			resolved = true
		}
		if err != 0 {
			return
		}
		fi = AttrToFileInfo(inode, attr)
		if (!resolved || followLastSymlink) && fi.IsSymlink() {
			if _, ok := visited[inode]; ok {
				logger.Errorf("find a loop symlink: %d", inode)
				return nil, syscall.ELOOP
			} else {
				visited[inode] = struct{}{}
			}
			var buf []byte
			err = fs.m.ReadLink(ctx, inode, &buf)
			if err != 0 {
				return
			}
			target := string(buf)
			if strings.Contains(target, "://") {
				return &FileStat{name: target}, syscall.ENOTSUP
			}
			if strings.HasPrefix(target, "/") {
				mp := fs.conf.Mountpoint
				if !strings.HasSuffix(mp, "/") {
					mp += "/"
				}
				if strings.HasPrefix(target, mp) {
					target = target[len(mp):]
				} else {
					fi.name = "file:" + target
					logger.Errorf("external link: %s -> %s", p, target)
					return fi, utils.ErrExtlink
				}
			} else {
				target = path.Join(strings.Join(ss[:i], "/"), target)
			}
			fi, err = fs.doResolve(ctx, target, followLastSymlink, visited)
			if err != 0 {
				return
			}
			inode = fi.Inode()
			attr = fi.attr
		}
		fi.name = name
		parent = inode
	}
	if parent == meta.RootInode {
		err = fs.m.GetAttr(ctx, parent, attr)
		if err != 0 {
			return
		}
		fi = AttrToFileInfo(1, attr)
	}
	return fi, 0
}

func (fs *FileSystem) Create(ctx meta.Context, p string, mode uint16, umask uint16) (f *File, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Create").End()
	l := vfs.NewLogContext(ctx)
	defer func() { fs.log(l, "Create (%s,%o): %s", p, mode, errstr(err)) }()
	if strings.HasSuffix(p, "/") {
		return nil, syscall.EINVAL
	}
	var inode Ino
	var attr = &Attr{}
	var fi *FileStat
	fi, err = fs.resolve(ctx, parentDir(p), true)
	if err != 0 {
		return
	}
	err = fs.m.Create(ctx, fi.inode, path.Base(p), mode&07777, umask, syscall.O_EXCL, &inode, attr)
	if err == syscall.ENOENT && fi.inode != 1 {
		// dir be moved into trash, try again
		if fs.conf.DirEntryTimeout > 0 {
			parent := parentDir(p)
			if fi, err := fs.resolve(ctx, parentDir(parent), true); err == 0 {
				fs.InvalidateEntry(fi.inode, path.Base(parent))
			}
		}
		if fi2, e := fs.resolve(ctx, parentDir(p), true); e != 0 {
			return nil, e
		} else if fi2.inode != fi.inode {
			err = fs.m.Create(ctx, fi2.inode, path.Base(p), mode&07777, umask, syscall.O_EXCL, &inode, attr)
		}
	}
	if err == 0 {
		fi = AttrToFileInfo(inode, attr)
		fi.name = path.Base(p)
		f = &File{}
		f.flags = vfs.MODE_MASK_W
		f.path = p
		f.inode = fi.inode
		f.info = fi
		f.fs = fs
	}
	fs.InvalidateEntry(fi.inode, path.Base(p))
	return
}

func (fs *FileSystem) Flush() error {
	buffer := fs.logBuffer
	if buffer != nil {
		buffer <- "" // flush
	}
	fs.Meta().FlushSession()
	return nil
}

func (fs *FileSystem) Close() error {
	_ = fs.Flush()
	buffer := fs.logBuffer
	if buffer != nil {
		fs.logBuffer = nil
		close(buffer)
	}
	return nil
}

func (fs *FileSystem) Clone(ctx meta.Context, src, dst string, preserve bool) (err syscall.Errno) {
	srcParent, err := fs.resolve(ctx, parentDir(src), true)
	if err != 0 {
		return
	}
	var srcIno Ino
	err = fs.lookup(ctx, srcParent.Inode(), path.Base(src), &srcIno, &Attr{})
	if err != 0 {
		return
	}
	dstParent, err := fs.resolve(ctx, parentDir(dst), true)
	if err != 0 {
		return
	}

	var count, total uint64
	umask := uint16(utils.GetUmask())

	var cmode uint8
	if preserve {
		cmode |= meta.CLONE_MODE_PRESERVE_ATTR
	}

	if err = fs.m.Clone(meta.NewContext(ctx.Pid(), ctx.Uid(), ctx.Gids()), srcParent.Inode(), srcIno, dstParent.Inode(), path.Base(dst), cmode, umask, meta.CLONE_DEFAULT_CONCURRENCY, &count, &total); err != 0 {
		logger.Errorf("clone failed srcIno:%d,dstParentIno:%d,dstName:%s,cmode:%d,umask:%d,eno:%v", srcIno, dstParent.Inode(), path.Base(dst), cmode, umask, err)
	}
	return
}

func (fs *FileSystem) Warmup(ctx meta.Context, paths []string, numthreads int, background bool, isEvict bool, isCheck bool, resp *vfs.CacheResponse) {
	action := vfs.WarmupCache
	if isEvict {
		action = vfs.EvictCache
	}
	if isCheck {
		action = vfs.CheckCache
	}

	if background {
		go fs.cacheFiller.Cache(meta.NewContext(ctx.Pid(), ctx.Uid(), ctx.Gids()), action, paths, int(numthreads), resp)
	} else {
		fs.cacheFiller.Cache(meta.NewContext(ctx.Pid(), ctx.Uid(), ctx.Gids()), action, paths, int(numthreads), resp)
	}
}

func (fs *FileSystem) HandleQuota(ctx meta.Context, path string, cmd uint8, capacity, inodes uint64, strict, repair, create bool) (qs map[string]*meta.Quota, err syscall.Errno) {
	l := vfs.NewLogContext(ctx)
	defer func() {
		fs.log(l, "QuotaCtl (%s,%d,%d,%d,%t,%t,%t): %s", path, cmd, capacity, inodes, create, repair, strict, errstr(err))
	}()
	if cmd == meta.QuotaSet && capacity == 0 && inodes == 0 {
		return nil, syscall.EINVAL
	}
	qs = make(map[string]*meta.Quota)
	if cmd == meta.QuotaSet {
		q := &meta.Quota{MaxSpace: -1, MaxInodes: -1} // negative means no change
		if capacity > 0 {
			q.MaxSpace = int64(capacity)
		}
		if inodes > 0 {
			q.MaxInodes = int64(inodes)
		}
		qs[path] = q
	}

	if _err := fs.m.HandleQuota(meta.Background(), cmd, path, 0, 0, qs, strict, repair, create); _err != nil {
		if strings.HasPrefix(_err.Error(), "no quota for inode") {
			return qs, 0
		}
		err = syscall.EINVAL
	}
	return
}

// File

func (f *File) FS() *FileSystem {
	return f.fs
}

func (f *File) Inode() Ino {
	return f.inode
}

func (f *File) Name() string {
	return f.path
}

func (f *File) Stat() (fi os.FileInfo, err error) {
	return f.info, nil
}

func (f *File) Chmod(ctx meta.Context, mode uint16) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Chmod").End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Chmod (%s,%o): %s", f.path, mode, errstr(err)) }()
	var attr = Attr{Mode: mode}
	err = f.fs.m.SetAttr(ctx, f.inode, meta.SetAttrMode, 0, &attr)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Chown(ctx meta.Context, uid uint32, gid uint32) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Chown").End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Chown (%s,%d,%d): %s", f.path, uid, gid, errstr(err)) }()
	var flag uint16
	if uid != uint32(f.info.Uid()) {
		flag |= meta.SetAttrUID
	}
	if gid != uint32(f.info.Gid()) {
		flag |= meta.SetAttrGID
	}
	var attr = Attr{Uid: uid, Gid: gid}
	err = f.fs.m.SetAttr(ctx, f.inode, flag, 0, &attr)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Utime(ctx meta.Context, atime, mtime int64) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Utime").End()
	var flag uint16
	if atime >= 0 {
		flag |= meta.SetAttrAtime
	}
	if mtime >= 0 {
		flag |= meta.SetAttrMtime
	}
	if flag == 0 {
		return 0
	}
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Utime (%s,%d,%d): %s", f.path, atime, mtime, errstr(err)) }()
	var attr Attr
	attr.Atime = atime / 1000
	attr.Atimensec = uint32(atime%1000) * 1e6
	attr.Mtime = mtime / 1000
	attr.Mtimensec = uint32(mtime%1000) * 1e6
	err = f.fs.m.SetAttr(ctx, f.inode, flag, 0, &attr)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Utime2(ctx meta.Context, atimeSec, atimeNSec, mtimeSec, mtimeNsec int64) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Utime2").End()
	var flag uint16
	if atimeSec >= 0 || atimeNSec >= 0 {
		flag |= meta.SetAttrAtime
	}
	if mtimeSec >= 0 || mtimeNsec >= 0 {
		flag |= meta.SetAttrMtime
	}
	if flag == 0 {
		return 0
	}
	l := vfs.NewLogContext(ctx)
	defer func() {
		f.fs.log(l, "Utime2 (%s,%d,%d,%d,%d): %s", f.path, atimeSec, atimeNSec, mtimeSec, mtimeNsec, errstr(err))
	}()
	var attr Attr
	attr.Atime = atimeSec
	attr.Atimensec = uint32(atimeNSec)
	attr.Mtime = mtimeSec
	attr.Mtimensec = uint32(mtimeNsec)
	err = f.fs.m.SetAttr(ctx, f.inode, flag, 0, &attr)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Seek(ctx meta.Context, offset int64, whence int) (int64, error) {
	defer trace.StartRegion(context.TODO(), "fs.Seek").End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Seek (%s,%d,%d): %d", f.path, offset, whence, f.offset) }()
	f.Lock()
	defer f.Unlock()
	switch whence {
	case io.SeekStart:
		f.offset = offset
	case io.SeekCurrent:
		f.offset += offset
	case io.SeekEnd:
		f.offset = f.info.Size() + offset
	}
	return f.offset, nil
}

func (f *File) Read(ctx meta.Context, b []byte) (n int, err error) {
	_, task := trace.NewTask(context.TODO(), "Read")
	defer task.End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Read (%s,%d): (%d,%s)", f.path, len(b), n, errstr(err)) }()
	f.Lock()
	defer f.Unlock()
	n, err = f.pread(ctx, b, f.offset)
	f.offset += int64(n)
	return
}

func (f *File) Pread(ctx meta.Context, b []byte, offset int64) (n int, err error) {
	_, task := trace.NewTask(context.TODO(), "Pread")
	defer task.End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Pread (%s,%d,%d): (%d,%s)", f.path, len(b), offset, n, errstr(err)) }()
	f.Lock()
	defer f.Unlock()
	n, err = f.pread(ctx, b, offset)
	return
}

func (f *File) pread(ctx meta.Context, b []byte, offset int64) (n int, err error) {
	if offset >= f.info.Size() {
		return 0, io.EOF
	}
	if int64(len(b))+offset > f.info.Size() {
		b = b[:f.info.Size()-offset]
	}
	if f.data != nil {
		n := copy(b, f.data[offset:])
		return n, nil
	}
	if f.wdata != nil {
		eno := f.wdata.Flush(ctx)
		if eno != 0 {
			err = eno
			return
		}
	}
	if f.rdata == nil {
		f.rdata = f.fs.reader.Open(f.inode, uint64(f.info.Size()))
	}

	got, eno := f.rdata.Read(ctx, uint64(offset), b)
	for eno == syscall.EAGAIN {
		got, eno = f.rdata.Read(ctx, uint64(offset), b)
	}
	if eno != 0 {
		err = eno
		return
	}
	if got == 0 {
		return 0, io.EOF
	}
	f.fs.readSizeHistogram.Observe(float64(got))
	return got, nil
}

func (f *File) Write(ctx meta.Context, b []byte) (n int, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Write").End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Write (%s,%d): (%d,%s)", f.path, len(b), n, errstr(err)) }()
	f.Lock()
	defer f.Unlock()
	n, err = f.pwrite(ctx, b, f.offset)
	f.offset += int64(n)
	return
}

func (f *File) Pwrite(ctx meta.Context, b []byte, offset int64) (n int, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Pwrite").End()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Pwrite (%s,%d,%d): (%d,%s)", f.path, len(b), offset, n, errstr(err)) }()
	f.Lock()
	defer f.Unlock()
	n, err = f.pwrite(ctx, b, offset)
	return
}

func (f *File) pwrite(ctx meta.Context, b []byte, offset int64) (n int, err syscall.Errno) {
	if f.wdata == nil {
		f.wdata = f.fs.writer.Open(f.inode, uint64(f.info.Size()))
	}
	err = f.wdata.Write(ctx, uint64(offset), b)
	if err != 0 {
		_ = f.wdata.Close(meta.Background())
		f.wdata = nil
		return
	}
	if offset+int64(len(b)) > int64(f.info.attr.Length) {
		f.info.attr.Length = uint64(offset + int64(len(b)))
	}
	f.fs.writtenSizeHistogram.Observe(float64(len(b)))
	return len(b), 0
}

func (f *File) Truncate(ctx meta.Context, length uint64) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Truncate").End()
	f.Lock()
	defer f.Unlock()
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Truncate (%s,%d): %s", f.path, length, errstr(err)) }()
	if f.wdata != nil {
		err = f.wdata.Flush(ctx)
		if err != 0 {
			return
		}
	}
	err = f.fs.m.Truncate(ctx, f.inode, 0, length, nil, false)
	if err == 0 {
		_ = f.fs.m.InvalidateChunkCache(ctx, f.inode, uint32(((length - 1) >> meta.ChunkBits)))
		f.fs.writer.Truncate(f.inode, length)
		f.fs.reader.Truncate(f.inode, length)
		f.info.attr.Length = length
		f.fs.InvalidateAttr(f.inode)
	}
	return
}

func (f *File) Flush(ctx meta.Context) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Flush").End()
	f.Lock()
	defer f.Unlock()
	if f.wdata == nil {
		return
	}
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Flush (%s): %s", f.path, errstr(err)) }()
	err = f.wdata.Flush(ctx)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Fsync(ctx meta.Context) (err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Fsync").End()
	f.Lock()
	defer f.Unlock()
	if f.wdata == nil {
		return 0
	}
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Fsync (%s): %s", f.path, errstr(err)) }()
	err = f.wdata.Flush(ctx)
	f.fs.InvalidateAttr(f.inode)
	return
}

func (f *File) Close(ctx meta.Context) (err syscall.Errno) {
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Close (%s): %s", f.path, errstr(err)) }()
	f.Lock()
	defer f.Unlock()
	if f.flags != 0 && !f.info.IsDir() {
		f.offset = 0
		if f.rdata != nil {
			rdata := f.rdata
			f.rdata = nil
			time.AfterFunc(time.Second, func() {
				rdata.Close(meta.Background())
			})
		}
		if f.wdata != nil {
			err = f.wdata.Close(meta.Background())
			f.fs.InvalidateAttr(f.inode)
			f.wdata = nil
		}
		_ = f.fs.m.Close(ctx, f.inode)
	}
	return
}

func (f *File) Readdir(ctx meta.Context, count int) (fi []os.FileInfo, err syscall.Errno) {
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "Readdir (%s,%d): (%s,%d)", f.path, count, errstr(err), len(fi)) }()
	f.Lock()
	defer f.Unlock()
	fi = f.dircache
	if fi == nil {
		var inodes []*meta.Entry
		err = f.fs.m.Readdir(ctx, f.inode, 1, &inodes)
		if err != 0 {
			return
		}
		if f.fs.conf.Meta.SortDir {
			sort.Slice(inodes[2:], func(i, j int) bool {
				return string(inodes[i].Name) < string(inodes[j].Name)
			})
		}
		// skip . and ..
		for _, n := range inodes[2:] {
			i := AttrToFileInfo(n.Inode, n.Attr)
			i.name = string(n.Name)
			fi = append(fi, i)
		}
		f.dircache = fi
	}

	if len(fi) < int(f.offset) {
		return nil, 0
	}
	fi = fi[f.offset:]
	if count > 0 && len(fi) > count {
		fi = fi[:count]
	}
	f.offset += int64(len(fi))
	return
}

func (f *File) ReaddirPlus(ctx meta.Context, offset int) (entries []*meta.Entry, err syscall.Errno) {
	l := vfs.NewLogContext(ctx)
	defer func() { f.fs.log(l, "ReaddirPlus (%s,%d): (%s,%d)", f.path, offset, errstr(err), len(entries)) }()
	f.Lock()
	defer f.Unlock()
	if f.entries == nil {
		var es []*meta.Entry
		err = f.fs.m.Readdir(ctx, f.inode, 1, &es)
		if err != 0 {
			return
		}
		// filter out . and ..
		f.entries = make([]*meta.Entry, 0, len(es))
		for _, e := range es {
			if !bytes.Equal(e.Name, []byte{'.'}) && !bytes.Equal(e.Name, []byte("..")) {
				f.entries = append(f.entries, e)
			}
		}
		if f.fs.conf.Meta.SortDir {
			sort.Slice(f.entries, func(i, j int) bool {
				return string(f.entries[i].Name) < string(f.entries[j].Name)
			})
		}
	}
	if offset >= len(f.entries) {
		offset = len(f.entries)
	}
	entries = f.entries[offset:]
	return
}

func (f *File) Summary(ctx meta.Context, recursive, strict bool) (s *meta.Summary, err syscall.Errno) {
	defer trace.StartRegion(context.TODO(), "fs.Summary").End()
	l := vfs.NewLogContext(ctx)
	defer func() {
		f.fs.log(l, "Summary (%s): %s (%d,%d,%d,%d)", f.path, errstr(err), s.Length, s.Size, s.Files, s.Dirs)
	}()
	s = &meta.Summary{}
	err = f.fs.m.GetSummary(ctx, f.inode, s, recursive, strict)
	return
}

func (f *File) GetTreeSummary(ctx meta.Context, depth, entries uint8, strict bool) (s *meta.TreeSummary, err syscall.Errno) {
	s = &meta.TreeSummary{
		Inode: f.inode,
		Path:  "",
		Type:  meta.TypeDirectory,
	}

	l := vfs.NewLogContext(ctx)
	defer func() {
		f.fs.log(l, "GetTreeSummary (%s,%d,%d,%t): %s (%d,%d,%d)", f.path, depth, entries, strict, errstr(err), s.Size, s.Files, s.Dirs)
	}()
	err = f.fs.m.GetTreeSummary(ctx, s, depth, entries, strict, nil)
	s.Path = path.Base(f.path)
	return
}

func (f *File) GetQuota(ctx meta.Context) (quota *meta.Quota, err error) {
	defer trace.StartRegion(context.TODO(), "fs.getQuota").End()
	l := vfs.NewLogContext(ctx)
	defer func() {
		f.fs.log(l, "getQuota (%s): %s", f.path, errstr(err))
	}()
	err = nil
	qs := make(map[string]*meta.Quota)
	// get filesystem quota if root
	if f.inode == meta.RootInode {
		format := f.fs.m.GetFormat()
		quota = &meta.Quota{
			MaxSpace:  int64(format.Capacity),
			MaxInodes: int64(format.Inodes),
		}
		return quota, err
	}
	// get directory quota
	err = f.fs.m.HandleQuota(ctx, meta.QuotaGet, f.path, 0, 0, qs, false, false, false)
	if err != nil {
		return nil, err
	}
	quota = qs[f.path]
	return quota, err
}


================================================
FILE: pkg/fs/fs_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs

import (
	"io"
	"os"
	"sort"
	"syscall"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/vfs"
)

// mutate_test_job_number: 5
func TestFileStat(t *testing.T) {
	attr := meta.Attr{
		Typ:   meta.TypeDirectory,
		Mode:  07740,
		Atime: 1,
		Mtime: 2,
	}
	st := AttrToFileInfo(2, &attr)
	if st.Inode() != 2 {
		t.Fatalf("inode should be 2")
	}
	if !st.IsDir() {
		t.Fatalf("should be a dir")
	}
	mode := st.Mode()
	if mode&os.ModeSticky == 0 {
		t.Fatalf("sticky bit should be set")
	}
	if mode&os.ModeSetuid == 0 {
		t.Fatalf("suid should be set")
	}
	if mode&os.ModeSetgid == 0 {
		t.Fatalf("sgid should be set")
	}
	if st.ModTime().Unix() != 2 {
		t.Fatalf("unixtimestamp : %d", st.ModTime().Unix())
	}
	if st.Sys() != &attr {
		t.Fatalf("sys should be meta attr")
	}
	attr.Typ = meta.TypeSymlink
	if !st.IsSymlink() {
		t.Fatalf("should be a symlink")
	}
}

// nolint:errcheck
func TestFileSystem(t *testing.T) {
	fs := createTestFS(t)
	ctx := meta.NewContext(1, 1, []uint32{2})
	if total, avail := fs.StatFS(ctx); total != 1<<30 || avail != (1<<30) {
		t.Fatalf("statfs: %d %d", total, avail)
	}
	if e := fs.Access(ctx, "/", 7); e != 0 {
		t.Fatalf("access /: %s", e)
	}
	f, err := fs.Create(ctx, "/hello", 0666, 022)
	if err != 0 {
		t.Fatalf("create /hello: %s", err)
	}
	if f.Name() != "/hello" {
		t.Fatalf("name: %s", f.Name())
	}
	_ = f.Close(ctx)
	f, err = fs.Open(ctx, "/hello", meta.MODE_MASK_R|meta.MODE_MASK_W)
	if err != 0 {
		t.Fatalf("open %s", err)
	}
	if fi, err := f.Stat(); err != nil || fi.Mode() != 0644 {
		t.Fatalf("stat: %s %+v", err, fi)
	}
	if n, err := f.Write(ctx, []byte("world")); err != 0 || n != 5 {
		t.Fatalf("write 5 bytes: %d %s", n, err)
	}
	if err := f.Fsync(ctx); err != 0 {
		t.Fatalf("fsync: %s", err)
	}
	var buf = make([]byte, 10)
	if n, err := f.Pread(ctx, buf, 2); err != nil || n != 3 || string(buf[:n]) != "rld" {
		t.Fatalf("pread(2): %d %s %s", n, err, string(buf[:n]))
	}
	if n, err := f.Seek(ctx, -3, io.SeekEnd); err != nil || n != 2 {
		t.Fatalf("seek 3 bytes before end: %d %s", n, err)
	}
	if n, err := f.Write(ctx, []byte("t")); err != 0 || n != 1 {
		t.Fatalf("write 1 bytes: %d %s", n, err)
	}
	if n, err := f.Seek(ctx, -2, io.SeekCurrent); err != nil || n != 1 {
		t.Fatalf("seek 2 bytes before current: %d %s", n, err)
	}
	if n, err := f.Read(ctx, buf); err != nil || n != 4 || string(buf[:n]) != "otld" {
		t.Fatalf("read(): %d %s %s", n, err, string(buf[:n]))
	}
	if n, err := f.Read(ctx, buf); err != io.EOF || n != 0 {
		t.Fatalf("read(): %d %s %s", n, err, string(buf[:n]))
	}
	if n, err := f.Pwrite(ctx, []byte("t"), 1); err != 0 || n != 1 {
		t.Fatalf("write 1 bytes: %d %s", n, err)
	}
	if e := f.Flush(ctx); e != 0 {
		t.Fatalf("flush /hello: %s", e)
	}

	if e := f.Chmod(ctx, 0640); e != 0 {
		t.Fatalf("chown: %s", e)
	}
	if e := f.Chown(ctx, 1, 2); e != 0 {
		t.Fatalf("chown: %s", e)
	}
	if e := f.Utime(ctx, 1, 2); e != 0 {
		t.Fatalf("utime: %s", e)
	}
	if s, e := f.Summary(ctx, true, true); e != 0 || s.Dirs != 0 || s.Files != 1 || s.Length != 5 || s.Size != 4<<10 {
		t.Fatalf("summary: %s %+v", e, s)
	}
	if e := f.Close(ctx); e != 0 {
		t.Fatalf("close /hello: %s", e)
	}
	if fi, err := fs.Stat(ctx, "/hello"); err != 0 {
		t.Fatalf("stat /hello: %s", err)
	} else if fi.Mode() != 0640 || fi.Uid() != 1 || fi.Gid() != 2 || fi.Atime() != 1 || fi.Mtime() != 2 {
		t.Fatalf("stat /hello: %+v", fi)
	}
	if e := fs.Truncate(ctx, "/hello", 2); e != 0 {
		t.Fatalf("truncate : %s", e)
	}
	if n, e := fs.CopyFileRange(ctx, "/hello", 0, "/hello", 5, 5); e != 0 || n != 2 {
		t.Fatalf("copyfilerange: %s %d", e, n)
	}

	if e := fs.SetXattr(ctx, "/hello", "k", []byte("value"), 0); e != 0 {
		t.Fatalf("setxattr /hello: %s", e)
	}
	if v, e := fs.GetXattr(ctx, "/hello", "k"); e != 0 || string(v) != "value" {
		t.Fatalf("getxattr /hello: %s %s", e, string(v))
	}
	if names, e := fs.ListXattr(ctx, "/hello"); e != 0 || string(names) != "k\x00" {
		t.Fatalf("listxattr /hello: %s %+v", e, names)
	}
	if e := fs.RemoveXattr(ctx, "/hello", "k"); e != 0 {
		t.Fatalf("removexattr /hello: %s", e)
	}

	if e := fs.Symlink(ctx, "hello", "/sym"); e != 0 {
		t.Fatalf("symlink: %s", e)
	}
	if target, e := fs.Readlink(ctx, "/sym"); e != 0 || string(target) != "hello" {
		t.Fatalf("readlink: %s", string(target))
	}
	if fi, err := fs.Stat(ctx, "/sym"); err != 0 || fi.name != "sym" || fi.IsSymlink() {
		t.Fatalf("stat symlink: %s %+v", err, fi)
	}
	if fi, err := fs.Lstat(ctx, "/sym"); err != 0 || fi.name != "sym" || !fi.IsSymlink() {
		t.Fatalf("lstat symlink: %s %+v", err, fi)
	}
	if err := fs.Delete(ctx, "/sym"); err != 0 {
		t.Fatalf("delete /sym: %s", err)
	}

	if _, e := fs.Open(meta.NewContext(2, 2, []uint32{3}), "/hello", meta.MODE_MASK_W); e == 0 || e != syscall.EACCES {
		t.Fatalf("open without permission: %s", e)
	}

	if err := fs.Mkdir(ctx, "/d", 0777, 022); err != 0 {
		t.Fatalf("mkdir /d: %s", err)
	}
	d, e := fs.Open(ctx, "/", 0)
	if e != 0 {
		t.Fatalf("open /: %s", e)
	}
	defer d.Close(ctx)
	if fis, e := d.Readdir(ctx, 0); e != 0 || len(fis) != 2 {
		t.Fatalf("readdir /: %s, %d entries", e, len(fis))
	} else {
		sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
		if fis[0].Name() != "d" || fis[1].Name() != "hello" {
			t.Fatalf("readdir names: %+v", fis)
		}
	}
	if es, e := d.ReaddirPlus(ctx, 0); e != 0 || len(es) != 2 {
		t.Fatalf("readdirplus: %s, %d entries", e, len(es))
	} else {
		sort.Slice(es, func(i, j int) bool { return es[i].Inode < es[j].Inode })
		if string(es[0].Name) != "hello" || string(es[1].Name) != "d" {
			t.Fatalf("readdirplus names: %+v", es)
		}
	}
	if e := fs.Rename(ctx, "/hello", "/d/f", 0); e != 0 {
		t.Fatalf("rename: %s", e)
	}
	if e := fs.Symlink(ctx, "d", "/sd"); e != 0 {
		t.Fatalf("symlink: %s", e)
	}
	if fi, e := fs.Stat(ctx, "/sd/f"); e != 0 || fi.name != "f" {
		t.Fatalf("follow symlink: %s %+v", e, fi)
	}

	if s, e := d.Summary(ctx, true, true); e != 0 || s.Dirs != 2 || s.Files != 2 || s.Length != 7 || s.Size != 16<<10 {
		t.Fatalf("summary: %s %+v", e, s)
	}
	if q, e := d.GetQuota(ctx); e != nil || q.MaxInodes != 0 || q.MaxSpace != (1<<30) {
		t.Fatalf("quota: %s %+v", e, q)
	}
	if e := fs.Delete(ctx, "/d"); e == 0 || !IsNotEmpty(e) {
		t.Fatalf("rmdir: %s", e)
	}
	if err := fs.Delete(ctx, "/d/f"); err != 0 {
		t.Fatalf("delete /d/f: %s", err)
	}
	if err := fs.Delete(ctx, "/d/f"); err == 0 || !IsNotExist(err) {
		t.Fatalf("delete /d/f: %s", err)
	}
	if e := fs.Rmr(ctx, "/d", false, meta.RmrDefaultThreads); e != 0 {
		t.Fatalf("delete /d -r: %s", e)
	}

	time.Sleep(time.Second * 2)
	if e := fs.Flush(); e != nil {
		t.Fatalf("flush : %s", e)
	}
	if e := fs.Close(); e != nil {
		t.Fatalf("close: %s", e)
	}
	if e := fs.Close(); e != nil {
		t.Fatalf("close: %s", e)
	}

	// path with trailing /
	if err := fs.Mkdir(ctx, "/ddd/", 0777, 000); err != 0 {
		t.Fatalf("mkdir /ddd/: %s", err)
	}
	if _, err := fs.Create(ctx, "/ddd/ddd", 0777, 000); err != 0 {
		t.Fatalf("create /ddd/ddd: %s", err)
	}
	if _, err := fs.Create(ctx, "/ddd/fff/", 0777, 000); err != syscall.EINVAL {
		t.Fatalf("create /ddd/fff/: %s", err)
	}
	if err := fs.Delete(ctx, "/ddd/"); err != syscall.ENOTEMPTY {
		t.Fatalf("delete /ddd/: %s", err)
	}
	if err := fs.Rename(ctx, "/ddd/", "/ttt/", 0); err != 0 {
		t.Fatalf("delete /ddd/: %s", err)
	}
	if err := fs.Rmr(ctx, "/ttt/", false, meta.RmrDefaultThreads); err != 0 {
		t.Fatalf("rmr /ttt/: %s", err)
	}
	if _, err := fs.Stat(ctx, "/ttt/"); err != syscall.ENOENT {
		t.Fatalf("stat /ttt/: %s", err)
	}
}

func createTestFS(t *testing.T) *FileSystem {
	m := meta.NewClient("memkv://", nil)
	format := &meta.Format{
		Name:      "test",
		BlockSize: 4096,
		Capacity:  1 << 30,
		DirStats:  true,
	}
	_ = m.Init(format, true)
	var conf = vfs.Config{
		Meta: meta.DefaultConf(),
		Chunk: &chunk.Config{
			BlockSize:   format.BlockSize << 10,
			MaxUpload:   1,
			MaxDownload: 200,
			BufferSize:  100 << 20,
		},
		DirEntryTimeout: time.Millisecond * 100,
		EntryTimeout:    time.Millisecond * 100,
		AttrTimeout:     time.Millisecond * 100,
		AccessLog:       "/tmp/juicefs.access.log",
	}
	objStore, _ := object.CreateStorage("mem", "", "", "", "")
	store := chunk.NewCachedStore(objStore, *conf.Chunk, nil)
	jfs, err := NewFileSystem(&conf, m, store, nil)
	jfs.checkAccessFile = time.Millisecond
	jfs.rotateAccessLog = 500
	if err != nil {
		t.Fatalf("initialize  failed: %s", err)
	}
	return jfs
}


================================================
FILE: pkg/fs/http.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs

import (
	"compress/gzip"
	"context"
	"encoding/json"
	"encoding/xml"
	"errors"
	"io"
	"net/http"
	"os"
	"strings"
	"syscall"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"golang.org/x/net/webdav"
)

type gzipResponseWriter struct {
	io.Writer
	http.ResponseWriter
}

func (w gzipResponseWriter) Write(b []byte) (int, error) {
	return w.Writer.Write(b)
}

type gzipHandler struct {
	handler http.Handler
}

func (g *gzipHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if !strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
		g.handler.ServeHTTP(w, r)
		return
	}
	w.Header().Set("Content-Encoding", "gzip")
	gz := gzip.NewWriter(w)
	defer gz.Close()
	gzr := gzipResponseWriter{Writer: gz, ResponseWriter: w}
	g.handler.ServeHTTP(gzr, r)
}

func makeGzipHandler(h http.Handler) http.Handler {
	return &gzipHandler{h}
}

var errmap = map[syscall.Errno]error{
	0:              nil,
	syscall.EPERM:  os.ErrPermission,
	syscall.ENOENT: os.ErrNotExist,
	syscall.EEXIST: os.ErrExist,
}

func econv(err error) error {
	if err == nil {
		return nil
	}
	eno, ok := err.(syscall.Errno)
	if !ok {
		return err
	}
	if e, ok := errmap[eno]; ok {
		return e
	}
	return err
}

type webdavFS struct {
	ctx    meta.Context
	fs     *FileSystem
	umask  uint16
	config WebdavConfig
}

func (hfs *webdavFS) Mkdir(ctx context.Context, name string, perm os.FileMode) error {
	return econv(hfs.fs.Mkdir(hfs.ctx, name, uint16(perm), hfs.umask))
}

func (hfs *webdavFS) OpenFile(ctx context.Context, name string, flag int, perm os.FileMode) (webdav.File, error) {
	var mode int
	if flag&(os.O_RDONLY|os.O_RDWR) != 0 {
		mode |= vfs.MODE_MASK_R
	}
	if flag&(os.O_APPEND|os.O_RDWR|os.O_WRONLY) != 0 {
		mode |= vfs.MODE_MASK_W
	}
	if flag&(os.O_EXCL) != 0 {
		mode |= vfs.MODE_MASK_X
	}
	name = strings.TrimRight(name, "/")
	f, err := hfs.fs.Open(hfs.ctx, name, uint32(mode))
	if err != 0 {
		if err == syscall.ENOENT && flag&os.O_CREATE != 0 {
			f, err = hfs.fs.Create(hfs.ctx, name, uint16(perm), hfs.umask)
		}
	} else if flag&os.O_TRUNC != 0 {
		if errno := hfs.fs.Truncate(hfs.ctx, name, 0); errno != 0 {
			return nil, errno
		}
	} else if flag&os.O_APPEND != 0 {
		if _, err := f.Seek(hfs.ctx, 0, 2); err != nil {
			return nil, err
		}
	}
	return &davFile{f, hfs.ctx, hfs.fs, hfs.config}, econv(err)
}

func (hfs *webdavFS) RemoveAll(ctx context.Context, name string) error {
	return econv(hfs.fs.Rmr(hfs.ctx, name, false, hfs.config.MaxDeletes))
}

func (hfs *webdavFS) Rename(ctx context.Context, oldName, newName string) error {
	return econv(hfs.fs.Rename(hfs.ctx, oldName, newName, 0))
}

func (hfs *webdavFS) Stat(ctx context.Context, name string) (os.FileInfo, error) {
	fi, err := hfs.fs.Stat(hfs.ctx, removeNewLine(name))
	return fi, econv(err)
}

type davFile struct {
	*File
	mctx   meta.Context
	fs     *FileSystem
	config WebdavConfig
}

const webdavDeadProps = "webdav-dead-props"

type localProperty struct {
	N xml.Name        `json:"name"`
	P webdav.Property `json:"property"`
}

func (f *davFile) DeadProps() (map[xml.Name]webdav.Property, error) {
	if !f.config.EnableProppatch {
		return nil, nil
	}
	result, err := f.fs.GetXattr(f.mctx, f.path, webdavDeadProps)
	if err != 0 {
		if errors.Is(err, meta.ENOATTR) {
			return nil, nil
		}
		return nil, econv(err)
	}

	var lProperty []localProperty
	if err := json.Unmarshal(result, &lProperty); err != nil {
		return nil, econv(err)
	}
	var property = make(map[xml.Name]webdav.Property)
	for _, p := range lProperty {
		property[p.N] = p.P
	}
	return property, nil
}

func (f *davFile) Patch(patches []webdav.Proppatch) ([]webdav.Propstat, error) {
	if !f.config.EnableProppatch {
		return nil, nil
	}
	pstat := webdav.Propstat{Status: http.StatusOK}
	deadProps, err := f.DeadProps()
	if err != nil {
		return nil, err
	}
	for _, patch := range patches {
		for _, p := range patch.Props {
			pstat.Props = append(pstat.Props, webdav.Property{XMLName: p.XMLName})
			if patch.Remove && deadProps != nil {
				delete(deadProps, p.XMLName)
				continue
			}
			if deadProps == nil {
				deadProps = map[xml.Name]webdav.Property{}
			}
			deadProps[p.XMLName] = p
		}
	}

	if deadProps != nil {
		var property []localProperty
		for name, p := range deadProps {
			property = append(property, localProperty{N: name, P: p})
		}

		jsonData, err := json.Marshal(&property)
		if err != nil {
			return nil, err
		}
		errno := f.fs.SetXattr(f.mctx, f.path, webdavDeadProps, jsonData, 0)
		if errno != 0 {
			return nil, econv(errno)
		}
	}
	return []webdav.Propstat{pstat}, nil
}

func (f *davFile) Seek(offset int64, whence int) (int64, error) {
	n, err := f.File.Seek(meta.Background(), offset, whence)
	return n, econv(err)
}

func (f *davFile) Read(b []byte) (n int, err error) {
	n, err = f.File.Read(meta.Background(), b)
	return n, econv(err)
}

func (f *davFile) Write(buf []byte) (n int, err error) {
	n, err = f.File.Write(meta.Background(), buf)
	return n, econv(err)
}

func (f *davFile) Readdir(count int) (fi []os.FileInfo, err error) {
	fi, err = f.File.Readdir(meta.Background(), count)
	// skip the first two (. and ..)
	for len(fi) > 0 && (fi[0].Name() == "." || fi[0].Name() == "..") {
		fi = fi[1:]
	}
	return fi, econv(err)
}

func (f *davFile) Close() error {
	return econv(f.File.Close(meta.Background()))
}

type WebdavConfig struct {
	Addr            string
	DisallowList    bool
	EnableProppatch bool
	EnableGzip      bool
	Username        string
	Password        string
	CertFile        string
	KeyFile         string
	MaxDeletes      int
}

type indexHandler struct {
	*webdav.Handler
	WebdavConfig
}

func (h *indexHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {

	// http://www.webdav.org/specs/rfc4918.html#n-guidance-for-clients-desiring-to-authenticate
	if h.Username != "" && h.Password != "" {
		userName, pwd, ok := r.BasicAuth()
		if !ok {
			w.Header().Set("WWW-Authenticate", `Basic realm="Restricted"`)
			w.WriteHeader(http.StatusUnauthorized)
			return
		}
		if userName != h.Username || pwd != h.Password {
			http.Error(w, "WebDAV: need authorized!", http.StatusUnauthorized)
			return
		}
	}

	// Excerpt from RFC4918, section 9.4:
	//
	// 		GET, when applied to a collection, may return the contents of an
	//		"index.html" resource, a human-readable view of the contents of
	//		the collection, or something else altogether.
	//
	// Get, when applied to collection, will return the same as PROPFIND method.
	if r.Method == "GET" && strings.HasPrefix(r.URL.Path, h.Handler.Prefix) {
		info, err := h.Handler.FileSystem.Stat(context.TODO(), strings.TrimPrefix(r.URL.Path, h.Handler.Prefix))
		if err == nil && info.IsDir() {
			if h.DisallowList {
				http.Error(w, "Forbidden", http.StatusForbidden)
				return
			}
			r.Method = "PROPFIND"
			if r.Header.Get("Depth") == "" {
				r.Header.Add("Depth", "1")
			}
		}
	}

	// The next line would normally be:
	//	http.Handle("/", h)
	// but we wrap that HTTP handler h to cater for a special case.
	//
	// The propfind_invalid2 litmus test case expects an empty namespace prefix
	// declaration to be an error. The FAQ in the webdav litmus test says:
	//
	// "What does the "propfind_invalid2" test check for?...
	//
	// If a request was sent with an XML body which included an empty namespace
	// prefix declaration (xmlns:ns1=""), then the server must reject that with
	// a "400 Bad Request" response, as it is invalid according to the XML
	// Namespace specification."
	//
	// On the other hand, the Go standard library's encoding/xml package
	// accepts an empty xmlns namespace, as per the discussion at
	// https://github.com/golang/go/issues/8068
	//
	// Empty namespaces seem disallowed in the second (2006) edition of the XML
	// standard, but allowed in a later edition. The grammar differs between
	// http://www.w3.org/TR/2006/REC-xml-names-20060816/#ns-decl and
	// http://www.w3.org/TR/REC-xml-names/#dt-prefix
	//
	// Thus, we assume that the propfind_invalid2 test is obsolete, and
	// hard-code the 400 Bad Request response that the test expects.
	if r.Header.Get("X-Litmus") == "props: 3 (propfind_invalid2)" {
		http.Error(w, "400 Bad Request", http.StatusBadRequest)
		return
	}

	if !h.EnableProppatch && r.Method == "PROPPATCH" {
		http.Error(w, "The PROPPATCH method is not currently enabled,please add the --enable-proppatch parameter and run it again", http.StatusNotImplemented)
		return
	}

	h.Handler.ServeHTTP(w, r)
}

func StartHTTPServer(fs *FileSystem, config WebdavConfig) {
	ctx := meta.NewContext(uint32(os.Getpid()), uint32(utils.GetCurrentUID()), []uint32{uint32(utils.GetCurrentGID())})
	hfs := &webdavFS{ctx, fs, uint16(utils.GetUmask()), config}
	srv := &webdav.Handler{
		FileSystem: hfs,
		LockSystem: webdav.NewMemLS(),
		Logger: func(r *http.Request, err error) {
			if err != nil {
				logger.Errorf("WEBDAV [%s]: %s, ERROR: %s", r.Method, r.URL, err)
			} else {
				logger.Debugf("WEBDAV [%s]: %s", r.Method, r.URL)
			}
		},
	}
	var h http.Handler = &indexHandler{Handler: srv, WebdavConfig: config}
	if config.EnableGzip {
		h = makeGzipHandler(h)
	}
	http.Handle("/", h)
	logger.Infof("WebDAV listening on %s", config.Addr)
	var err error
	if config.CertFile != "" && config.KeyFile != "" {
		err = http.ListenAndServeTLS(config.Addr, config.CertFile, config.KeyFile, nil)
	} else {
		err = http.ListenAndServe(config.Addr, nil)
	}
	if err != nil {
		logger.Fatalf("Error with WebDAV server: %v", err)
	}
}

func removeNewLine(input string) string {
	return strings.Replace(strings.Replace(input, "\n", "", -1), "\r", "", -1)
}


================================================
FILE: pkg/fs/http_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs

import (
	"context"
	"io"
	"io/fs"
	"os"
	"testing"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
)

func TestWebdav(t *testing.T) {
	jfs := createTestFS(t)
	webdavFS := &webdavFS{meta.NewContext(uint32(os.Getpid()), uint32(os.Getuid()), []uint32{uint32(os.Getgid())}), jfs, uint16(utils.GetUmask()), WebdavConfig{EnableProppatch: true}}
	ctx := context.Background()
	_, err := webdavFS.Stat(ctx, "/")
	if err != nil {
		t.Fatalf("webdavFS stat failed: %s", err)
	}
	aFile, err := webdavFS.OpenFile(ctx, "/a", os.O_CREATE, 0644)
	if err != nil {
		t.Fatalf("webdavFS create failed: %s", err)
	}
	_, err = webdavFS.OpenFile(ctx, "/b/", os.O_CREATE, 0644)
	if err != nil {
		t.Fatalf("webdavFS create failed: %s", err)
	}
	aInfo, err := aFile.Stat()
	if err != nil || aInfo.Name() != "a" || aInfo.Mode().Perm() != fs.FileMode(0644) {
		t.Fatalf("webdavFS stat failed: %s", err)
	}
	if n, err := aFile.Write([]byte("world")); err != nil || n != 5 {
		t.Fatalf("webdavFS write 5 bytes: %d %s", n, err)
	}
	if n, err := aFile.Seek(-3, io.SeekEnd); err != nil || n != 2 {
		t.Fatalf("webdavFS seek 3 bytes before end: %d %s", n, err)
	}
	buf := make([]byte, 100)
	if n, err := aFile.Read(buf); err != nil || n != 3 || string(buf[:n]) != "rld" {
		t.Fatalf("webdavFS read(): %d %s %s", n, err, string(buf[:n]))
	}

	if err = webdavFS.Mkdir(ctx, "/d1", 0755); err != nil {
		t.Fatalf("webdavFS mkdir failed: %s", err)
	}
	if d1Info, err := webdavFS.Stat(ctx, "/d1"); err != nil || d1Info.Name() != "d1" || d1Info.Mode().Perm() != fs.FileMode(0755) {
		t.Fatalf("webdavFS stat failed: %s", err)
	}
	if webdavFS.Rename(ctx, "/d1", "/d2") != nil {
		t.Fatalf("webdavFS rename failed: %s", err)
	}
	if stat, err := webdavFS.Stat(ctx, "/d2"); err != nil || !stat.IsDir() {
		t.Fatalf("webdavFS rename failed: %s", err)
	}
	for _, name := range []string{"/d2/a", "/d2/b", "/d2/c", "/d2/d"} {
		if _, err := webdavFS.OpenFile(ctx, name, os.O_CREATE, 0644); err != nil {
			t.Fatalf("webdavFS create failed: %s", err)
		}
	}
	if webdavFS.RemoveAll(ctx, "/d2") != nil {
		t.Fatalf("webdavFS removeAll failed: %s", err)
	}
	if _, err = webdavFS.Stat(ctx, "/d2"); err != os.ErrNotExist {
		t.Fatalf("webdavFS removeAll failed: %s", err)
	}
	if err = aFile.Close(); err != nil {
		t.Fatalf("webdavFS close file failed: %s", err)
	}
}


================================================
FILE: pkg/fs/metrics.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs


================================================
FILE: pkg/fuse/context.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"context"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/vfs"

	"github.com/hanwen/go-fuse/v2/fuse"
)

// Ino is an alias to meta.Ino
type Ino = meta.Ino

// Attr is an alias to meta.Attr
type Attr = meta.Attr

// Context is an alias to vfs.LogContext
type Context = vfs.LogContext

type fuseContext struct {
	context.Context
	start    time.Time
	header   *fuse.InHeader
	canceled bool
	cancel   <-chan struct{}

	checkPermission bool
}

var gidcache = newGidCache(time.Minute * 5)

var contextPool = sync.Pool{
	New: func() interface{} {
		return &fuseContext{}
	},
}

func (fs *fileSystem) newContext(cancel <-chan struct{}, header *fuse.InHeader) *fuseContext {
	ctx := contextPool.Get().(*fuseContext)
	ctx.Context = context.Background()
	ctx.start = time.Now()
	ctx.canceled = false
	ctx.cancel = cancel
	ctx.header = header
	ctx.checkPermission = fs.conf.NonDefaultPermission && header.Uid != 0
	if header.Uid == 0 && fs.conf.RootSquash != nil {
		ctx.checkPermission = true
		ctx.header.Uid = fs.conf.RootSquash.Uid
		ctx.header.Gid = fs.conf.RootSquash.Gid
	}
	if fs.conf.AllSquash != nil {
		ctx.checkPermission = true
		ctx.header.Uid = fs.conf.AllSquash.Uid
		ctx.header.Gid = fs.conf.AllSquash.Gid
	}
	return ctx
}

func releaseContext(ctx *fuseContext) {
	contextPool.Put(ctx)
}

func (c *fuseContext) Uid() uint32 {
	return c.header.Uid
}

func (c *fuseContext) Gid() uint32 {
	return c.header.Gid
}

func (c *fuseContext) Gids() []uint32 {
	if c.checkPermission {
		return gidcache.get(c.Pid(), c.Gid())
	}
	return []uint32{c.header.Gid}
}

func (c *fuseContext) Pid() uint32 {
	return c.header.Pid
}

func (c *fuseContext) Duration() time.Duration {
	return time.Since(c.start)
}

func (c *fuseContext) Cancel() {
	c.canceled = true
}

func (c *fuseContext) CheckPermission() bool {
	return c.checkPermission
}

func (c *fuseContext) Canceled() bool {
	if c.Duration() < time.Second {
		return false
	}
	if c.canceled {
		return true
	}
	select {
	case <-c.cancel:
		return true
	default:
		return false
	}
}

func (c *fuseContext) WithValue(k, v interface{}) meta.Context {
	wc := *c // gids is a const, so it's safe to shallow copy
	wc.Context = context.WithValue(c.Context, k, v)
	return &wc
}

func (c *fuseContext) Err() error {
	return syscall.EINTR
}

// func (c *fuseContext) Done() <-chan struct{} {
// 	return c.cancel
// }


================================================
FILE: pkg/fuse/device_darwin.go
================================================
// Copyright 2020 Chaos Mesh Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package fuse

func ensureFuseDev() {}

func grantAccess() error {
	return nil
}


================================================
FILE: pkg/fuse/device_linux.go
================================================
// Copyright 2020 Chaos Mesh Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package fuse

import (
	"bufio"
	"fmt"
	"os"
	"path"
	"strings"
	"syscall"

	"github.com/pkg/errors"

	"golang.org/x/sys/unix"
)

// ensureFuseDev ensures /dev/fuse exists. If not, it will create one
func ensureFuseDev() {
	if _, err := os.Open("/dev/fuse"); os.IsNotExist(err) {
		// 10, 229 according to https://www.kernel.org/doc/Documentation/admin-guide/devices.txt
		fuse := unix.Mkdev(10, 229)
		if err := syscall.Mknod("/dev/fuse", 0o666|syscall.S_IFCHR, int(fuse)); err != nil {
			logger.Errorf("mknod /dev/fuse: %v", err)
		}
	}
}

// grantAccess appends 'c 10:229 rwm' to devices.allow
func grantAccess() error {
	pid := os.Getpid()
	cgroupPath := fmt.Sprintf("/proc/%d/cgroup", pid)
	cgroupFile, err := os.Open(cgroupPath)
	if err != nil {
		return errors.Wrapf(err, "open %s", cgroupPath)
	}
	defer cgroupFile.Close()

	cgroupScanner := bufio.NewScanner(cgroupFile)
	var deviceCgroup string
	for cgroupScanner.Scan() {
		if err := cgroupScanner.Err(); err != nil {
			return errors.Wrap(err, "read cgroup file")
		}
		var (
			text  = cgroupScanner.Text()
			parts = strings.SplitN(text, ":", 3)
		)
		if len(parts) < 3 {
			return errors.Errorf("invalid cgroup entry: %q", text)
		}

		if parts[1] == "devices" {
			deviceCgroup = parts[2]
		}
	}

	if len(deviceCgroup) == 0 {
		return errors.Errorf("fail to find device cgroup")
	}

	deviceListPath := path.Join("/sys/fs/cgroup/devices" + deviceCgroup, "/devices.list")
	deviceAllowPath := path.Join("/sys/fs/cgroup/devices" + deviceCgroup, "/devices.allow")

	// check if fuse is already allowed
	deviceListFile, err := os.OpenFile(deviceListPath, os.O_RDONLY, 0)
	if err != nil {
		return errors.Wrapf(err, "open %s", deviceListPath)
	}
	defer deviceListFile.Close()
	deviceListScanner := bufio.NewScanner(deviceListFile)
	for deviceListScanner.Scan() {
		if err := deviceListScanner.Err(); err != nil {
			return errors.Wrap(err, "read device list file")
		}
		var (
			text  = deviceListScanner.Text()
			parts = strings.SplitN(text, " ", 3)
		)
		if len(parts) < 3 {
			return errors.Errorf("invalid device list entry: %q", text)
		}

		if (parts[0] == "c" || parts[0] == "a") && (parts[1] == "10:229" || parts[1] == "*:*") && parts[2] == "rwm" {
			logger.Debug("/dev/fuse is already granted")
			// fuse is already allowed
			return nil
		}
	}

	f, err := os.OpenFile(deviceAllowPath, os.O_WRONLY, 0)
	if err != nil {
		return errors.Wrapf(err, "open %s", deviceAllowPath)
	}
	defer f.Close()
	// 10, 229 according to https://www.kernel.org/doc/Documentation/admin-guide/devices.txt
	content := "c 10:229 rwm"
	_, err = f.WriteString(content)
	if err != nil {
		return errors.Wrapf(err, "write %s to %s", content, deviceAllowPath)
	}
	logger.Debug("/dev/fuse is granted")
	return nil
}


================================================
FILE: pkg/fuse/fuse.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"fmt"
	"log"
	"math"
	"os"
	"os/exec"
	"runtime"
	"strings"
	"syscall"
	"time"

	"github.com/hanwen/go-fuse/v2/fuse"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
)

var logger = utils.GetLogger("juicefs")

type fileSystem struct {
	fuse.RawFileSystem
	conf *vfs.Config
	v    *vfs.VFS
}

func newFileSystem(conf *vfs.Config, v *vfs.VFS) *fileSystem {
	return &fileSystem{
		RawFileSystem: fuse.NewDefaultRawFileSystem(),
		conf:          conf,
		v:             v,
	}
}

type setTimeout func(time.Duration)

func (fs *fileSystem) replyAttr(ctx *fuseContext, entry *meta.Entry, attr *fuse.Attr, set setTimeout) {
	if vfs.IsSpecialNode(entry.Inode) {
		set(time.Hour)
	} else if entry.Attr.Typ == meta.TypeFile && fs.v.ModifiedSince(entry.Inode, ctx.start) {
		logger.Debugf("refresh attr for %d", entry.Inode)
		var attr meta.Attr
		st := fs.v.Meta.GetAttr(ctx, entry.Inode, &attr)
		if st == 0 {
			*entry.Attr = attr
			set(fs.conf.AttrTimeout)
		}
	} else {
		set(fs.conf.AttrTimeout)
	}
	fs.v.UpdateLength(entry.Inode, entry.Attr)
	attrToStat(entry.Inode, entry.Attr, attr)
}

func (fs *fileSystem) replyEntry(ctx *fuseContext, out *fuse.EntryOut, e *meta.Entry) fuse.Status {
	out.NodeId = uint64(e.Inode)
	out.Generation = 1
	if e.Attr.Typ == meta.TypeDirectory {
		out.SetEntryTimeout(fs.conf.DirEntryTimeout)
	} else {
		out.SetEntryTimeout(fs.conf.EntryTimeout)
	}
	fs.replyAttr(ctx, e, &out.Attr, out.SetAttrTimeout)
	return 0
}

func (fs *fileSystem) Lookup(cancel <-chan struct{}, header *fuse.InHeader, name string, out *fuse.EntryOut) (status fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	entry, err := fs.v.Lookup(ctx, Ino(header.NodeId), name)
	if err != 0 {
		if fs.conf.NegEntryTimeout != 0 && err == syscall.ENOENT {
			out.NodeId = 0 // zero nodeid is same as ENOENT, but with valid timeout
			out.SetEntryTimeout(fs.conf.NegEntryTimeout)
			return 0
		}
		return fuse.Status(err)
	}
	return fs.replyEntry(ctx, out, entry)
}

func (fs *fileSystem) GetAttr(cancel <-chan struct{}, in *fuse.GetAttrIn, out *fuse.AttrOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	var opened uint8
	if in.Fh() != 0 {
		opened = 1
	}
	entry, err := fs.v.GetAttr(ctx, Ino(in.NodeId), opened)
	if err != 0 {
		return fuse.Status(err)
	}
	fs.replyAttr(ctx, entry, &out.Attr, out.SetTimeout)
	return 0
}

func (fs *fileSystem) SetAttr(cancel <-chan struct{}, in *fuse.SetAttrIn, out *fuse.AttrOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, err := fs.v.SetAttr(ctx, Ino(in.NodeId), int(in.Valid), in.Fh, in.Mode, in.Uid, in.Gid, int64(in.Atime), int64(in.Mtime), in.Atimensec, in.Mtimensec, in.Size)
	if err != 0 {
		return fuse.Status(err)
	}
	fs.replyAttr(ctx, entry, &out.Attr, out.SetTimeout)
	return 0
}

func (fs *fileSystem) Mknod(cancel <-chan struct{}, in *fuse.MknodIn, name string, out *fuse.EntryOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, err := fs.v.Mknod(ctx, Ino(in.NodeId), name, uint16(in.Mode), getUmask(in.Umask, fs.v.Conf.UMask, false), in.Rdev)
	if err != 0 {
		return fuse.Status(err)
	}
	return fs.replyEntry(ctx, out, entry)
}

func (fs *fileSystem) Mkdir(cancel <-chan struct{}, in *fuse.MkdirIn, name string, out *fuse.EntryOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, err := fs.v.Mkdir(ctx, Ino(in.NodeId), name, uint16(in.Mode), getUmask(in.Umask, fs.v.Conf.UMask, true))
	if err != 0 {
		return fuse.Status(err)
	}
	return fs.replyEntry(ctx, out, entry)
}

func (fs *fileSystem) Unlink(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	err := fs.v.Unlink(ctx, Ino(header.NodeId), name)
	return fuse.Status(err)
}

func (fs *fileSystem) Rmdir(cancel <-chan struct{}, header *fuse.InHeader, name string) (code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	err := fs.v.Rmdir(ctx, Ino(header.NodeId), name)
	return fuse.Status(err)
}

func (fs *fileSystem) Rename(cancel <-chan struct{}, in *fuse.RenameIn, oldName string, newName string) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Rename(ctx, Ino(in.NodeId), oldName, Ino(in.Newdir), newName, in.Flags)
	return fuse.Status(err)
}

func (fs *fileSystem) Link(cancel <-chan struct{}, in *fuse.LinkIn, name string, out *fuse.EntryOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, err := fs.v.Link(ctx, Ino(in.Oldnodeid), Ino(in.NodeId), name)
	if err != 0 {
		return fuse.Status(err)
	}
	return fs.replyEntry(ctx, out, entry)
}

func (fs *fileSystem) Symlink(cancel <-chan struct{}, header *fuse.InHeader, target string, name string, out *fuse.EntryOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	entry, err := fs.v.Symlink(ctx, target, Ino(header.NodeId), name)
	if err != 0 {
		return fuse.Status(err)
	}
	return fs.replyEntry(ctx, out, entry)
}

func (fs *fileSystem) Readlink(cancel <-chan struct{}, header *fuse.InHeader) (out []byte, code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	path, err := fs.v.Readlink(ctx, Ino(header.NodeId))
	return path, fuse.Status(err)
}

func (fs *fileSystem) GetXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string, dest []byte) (sz uint32, code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	value, err := fs.v.GetXattr(ctx, Ino(header.NodeId), attr, uint32(len(dest)))
	if err != 0 {
		return 0, fuse.Status(err)
	}
	copy(dest, value)
	return uint32(len(value)), 0
}

func (fs *fileSystem) ListXAttr(cancel <-chan struct{}, header *fuse.InHeader, dest []byte) (uint32, fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	data, err := fs.v.ListXattr(ctx, Ino(header.NodeId), len(dest))
	if err != 0 {
		return 0, fuse.Status(err)
	}
	copy(dest, data)
	return uint32(len(data)), 0
}

func (fs *fileSystem) SetXAttr(cancel <-chan struct{}, in *fuse.SetXAttrIn, attr string, data []byte) fuse.Status {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.SetXattr(ctx, Ino(in.NodeId), attr, data, in.Flags)
	return fuse.Status(err)
}

func (fs *fileSystem) RemoveXAttr(cancel <-chan struct{}, header *fuse.InHeader, attr string) (code fuse.Status) {
	ctx := fs.newContext(cancel, header)
	defer releaseContext(ctx)
	err := fs.v.RemoveXattr(ctx, Ino(header.NodeId), attr)
	return fuse.Status(err)
}

func (fs *fileSystem) Create(cancel <-chan struct{}, in *fuse.CreateIn, name string, out *fuse.CreateOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, fh, err := fs.v.Create(ctx, Ino(in.NodeId), name, uint16(in.Mode), getCreateUmask(in.Umask, fs.v.Conf.UMask), in.Flags)
	if err != 0 {
		return fuse.Status(err)
	}
	out.Fh = fh
	return fs.replyEntry(ctx, &out.EntryOut, entry)
}

func (fs *fileSystem) Open(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entry, fh, err := fs.v.Open(ctx, Ino(in.NodeId), in.Flags)
	if err != 0 {
		return fuse.Status(err)
	}
	out.Fh = fh
	if vfs.IsSpecialNode(Ino(in.NodeId)) {
		out.OpenFlags |= fuse.FOPEN_DIRECT_IO
	} else if entry.Attr.KeepCache {
		out.OpenFlags |= fuse.FOPEN_KEEP_CACHE
	} else {
		if runtime.GOOS == "darwin" {
			go fsserv.InodeNotify(uint64(in.NodeId), -1, 0)
		} else {
			fsserv.InodeNotify(uint64(in.NodeId), -1, 0)
		}
	}
	return 0
}

func (fs *fileSystem) Read(cancel <-chan struct{}, in *fuse.ReadIn, buf []byte) (fuse.ReadResult, fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	n, err := fs.v.Read(ctx, Ino(in.NodeId), buf, in.Offset, in.Fh)
	if err != 0 {
		return nil, fuse.Status(err)
	}
	return fuse.ReadResultData(buf[:n]), 0
}

func (fs *fileSystem) Release(cancel <-chan struct{}, in *fuse.ReleaseIn) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	fs.v.Release(ctx, Ino(in.NodeId), in.Fh)
}

func (fs *fileSystem) Write(cancel <-chan struct{}, in *fuse.WriteIn, data []byte) (written uint32, code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Write(ctx, Ino(in.NodeId), data, in.Offset, in.Fh)
	if err != 0 {
		return 0, fuse.Status(err)
	}
	return uint32(len(data)), 0
}

func (fs *fileSystem) Flush(cancel <-chan struct{}, in *fuse.FlushIn) fuse.Status {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Flush(ctx, Ino(in.NodeId), in.Fh, in.LockOwner)
	return fuse.Status(err)
}

func (fs *fileSystem) Fsync(cancel <-chan struct{}, in *fuse.FsyncIn) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Fsync(ctx, Ino(in.NodeId), int(in.FsyncFlags), in.Fh)
	return fuse.Status(err)
}

func (fs *fileSystem) Fallocate(cancel <-chan struct{}, in *fuse.FallocateIn) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Fallocate(ctx, Ino(in.NodeId), uint8(in.Mode), int64(in.Offset), int64(in.Length), in.Fh)
	return fuse.Status(err)
}

func (fs *fileSystem) CopyFileRange(cancel <-chan struct{}, in *fuse.CopyFileRangeIn) (written uint32, code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	var len = in.Len
	if len > math.MaxUint32 {
		// written may overflow
		len = math.MaxUint32 + 1 - meta.ChunkSize
	}
	copied, err := fs.v.CopyFileRange(ctx, Ino(in.NodeId), in.FhIn, in.OffIn, Ino(in.NodeIdOut), in.FhOut, in.OffOut, len, uint32(in.Flags))
	if err != 0 {
		return 0, fuse.Status(err)
	}
	return uint32(copied), 0
}

func (fs *fileSystem) GetLk(cancel <-chan struct{}, in *fuse.LkIn, out *fuse.LkOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	l := in.Lk
	err := fs.v.Getlk(ctx, Ino(in.NodeId), in.Fh, in.Owner, &l.Start, &l.End, &l.Typ, &l.Pid)
	if err == 0 {
		out.Lk = l
	}
	return fuse.Status(err)
}

func (fs *fileSystem) SetLk(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) {
	return fs.setLk(cancel, in, false)
}

func (fs *fileSystem) SetLkw(cancel <-chan struct{}, in *fuse.LkIn) (code fuse.Status) {
	return fs.setLk(cancel, in, true)
}

func (fs *fileSystem) setLk(cancel <-chan struct{}, in *fuse.LkIn, block bool) (code fuse.Status) {
	if in.LkFlags&fuse.FUSE_LK_FLOCK != 0 {
		return fs.Flock(cancel, in, block)
	}
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	l := in.Lk
	err := fs.v.Setlk(ctx, Ino(in.NodeId), in.Fh, in.Owner, l.Start, l.End, l.Typ, l.Pid, block)
	return fuse.Status(err)
}

func (fs *fileSystem) Flock(cancel <-chan struct{}, in *fuse.LkIn, block bool) (code fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Flock(ctx, Ino(in.NodeId), in.Fh, in.Owner, in.Lk.Typ, block)
	return fuse.Status(err)
}

func (fs *fileSystem) OpenDir(cancel <-chan struct{}, in *fuse.OpenIn, out *fuse.OpenOut) (status fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	ino := Ino(in.NodeId)
	fh, err := fs.v.Opendir(ctx, ino, in.Flags)
	out.Fh = fh
	if fs.conf.ReaddirCache && !vfs.IsSpecialNode(ino) {
		out.OpenFlags |= fuse.FOPEN_CACHE_DIR | fuse.FOPEN_KEEP_CACHE // both flags are required
	}
	return fuse.Status(err)
}

func (fs *fileSystem) ReadDir(cancel <-chan struct{}, in *fuse.ReadIn, out *fuse.DirEntryList) fuse.Status {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entries, _, err := fs.v.Readdir(ctx, Ino(in.NodeId), in.Size, int(in.Offset), in.Fh, false)
	var de fuse.DirEntry
	for i, e := range entries {
		de.Ino = uint64(e.Inode)
		de.Name = string(e.Name)
		de.Mode = e.Attr.SMode()
		if !out.AddDirEntry(de) {
			fs.v.UpdateReaddirOffset(ctx, Ino(in.NodeId), in.Fh, int(in.Offset)+i)
			break
		}
	}
	return fuse.Status(err)
}

func (fs *fileSystem) ReadDirPlus(cancel <-chan struct{}, in *fuse.ReadIn, out *fuse.DirEntryList) fuse.Status {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	entries, readAt, err := fs.v.Readdir(ctx, Ino(in.NodeId), in.Size, int(in.Offset), in.Fh, true)
	ctx.start = readAt
	var de fuse.DirEntry
	for i, e := range entries {
		de.Ino = uint64(e.Inode)
		de.Name = string(e.Name)
		de.Mode = e.Attr.SMode()
		eo := out.AddDirLookupEntry(de)
		if eo == nil {
			fs.v.UpdateReaddirOffset(ctx, Ino(in.NodeId), in.Fh, int(in.Offset)+i)
			break
		}
		if e.Attr.Full {
			fs.replyEntry(ctx, eo, e)
		} else {
			eo.Ino = uint64(e.Inode)
			eo.Generation = 1
		}
	}
	return fuse.Status(err)
}

var cancelReleaseDir = make(chan struct{})

func (fs *fileSystem) ReleaseDir(in *fuse.ReleaseIn) {
	ctx := fs.newContext(cancelReleaseDir, &in.InHeader)
	defer releaseContext(ctx)
	fs.v.Releasedir(ctx, Ino(in.NodeId), in.Fh)
}

func (fs *fileSystem) StatFs(cancel <-chan struct{}, in *fuse.InHeader, out *fuse.StatfsOut) (code fuse.Status) {
	ctx := fs.newContext(cancel, in)
	defer releaseContext(ctx)
	st, err := fs.v.StatFS(ctx, Ino(in.NodeId))
	if err != 0 {
		return fuse.Status(err)
	}
	out.NameLen = 255
	out.Frsize = 4096
	out.Bsize = 4096
	out.Blocks = st.Total / uint64(out.Bsize)
	if out.Blocks < 1 {
		out.Blocks = 1
	}
	out.Bavail = st.Avail / uint64(out.Bsize)
	out.Bfree = out.Bavail
	out.Files = st.Files
	out.Ffree = st.Favail
	return 0
}

func (fs *fileSystem) Ioctl(cancel <-chan struct{}, in *fuse.IoctlIn, out *fuse.IoctlOut, bufIn, bufOut []byte) (status fuse.Status) {
	ctx := fs.newContext(cancel, &in.InHeader)
	defer releaseContext(ctx)
	err := fs.v.Ioctl(ctx, Ino(in.NodeId), in.Cmd, in.Arg, bufIn, bufOut)
	return fuse.Status(err)
}

// Serve starts a server to serve requests from FUSE.
func Serve(v *vfs.VFS, options string, xattrs, ioctl bool) error {
	if err := syscall.Setpriority(syscall.PRIO_PROCESS, os.Getpid(), -19); err != nil {
		logger.Warnf("setpriority: %s", err)
	}
	err := grantAccess()
	if err != nil {
		logger.Debugf("grant access to /dev/fuse: %s", err)
	}
	ensureFuseDev()

	conf := v.Conf
	imp := newFileSystem(conf, v)

	var opt fuse.MountOptions
	opt.FsName = "JuiceFS:" + conf.Format.Name
	opt.Name = "juicefs"
	opt.SingleThreaded = false
	opt.MaxBackground = 50
	opt.EnableLocks = true
	opt.EnableSymlinkCaching = conf.FuseOpts.EnableSymlinkCaching
	opt.EnableAcl = conf.Format.EnableACL
	opt.DontUmask = conf.Format.EnableACL
	opt.DisableXAttrs = !xattrs
	opt.EnableIoctl = ioctl
	opt.MaxWrite = conf.FuseOpts.MaxWrite
	opt.MaxReadAhead = 1 << 20
	opt.DirectMount = true
	opt.AllowOther = os.Getuid() == 0
	opt.Timeout = conf.FuseOpts.Timeout
	opt.EnableReadDirPlusAuto = conf.FuseOpts.EnableReadDirPlusAuto

	if opt.EnableAcl && conf.NonDefaultPermission {
		logger.Warnf("it is recommended to turn on 'default-permissions' when enable acl")
	}

	if opt.EnableAcl && opt.DisableXAttrs {
		logger.Infof("The format \"enable-acl\" flag will enable the xattrs feature.")
		opt.DisableXAttrs = false
	}
	opt.IgnoreSecurityLabels = false

	for _, n := range strings.Split(options, ",") {
		if n == "allow_other" || n == "allow_root" {
			opt.AllowOther = true
		} else if n == "nonempty" || n == "ro" {
		} else if n == "debug" {
			opt.Debug = true
		} else if n == "writeback_cache" {
			opt.EnableWriteback = true
		} else if n == "async_dio" {
			opt.OtherCaps |= fuse.CAP_ASYNC_DIO
		} else if strings.TrimSpace(n) != "" {
			opt.Options = append(opt.Options, strings.TrimSpace(n))
		}
	}
	if !conf.NonDefaultPermission {
		opt.Options = append(opt.Options, "default_permissions")
	}
	if runtime.GOOS == "darwin" {
		opt.Options = append(opt.Options, "fssubtype=juicefs")
		opt.Options = append(opt.Options, "volname="+conf.Format.Name)
		opt.Options = append(opt.Options, "daemon_timeout=60", "iosize=65536", "novncache")
	}
	fssrv, err := fuse.NewServer(imp, conf.Meta.MountPoint, &opt)
	if err != nil {
		if execErr, ok := err.(*exec.Error); ok {
			if pathErr, ok := execErr.Unwrap().(*os.PathError); ok &&
				strings.Contains(pathErr.Path, "fusermount") &&
				pathErr.Unwrap() == syscall.ENOENT {
				return fmt.Errorf("fuse is not installed. Please install it first")
			}
		}
		return fmt.Errorf("fuse: %s", err)
	}
	defer func() {
		if runtime.GOOS == "darwin" {
			_ = fssrv.Unmount()
		}
	}()

	if runtime.GOOS == "linux" {
		v.InvalidateEntry = func(parent Ino, name string) syscall.Errno {
			return syscall.Errno(fssrv.EntryNotify(uint64(parent), name))
		}
	}

	fsserv = fssrv
	fssrv.Serve()
	return nil
}

func GenFuseOpt(conf *vfs.Config, options string, mt int, noxattr, noacl bool, maxWrite int) fuse.MountOptions {
	var opt fuse.MountOptions
	opt.FsName = "JuiceFS:" + conf.Format.Name
	opt.Name = "juicefs"
	opt.SingleThreaded = mt == 0
	opt.MaxBackground = 200
	opt.EnableLocks = true
	opt.EnableSymlinkCaching = true
	opt.DisableXAttrs = noxattr
	opt.EnableAcl = !noacl
	opt.IgnoreSecurityLabels = false
	opt.MaxWrite = maxWrite
	opt.MaxReadAhead = 1 << 20
	opt.DirectMount = true
	opt.DontUmask = true
	opt.Timeout = time.Minute * 15
	opt.EnableReadDirPlusAuto = true
	for _, n := range strings.Split(options, ",") {
		// TODO allow_root
		if n == "allow_other" {
			opt.AllowOther = true
		} else if strings.HasPrefix(n, "fsname=") {
			opt.FsName = n[len("fsname="):]
		} else if n == "writeback_cache" {
			opt.EnableWriteback = true
		} else if n == "debug" {
			opt.Debug = true
			log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds)
		} else if strings.TrimSpace(n) != "" {
			opt.Options = append(opt.Options, strings.TrimSpace(n))
		}
	}
	opt.Options = append(opt.Options, "default_permissions")
	if runtime.GOOS == "darwin" {
		opt.Options = append(opt.Options, "fssubtype=juicefs", "volname="+conf.Format.Name)
		opt.Options = append(opt.Options, "daemon_timeout=60", "iosize=65536", "novncache")
	}
	return opt
}

var fsserv *fuse.Server

func Shutdown() bool {
	if fsserv != nil {
		return fsserv.Shutdown()
	}
	return false
}


================================================
FILE: pkg/fuse/fuse_darwin.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"github.com/hanwen/go-fuse/v2/fuse"
)

func getCreateUmask(mask uint32, defMask uint16) uint16 {
	if defMask != 0xFFFF {
		return defMask
	}
	return 0
}

func getUmask(mask uint32, defMask uint16, isDir bool) uint16 {
	if defMask != 0xFFFF {
		return defMask
	}
	if isDir {
		return uint16(mask)
	}
	return 0
}

func setBlksize(out *fuse.Attr, size uint32) {
}


================================================
FILE: pkg/fuse/fuse_linux.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"github.com/hanwen/go-fuse/v2/fuse"
)

func getCreateUmask(mask uint32, defMask uint16) uint16 {
	if defMask != 0xFFFF {
		return defMask
	}
	return uint16(mask)
}

func getUmask(mask uint32, defMask uint16, isDir bool) uint16 {
	if defMask != 0xFFFF {
		return defMask
	}
	return uint16(mask)
}

func setBlksize(out *fuse.Attr, size uint32) {
	out.Blksize = size
}


================================================
FILE: pkg/fuse/fuse_test.go
================================================
//go:build linux
// +build linux

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//nolint:errcheck
package fuse

import (
	"bytes"
	"errors"
	"io"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"syscall"
	"testing"
	"time"

	"github.com/gofrs/flock"
	"github.com/google/uuid"
	"github.com/hanwen/go-fuse/v2/posixtest"
	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/pkg/xattr"
)

func format(url string) {
	m := meta.NewClient(url, nil)
	format := &meta.Format{
		Name:      "test",
		UUID:      uuid.New().String(),
		Storage:   "file",
		Bucket:    os.TempDir() + "/",
		BlockSize: 4096,
		DirStats:  true,
	}
	err := m.Init(format, true)
	if err != nil {
		log.Fatalf("format: %s", err)
	}
}

func mount(url, mp string) {
	if err := os.MkdirAll(mp, 0777); err != nil {
		log.Fatalf("create %s: %s", mp, err)
	}

	metaConf := meta.DefaultConf()
	metaConf.MountPoint = mp
	m := meta.NewClient(url, metaConf)
	format, err := m.Load(true)
	if err != nil {
		log.Fatalf("load setting: %s", err)
	}

	chunkConf := chunk.Config{
		BlockSize:   format.BlockSize * 1024,
		Compress:    format.Compression,
		MaxUpload:   20,
		MaxDownload: 200,
		BufferSize:  300 << 20,
		CacheSize:   1024,
		CacheDir:    "memory",
	}

	blob, err := object.CreateStorage(strings.ToLower(format.Storage), format.Bucket, format.AccessKey, format.SecretKey, format.SessionToken)
	if err != nil {
		log.Fatalf("object storage: %s", err)
	}
	blob = object.WithPrefix(blob, format.Name+"/")
	store := chunk.NewCachedStore(blob, chunkConf, nil)

	m.OnMsg(meta.CompactChunk, meta.MsgCallback(func(args ...interface{}) error {
		slices := args[0].([]meta.Slice)
		sliceId := args[1].(uint64)
		return vfs.Compact(chunkConf, store, slices, sliceId)
	}))

	conf := &vfs.Config{
		Meta:     metaConf,
		Format:   *format,
		Chunk:    &chunkConf,
		FuseOpts: &vfs.FuseOptions{},
	}

	err = m.NewSession(true)
	if err != nil {
		log.Fatalf("new session: %s", err)
	}

	conf.AttrTimeout = time.Second
	conf.EntryTimeout = time.Second
	conf.DirEntryTimeout = time.Second
	conf.HideInternal = true
	v := vfs.NewVFS(conf, m, store, nil, nil)
	err = Serve(v, "", true, true)
	if err != nil {
		log.Fatalf("fuse server err: %s\n", err)
	}
	_ = m.CloseSession()
}

func umount(mp string, force bool) {
	var cmd *exec.Cmd
	if _, err := exec.LookPath("fusermount"); err == nil {
		if force {
			cmd = exec.Command("fusermount", "-uz", mp)
		} else {
			cmd = exec.Command("fusermount", "-u", mp)
		}
	} else {
		if force {
			cmd = exec.Command("umount", "-l", mp)
		} else {
			cmd = exec.Command("umount", mp)
		}
	}

	out, err := cmd.CombinedOutput()
	if err != nil {
		log.Print(string(out))
	}
}

func waitMountpoint(mp string) chan error {
	ch := make(chan error, 1)
	for i := 0; i < 20; i++ {
		time.Sleep(time.Millisecond * 500)
		st, err := os.Stat(mp)
		if err == nil {
			if sys, ok := st.Sys().(*syscall.Stat_t); ok && sys.Ino == 1 {
				ch <- nil
				return ch
			}
		}
	}
	ch <- errors.New("not ready in 10 seconds")
	return ch
}

func setUp(metaUrl, mp string) error {
	format(metaUrl)
	go mount(metaUrl, mp)
	return <-waitMountpoint(mp)
}

func cleanup(mp string) {
	parent, err := os.Open(mp)
	if err != nil {
		return
	}
	defer parent.Close()
	names, err := parent.Readdirnames(-1)
	if err != nil {
		return
	}
	for _, n := range names {
		os.RemoveAll(filepath.Join(mp, n))
	}
}

func StatFS(t *testing.T, mp string) {
	var st syscall.Statfs_t
	if err := syscall.Statfs(mp, &st); err != nil {
		t.Fatal(err)
	}
	if st.Bsize != 4096 {
		t.Fatalf("bsize should be 4096 but got %d ", st.Bsize)
	}
	if st.Blocks-st.Bavail != 0 {
		t.Fatalf("used blocks should be 0 but got %d", st.Blocks-st.Bavail)
	}
	if st.Files-st.Ffree != 0 {
		t.Fatalf("used files should be 0 but got %d", st.Files)
	}
}

func Xattrs(t *testing.T, mp string) {
	path := filepath.Join(mp, "myfile")
	os.WriteFile(path, []byte(""), 0644)

	const prefix = "user."
	var value = []byte("test-attr-value")
	if err := xattr.Set(path, prefix+"test", value); err != nil {
		t.Fatal(err)
	}
	if _, err := xattr.List(path); err != nil {
		t.Fatal(err)
	}

	if data, err := xattr.Get(path, prefix+"test"); err != nil {
		t.Fatal(err)
	} else if !bytes.Equal(data, value) {
		t.Fatalf("expect %v bot got %v", value, data)
	}
	if err := xattr.Remove(path, prefix+"test"); err != nil {
		t.Fatal(err)
	}
	// One can also specify the flags parameter to be passed to the OS.
	if err := xattr.SetWithFlags(path, prefix+"test", []byte("test-attr-value2"), xattr.XATTR_CREATE); err != nil {
		t.Fatal(err)
	}
}

func Flock(t *testing.T, mp string) {
	path := filepath.Join(mp, "go-lock.lock")
	os.WriteFile(path, []byte(""), 0644)

	fileLock := flock.New(path)
	locked, err := fileLock.TryLock()
	if err != nil {
		t.Fatalf("try lock: %s", err)
	}
	if locked {
		fileLock.Unlock()
	} else {
		t.Fatal("no lock")
	}
}

func PosixLock(t *testing.T, mp string) {
	path := filepath.Join(mp, "go-lock.lock")
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()
	f.WriteString("hello")
	if err := f.Sync(); err != nil {
		t.Fatalf("fsync: %s", err)
	}
	var fl syscall.Flock_t
	fl.Pid = int32(os.Getpid())
	fl.Type = syscall.F_WRLCK
	fl.Whence = io.SeekStart
	err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl)
	for err == syscall.EAGAIN {
		err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl)
	}
	if err != nil {
		t.Fatalf("lock: %s", err)
	}
	if err = syscall.FcntlFlock(f.Fd(), syscall.F_GETLK, &fl); err != nil {
		t.Fatalf("getlk: %s", err)
	}
	if int(fl.Pid) != os.Getpid() {
		t.Fatalf("pid: %d != %d", fl.Pid, os.Getpid())
	}
	fl.Type = syscall.F_UNLCK
	if err = syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &fl); err != nil {
		t.Fatalf("unlock: %s", err)
	}
}

func TestFUSE(t *testing.T) {
	f, err := os.CreateTemp("", "meta")
	if err != nil {
		t.Fatal(err)
	}
	defer os.Remove(f.Name())
	metaUrl := "sqlite3://" + f.Name()
	mp, err := os.MkdirTemp("", "mp")
	if err != nil {
		t.Fatal(err)
	}
	err = setUp(metaUrl, mp)
	if err != nil {
		t.Fatalf("setup: %s", err)
	}
	defer umount(mp, true)

	t.Run("StatFS", func(t *testing.T) {
		StatFS(t, mp)
	})
	delete(posixtest.All, "FdLeak")
	delete(posixtest.All, "FcntlFlockLocksFile") // FIXME: check gofuse in posixtest/posixtest_test.go
	posixtest.All["Xattrs"] = Xattrs
	posixtest.All["Flock"] = Flock
	posixtest.All["POSIXLock"] = PosixLock
	for c, f := range posixtest.All {
		cleanup(mp)
		t.Run(c, func(t *testing.T) {
			f(t, mp)
		})
	}
}


================================================
FILE: pkg/fuse/gidcache.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"bytes"
	"fmt"
	"io"
	"os"
	"runtime"
	"strconv"
	"sync"
	"time"
)

type cItem struct {
	gids   []uint32
	expire time.Time
}

type gidCache struct {
	sync.Mutex
	groups  map[uint32]*cItem
	cacheto time.Duration
}

func newGidCache(cacheto time.Duration) *gidCache {
	g := &gidCache{
		groups:  make(map[uint32]*cItem),
		cacheto: cacheto,
	}
	go g.cleanup()
	return g
}

func (g *gidCache) cleanup() {
	for {
		g.Lock()
		now := time.Now()
		for k, gs := range g.groups {
			if gs.expire.Before(now) {
				delete(g.groups, k)
			}
		}
		g.Unlock()
		time.Sleep(time.Second * 10)
	}
}

func findProcessGroups(pid, gid uint32) []uint32 {
	if runtime.GOOS == "darwin" {
		return []uint32{gid}
	}
	path := fmt.Sprintf("/proc/%d/status", pid)
	f, err := os.Open(path)
	if err != nil {
		return []uint32{gid}
	}
	defer f.Close()
	buf, err := io.ReadAll(f)
	if err != nil {
		return []uint32{gid}
	}

	p := bytes.Index(buf, []byte("Groups:"))
	if p < 0 {
		return []uint32{gid}
	}
	buf = buf[p+7:]
	last := bytes.IndexByte(buf, '\n')
	if last >= 0 {
		buf = buf[:last]
	}
	parts := bytes.Split(buf, []byte(" "))
	gids := []uint32{gid}
	for _, p := range parts {
		g, err := strconv.Atoi(string(bytes.TrimSpace(p)))
		if err == nil && uint32(g) != gid {
			gids = append(gids, uint32(g))
		}
	}
	return gids
}

func (g *gidCache) get(pid, gid uint32) []uint32 {
	if g.cacheto == 0 || pid == 0 || gid == 0 {
		return []uint32{gid}
	}
	now := time.Now()
	g.Lock()
	defer g.Unlock()
	it := g.groups[pid]
	if it != nil && it.expire.Before(now) {
		it = nil
	}
	if it == nil {
		it = &cItem{findProcessGroups(pid, gid), now.Add(g.cacheto)}
		g.groups[pid] = it
	}
	return it.gids
}


================================================
FILE: pkg/fuse/utils.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fuse

import (
	"github.com/juicedata/juicefs/pkg/meta"

	"github.com/hanwen/go-fuse/v2/fuse"
)

func attrToStat(inode Ino, attr *Attr, out *fuse.Attr) {
	out.Ino = uint64(inode)
	out.Uid = attr.Uid
	out.Gid = attr.Gid
	out.Mode = attr.SMode()
	out.Nlink = attr.Nlink
	out.Atime = uint64(attr.Atime)
	out.Atimensec = attr.Atimensec
	out.Mtime = uint64(attr.Mtime)
	out.Mtimensec = attr.Mtimensec
	out.Ctime = uint64(attr.Ctime)
	out.Ctimensec = attr.Ctimensec

	var size, blocks uint64
	switch attr.Typ {
	case meta.TypeDirectory:
		fallthrough
	case meta.TypeSymlink:
		fallthrough
	case meta.TypeFile:
		size = attr.Length
		blocks = (size + 511) / 512
	case meta.TypeBlockDev:
		fallthrough
	case meta.TypeCharDev:
		out.Rdev = attr.Rdev
	}
	out.Size = size
	out.Blocks = blocks
	setBlksize(out, 0x10000)
}


================================================
FILE: pkg/gateway/gateway.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gateway

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"path"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	jsoniter "github.com/json-iterator/go"
	"github.com/minio/minio-go/v7/pkg/tags"
	"github.com/minio/minio/pkg/bucket/policy"
	"github.com/minio/minio/pkg/madmin"
	"golang.org/x/sync/errgroup"

	"github.com/google/uuid"
	"github.com/minio/minio-go/v7/pkg/s3utils"
	minio "github.com/minio/minio/cmd"
	xhttp "github.com/minio/minio/cmd/http"

	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
)

const (
	sep          = "/"
	metaBucket   = ".sys"
	subDirPrefix = 3 // 16^3=4096 slots
)

var mctx meta.Context
var logger = utils.GetLogger("juicefs")

type Config struct {
	MultiBucket bool
	KeepEtag    bool
	Umask       uint16
	ObjTag      bool
	ObjMeta     bool
	HeadDir     bool
	HideDir     bool
	ReadOnly    bool
}

func NewJFSGateway(jfs *fs.FileSystem, conf *vfs.Config, gConf *Config) (minio.ObjectLayer, error) {
	mctx = meta.NewContext(uint32(os.Getpid()), uint32(utils.GetCurrentUID()), []uint32{uint32(utils.GetCurrentGID())})
	jfsObj := &jfsObjects{fs: jfs, conf: conf, listPool: minio.NewTreeWalkPool(time.Second * 10), gConf: gConf, nsMutex: minio.NewNSLock(false)}
	go jfsObj.cleanup()
	return jfsObj, nil
}

type jfsObjects struct {
	conf     *vfs.Config
	fs       *fs.FileSystem
	listPool *minio.TreeWalkPool
	nsMutex  *minio.NsLockMap
	gConf    *Config
}

func (n *jfsObjects) PutObjectMetadata(ctx context.Context, s string, s2 string, options minio.ObjectOptions) (minio.ObjectInfo, error) {
	return minio.ObjectInfo{}, minio.NotImplemented{}
}

func (n *jfsObjects) NSScanner(ctx context.Context, bf *minio.BloomFilter, updates chan<- madmin.DataUsageInfo) error {
	return nil
}

func (n *jfsObjects) IsCompressionSupported() bool {
	return false
}

func (n *jfsObjects) IsEncryptionSupported() bool {
	return false
}

// IsReady returns whether the layer is ready to take requests.
func (n *jfsObjects) IsReady(_ context.Context) bool {
	return true
}

func (n *jfsObjects) Shutdown(ctx context.Context) error {
	return n.fs.Close()
}

func (n *jfsObjects) StorageInfo(ctx context.Context) (info minio.StorageInfo, errors []error) {
	sinfo := minio.StorageInfo{}
	sinfo.Backend.Type = madmin.FS
	return sinfo, nil
}

func jfsToObjectErr(ctx context.Context, err error, params ...string) error {
	if err == nil {
		return nil
	}
	bucket := ""
	object := ""
	uploadID := ""
	switch len(params) {
	case 3:
		uploadID = params[2]
		fallthrough
	case 2:
		object = params[1]
		fallthrough
	case 1:
		bucket = params[0]
	}

	if eno, ok := err.(syscall.Errno); !ok {
		logger.Errorf("error: %s bucket: %s, object: %s, uploadID: %s", err, bucket, object, uploadID)
		return err
	} else if eno == 0 {
		return nil
	}

	switch {
	case fs.IsNotExist(err):
		if uploadID != "" {
			return minio.InvalidUploadID{
				UploadID: uploadID,
			}
		}
		if object != "" {
			return minio.ObjectNotFound{Bucket: bucket, Object: object}
		}
		return minio.BucketNotFound{Bucket: bucket}
	case fs.IsExist(err):
		if object != "" {
			return minio.PrefixAccessDenied{Bucket: bucket, Object: object}
		}
		return minio.BucketAlreadyOwnedByYou{Bucket: bucket}
	case fs.IsNotEmpty(err):
		if object != "" {
			return minio.PrefixAccessDenied{Bucket: bucket, Object: object}
		}
		return minio.BucketNotEmpty{Bucket: bucket}
	default:
		logger.Errorf("other error: %s bucket: %s, object: %s, uploadID: %s", err, bucket, object, uploadID)
		return err
	}
}

// isValidBucketName verifies whether a bucket name is valid.
func (n *jfsObjects) isValidBucketName(bucket string) error {
	if strings.HasPrefix(bucket, minio.MinioMetaBucket) {
		return nil
	}
	if s3utils.CheckValidBucketNameStrict(bucket) != nil {
		return minio.BucketNameInvalid{Bucket: bucket}
	}
	if !n.gConf.MultiBucket && bucket != n.conf.Format.Name {
		return minio.BucketNotFound{Bucket: bucket}
	}
	return nil
}

func (n *jfsObjects) path(p ...string) string {
	if !n.gConf.MultiBucket && len(p) > 0 && p[0] == n.conf.Format.Name {
		p = p[1:]
	}
	return sep + minio.PathJoin(p...)
}

func (n *jfsObjects) tpath(p ...string) string {
	return sep + metaBucket + n.path(p...)
}

func (n *jfsObjects) upath(bucket, uploadID string) string {
	return n.tpath(bucket, "uploads", uploadID[:subDirPrefix], uploadID)
}

func (n *jfsObjects) ppath(bucket, uploadID, part string) string {
	return n.tpath(bucket, "uploads", uploadID[:subDirPrefix], uploadID, part)
}

func (n *jfsObjects) ppathFlat(bucket, uploadID, part string) string { // compatible with tmp files uploaded by old versions(<1.2)
	return n.tpath(bucket, "uploads", uploadID, part)
}

func (n *jfsObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error {
	if err := n.isValidBucketName(bucket); err != nil {
		return err
	}
	if !n.gConf.MultiBucket {
		return minio.BucketNotEmpty{Bucket: bucket}
	}
	if eno := n.fs.Delete(mctx, n.path(minio.MinioMetaBucket, minio.BucketMetaPrefix, bucket, minio.BucketMetadataFile)); eno != 0 {
		logger.Errorf("delete bucket metadata: %s", eno)
	}
	_ = n.fs.Delete(mctx, n.path(minio.MinioMetaBucket, minio.BucketMetaPrefix, bucket))
	eno := n.fs.Delete(mctx, n.path(bucket))
	return jfsToObjectErr(ctx, eno, bucket)
}

func (n *jfsObjects) MakeBucketWithLocation(ctx context.Context, bucket string, options minio.BucketOptions) error {
	if bucket != minio.MinioMetaBucket {
		if err := n.isValidBucketName(bucket); err != nil {
			return err
		}
		if !n.gConf.MultiBucket {
			return nil
		}
	}
	eno := n.fs.Mkdir(mctx, n.path(bucket), 0777, n.gConf.Umask)
	if eno == 0 {
		metadata := minio.NewBucketMetadata(bucket)
		if err := metadata.Save(ctx, n); err != nil {
			return err
		}
	}
	return jfsToObjectErr(ctx, eno, bucket)
}

func (n *jfsObjects) GetBucketInfo(ctx context.Context, bucket string) (bi minio.BucketInfo, err error) {
	if err := n.isValidBucketName(bucket); err != nil {
		return bi, err
	}
	fi, eno := n.fs.Stat(mctx, n.path(bucket))
	if eno == 0 {
		bi = minio.BucketInfo{
			Name:    bucket,
			Created: time.Unix(fi.Atime()/1000, 0),
		}
	}
	return bi, jfsToObjectErr(ctx, eno, bucket)
}

// Ignores all reserved bucket names or invalid bucket names.
func isReservedOrInvalidBucket(bucketEntry string, strict bool) bool {
	if err := s3utils.CheckValidBucketName(bucketEntry); err != nil {
		return true
	}
	return bucketEntry == metaBucket
}

func (n *jfsObjects) ListBuckets(ctx context.Context) (buckets []minio.BucketInfo, err error) {
	if !n.gConf.MultiBucket {
		fi, eno := n.fs.Stat(mctx, "/")
		if eno != 0 {
			return nil, jfsToObjectErr(ctx, eno)
		}
		buckets = []minio.BucketInfo{{
			Name:    n.conf.Format.Name,
			Created: time.Unix(fi.Atime()/1000, 0),
		}}
		return buckets, nil
	}
	f, eno := n.fs.Open(mctx, sep, 0)
	if eno != 0 {
		return nil, jfsToObjectErr(ctx, eno)
	}
	defer f.Close(mctx)
	entries, eno := f.Readdir(mctx, 10000)
	if eno != 0 {
		return nil, jfsToObjectErr(ctx, eno)
	}

	for _, entry := range entries {
		// Ignore all reserved bucket names and invalid bucket names.
		if isReservedOrInvalidBucket(entry.Name(), false) || n.isValidBucketName(entry.Name()) != nil {
			continue
		}
		if entry.IsDir() {
			buckets = append(buckets, minio.BucketInfo{
				Name:    entry.Name(),
				Created: time.Unix(entry.(*fs.FileStat).Atime()/1000, 0),
			})
		}
	}

	// Sort bucket infos by bucket name.
	sort.Slice(buckets, func(i, j int) bool {
		return buckets[i].Name < buckets[j].Name
	})
	return buckets, nil
}

func (n *jfsObjects) isLeafDir(bucket, leafPath string) bool {
	return false
}

func (n *jfsObjects) isLeaf(bucket, leafPath string) bool {
	return !strings.HasSuffix(leafPath, "/")
}

func (n *jfsObjects) listDirFactory() minio.ListDirFunc {
	return func(bucket, prefixDir, prefixEntry string) (emptyDir bool, entries []*minio.Entry, delayIsLeaf bool) {
		f, eno := n.fs.Open(mctx, n.path(bucket, prefixDir), 0)
		if eno != 0 {
			return fs.IsNotExist(eno), nil, false
		}
		defer f.Close(mctx)
		if !n.gConf.HideDir {
			if fi, _ := f.Stat(); fi.(*fs.FileStat).Atime() == 0 && prefixEntry == "" {
				entries = append(entries, &minio.Entry{Name: ""})
			}
		}

		fis, eno := f.Readdir(mctx, 0)
		if eno != 0 {
			return
		}
		root := n.path(bucket, prefixDir) == "/"
		for _, fi := range fis {
			if root && (fi.Name() == metaBucket || fi.Name() == minio.MinioMetaBucket) {
				continue
			}
			if stat, ok := fi.(*fs.FileStat); ok && stat.IsSymlink() {
				var err syscall.Errno
				p := n.path(bucket, prefixDir, fi.Name())
				if fi, err = n.fs.Stat(mctx, p); err != 0 {
					logger.Errorf("stat %s: %s", p, err)
					continue
				}
			}
			entry := &minio.Entry{Name: fi.Name(), Info: fi}

			if fi.IsDir() {
				entry.Name += sep
			}
			entries = append(entries, entry)
		}
		if len(entries) == 0 {
			return true, nil, false
		}
		entries, delayIsLeaf = minio.FilterListEntries(bucket, prefixDir, entries, prefixEntry, n.isLeaf)
		return false, entries, delayIsLeaf
	}
}

func (n *jfsObjects) checkBucket(ctx context.Context, bucket string) error {
	if err := n.isValidBucketName(bucket); err != nil {
		return err
	}
	bucketPath := n.path(bucket)
	if bucketPath != "/" { // no need to stat "/" in every request
		if _, eno := n.fs.Stat(mctx, bucketPath); eno != 0 {
			return jfsToObjectErr(ctx, eno, bucket)
		}
	}
	return nil
}

// ListObjects lists all blobs in JFS bucket filtered by prefix.
func (n *jfsObjects) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi minio.ListObjectsInfo, err error) {
	if err := n.checkBucket(ctx, bucket); err != nil {
		return loi, err
	}
	getObjectInfo := func(ctx context.Context, bucket, object string, fi_ any) (obj minio.ObjectInfo, err error) {
		var eno syscall.Errno
		var info *minio.ObjectInfo
		if fi_ == nil {
			var fi *fs.FileStat
			fi, eno = n.fs.Stat(mctx, n.path(bucket, object))
			if eno == 0 {
				size := fi.Size()
				if fi.IsDir() {
					size = 0
				}
				info = &minio.ObjectInfo{
					Bucket:   bucket,
					ModTime:  fi.ModTime(),
					Size:     size,
					IsDir:    fi.IsDir(),
					AccTime:  fi.ModTime(),
					IsLatest: true,
				}
			}

			// replace links to external file systems with empty files
			if errors.Is(eno, syscall.ENOTSUP) {
				now := time.Now()
				info = &minio.ObjectInfo{
					Bucket:   bucket,
					ModTime:  now,
					Size:     0,
					IsDir:    false,
					AccTime:  now,
					IsLatest: true,
				}
				eno = 0
			}
		} else {
			fi := fi_.(*fs.FileStat)
			info = &minio.ObjectInfo{
				Bucket:   bucket,
				Name:     fi.Name(),
				ModTime:  fi.ModTime(),
				Size:     fi.Size(),
				IsDir:    fi.IsDir(),
				AccTime:  fi.ModTime(),
				IsLatest: true,
			}
			if fi.IsDir() {
				info.Size = 0
			}
		}

		if info == nil {
			return obj, jfsToObjectErr(ctx, eno, bucket, object)
		}
		info.Name = object
		if n.gConf.KeepEtag && !strings.HasSuffix(object, sep) {
			etag, _ := n.fs.GetXattr(mctx, n.path(bucket, object), s3Etag)
			info.ETag = string(etag)
		}
		return *info, jfsToObjectErr(ctx, eno, bucket, object)
	}

	if maxKeys == 0 {
		maxKeys = -1 // list as many objects as possible
	}
	return minio.ListObjects(ctx, n, bucket, prefix, marker, delimiter, maxKeys, n.listPool, n.listDirFactory(), n.isLeaf, n.isLeafDir, getObjectInfo, getObjectInfo)
}

// ListObjectsV2 lists all blobs in JFS bucket filtered by prefix
func (n *jfsObjects) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int,
	fetchOwner bool, startAfter string) (loi minio.ListObjectsV2Info, err error) {
	if err := n.isValidBucketName(bucket); err != nil {
		return minio.ListObjectsV2Info{}, err
	}
	// fetchOwner is not supported and unused.
	marker := continuationToken
	if marker == "" {
		marker = startAfter
	}
	resultV1, err := n.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
	if err == nil {
		loi = minio.ListObjectsV2Info{
			Objects:               resultV1.Objects,
			Prefixes:              resultV1.Prefixes,
			ContinuationToken:     continuationToken,
			NextContinuationToken: resultV1.NextMarker,
			IsTruncated:           resultV1.IsTruncated,
		}
	}
	return loi, err
}

func (n *jfsObjects) setFileAtime(p string, atime int64) {
	if f, eno := n.fs.Open(mctx, p, 0); eno == 0 {
		defer f.Close(mctx)
		if eno := f.Utime(mctx, atime, -1); eno != 0 {
			logger.Warnf("set atime of %s: %s", p, eno)
		}
	} else if eno != syscall.ENOENT {
		logger.Warnf("open %s: %s", p, eno)
	}
}

func (n *jfsObjects) DeleteObject(ctx context.Context, bucket, object string, options minio.ObjectOptions) (info minio.ObjectInfo, err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	err = n.delObj(bucket, object)
	info.Bucket = bucket
	info.Name = object
	return info, jfsToObjectErr(ctx, err, bucket, object)
}

func (n *jfsObjects) delObj(bucket string, object string) error {
	p := path.Clean(n.path(bucket, object))
	root := n.path(bucket)
	if strings.HasSuffix(object, sep) {
		// reset atime
		n.setFileAtime(p, time.Now().Unix())
	}
	var err error
	for p != root {
		if eno := n.fs.Delete(mctx, p); eno != 0 {
			if fs.IsNotEmpty(eno) || fs.IsNotExist(eno) {
				err = nil
			} else {
				err = eno
			}
			break
		}
		p = path.Dir(p)
		if fi, _ := n.fs.Stat(mctx, p); fi == nil || fi.Atime() == 0 {
			break
		}
	}
	return err
}

func (n *jfsObjects) DeleteObjects(ctx context.Context, bucket string, objects []minio.ObjectToDelete, options minio.ObjectOptions) (objs []minio.DeletedObject, errs []error) {
	objs = make([]minio.DeletedObject, len(objects))
	errs = make([]error, len(objects))
	if err := n.checkBucket(ctx, bucket); err != nil {
		for idx := range objects {
			errs[idx] = minio.BucketNotFound{Bucket: bucket}
		}
		return
	}
	delMap := make(map[string][]int)
	for idx, o := range objects {
		p := path.Dir(path.Clean(n.path(bucket, o.ObjectName)))
		delMap[p] = append(delMap[p], idx)
	}
	var g errgroup.Group
	g.SetLimit(runtime.NumCPU())
	for ppath := range delMap {
		ppath := ppath
		idxs := delMap[ppath]
		ps := make([]string, len(idxs))
		for i, idx := range idxs {
			ps[i] = n.path(bucket, objects[idx].ObjectName)
		}
		g.Go(func() error {
			// will ignore dir
			err := n.fs.BatchDeleteEntries(mctx, ppath, ps)
			if err != 0 {
				for _, idx := range idxs {
					errs[idx] = jfsToObjectErr(ctx, err, bucket, objects[idx].ObjectName)
				}
				return err
			}
			if e := n.delObj(bucket, ppath); e != nil {
				for _, idx := range idxs {
					errs[idx] = e
				}
				return err
			}
			return err
		})
	}
	_ = g.Wait()
	return
}

type fReader struct {
	*fs.File
}

func (f *fReader) Read(b []byte) (int, error) {
	return f.File.Read(mctx, b)
}

func (n *jfsObjects) GetObjectNInfo(ctx context.Context, bucket, object string, rs *minio.HTTPRangeSpec, h http.Header, lockType minio.LockType, opts minio.ObjectOptions) (gr *minio.GetObjectReader, err error) {
	objInfo, err := n.GetObjectInfo(ctx, bucket, object, opts)
	if err != nil {
		return nil, err
	}

	var startOffset, length int64
	startOffset, length, err = rs.GetOffsetLength(objInfo.Size)
	if err != nil {
		return
	}
	f, eno := n.fs.Open(mctx, n.path(bucket, object), vfs.MODE_MASK_R)
	if eno != 0 {
		return nil, jfsToObjectErr(ctx, eno, bucket, object)
	}
	_, _ = f.Seek(mctx, startOffset, 0)
	r := &io.LimitedReader{R: &fReader{f}, N: length}
	closer := func() { _ = f.Close(mctx) }
	return minio.NewGetObjectReaderFromReader(r, objInfo, opts, closer)
}

func (n *jfsObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (info minio.ObjectInfo, err error) {
	if err = n.checkBucket(ctx, srcBucket); err != nil {
		return
	}
	if err = n.checkBucket(ctx, dstBucket); err != nil {
		return
	}
	dst := n.path(dstBucket, dstObject)
	src := n.path(srcBucket, srcObject)

	if minio.IsStringEqual(src, dst) {
		// if we copy the same object for set metadata
		err = n.setObjMeta(dst, srcInfo.UserDefined)
		if err != nil {
			logger.Errorf("set object metadata error, path: %s error %s", dst, err)
		}
		return n.GetObjectInfo(ctx, srcBucket, srcObject, minio.ObjectOptions{})
	}
	uuid := minio.MustGetUUID()
	tmp := n.tpath(dstBucket, "tmp", uuid[:subDirPrefix], uuid)
	f, eno := n.fs.Create(mctx, tmp, 0666, n.gConf.Umask)
	if eno == syscall.ENOENT {
		_ = n.mkdirAll(ctx, path.Dir(tmp))
		f, eno = n.fs.Create(mctx, tmp, 0666, n.gConf.Umask)
	}
	if eno != 0 {
		logger.Errorf("create %s: %s", tmp, eno)
		return
	}
	defer func() {
		_ = f.Close(mctx)
		if err != nil {
			_ = n.fs.Delete(mctx, tmp)
		}
	}()

	_, eno = n.fs.CopyFileRange(mctx, src, 0, tmp, 0, 1<<63)
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, srcBucket, srcObject)
		logger.Errorf("copy %s to %s: %s", src, tmp, err)
		return
	}

	var etag []byte
	if n.gConf.KeepEtag {
		etag, _ = n.fs.GetXattr(mctx, src, s3Etag)
		if len(etag) != 0 {
			eno = n.fs.SetXattr(mctx, tmp, s3Etag, etag, 0)
			if eno != 0 {
				logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", tmp, s3Etag, etag, 0)
			}
		}
	}

	var tagStr string
	if n.gConf.ObjTag && srcInfo.UserDefined != nil {
		if tagStr = srcInfo.UserDefined[xhttp.AmzObjectTagging]; tagStr != "" {
			if eno := n.fs.SetXattr(mctx, tmp, s3Tags, []byte(tagStr), 0); eno != 0 {
				logger.Errorf("set object tags error, path: %s, value: %s error %s", tmp, tagStr, eno)
			}
		}
	}
	err = n.setObjMeta(tmp, srcInfo.UserDefined)
	if err != nil {
		logger.Errorf("set object metadata error, path: %s error %s", dst, err)
	}

	eno = n.fs.Rename(mctx, tmp, dst, 0)
	if eno == syscall.ENOENT {
		if err = n.mkdirAll(ctx, path.Dir(dst)); err != nil {
			logger.Errorf("mkdirAll %s: %s", path.Dir(dst), err)
			err = jfsToObjectErr(ctx, err, dstBucket, dstObject)
			return
		}
		eno = n.fs.Rename(mctx, tmp, dst, 0)
	}
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, srcBucket, srcObject)
		logger.Errorf("rename %s to %s: %s", tmp, dst, err)
		return
	}
	fi, eno := n.fs.Stat(mctx, dst)
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, dstBucket, dstObject)
		return
	}

	return minio.ObjectInfo{
		Bucket:      dstBucket,
		Name:        dstObject,
		ETag:        string(etag),
		ModTime:     fi.ModTime(),
		Size:        fi.Size(),
		IsDir:       fi.IsDir(),
		AccTime:     fi.ModTime(),
		UserTags:    tagStr,
		UserDefined: minio.CleanMetadata(srcInfo.UserDefined),
		IsLatest:    true,
	}, nil
}

var buffPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 1<<17)
		return &buf
	},
}

func (n *jfsObjects) GetObjectInfo(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	fi, eno := n.fs.Stat(mctx, n.path(bucket, object))
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, bucket, object)
		return
	}
	// put /dir1/key1; head /dir1 return 404; head /dir1/ return 404; head /dir1/key1 return 200
	// put /dir1/key1/; head /dir1/key1 return 404; head /dir1/key1/ return 200
	var isObject bool
	if strings.HasSuffix(object, sep) && fi.IsDir() && fi.Atime() == 0 {
		isObject = true
	} else if !strings.HasSuffix(object, sep) && !fi.IsDir() {
		isObject = true
	}
	if !n.gConf.HeadDir && !isObject {
		err = jfsToObjectErr(ctx, syscall.ENOENT, bucket, object)
		return
	}
	var etag []byte
	if n.gConf.KeepEtag && !fi.IsDir() {
		etag, _ = n.fs.GetXattr(mctx, n.path(bucket, object), s3Etag)
	}
	size := fi.Size()
	if fi.IsDir() {
		size = 0
	}
	// key1=value1&key2=value2
	var tagStr []byte
	if n.gConf.ObjTag {
		var errno syscall.Errno
		if tagStr, errno = n.fs.GetXattr(mctx, n.path(bucket, object), s3Tags); errno != 0 && errno != meta.ENOATTR {
			return minio.ObjectInfo{}, errno
		}
	}
	objMeta, err := n.getObjMeta(n.path(bucket, object))
	if err != nil {
		return minio.ObjectInfo{}, err
	}
	if opts.UserDefined == nil {
		opts.UserDefined = make(map[string]string)
	}
	for k, v := range objMeta {
		opts.UserDefined[k] = v
	}
	contentType := utils.GuessMimeType(object)
	if c, exist := objMeta["content-type"]; exist && len(c) > 0 {
		contentType = c
	}
	return minio.ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ModTime:     fi.ModTime(),
		Size:        size,
		IsDir:       fi.IsDir(),
		AccTime:     fi.ModTime(),
		ETag:        string(etag),
		ContentType: contentType,
		UserTags:    string(tagStr),
		UserDefined: minio.CleanMetadata(opts.UserDefined),
		IsLatest:    true,
	}, nil
}

func (n *jfsObjects) mkdirAll(ctx context.Context, p string) error {
	if fi, eno := n.fs.Stat(mctx, p); eno == 0 {
		if !fi.IsDir() {
			return fmt.Errorf("%s is not directory", p)
		}
		return nil
	}
	eno := n.fs.Mkdir(mctx, p, 0777, n.gConf.Umask)
	if eno != 0 && fs.IsNotExist(eno) {
		if err := n.mkdirAll(ctx, path.Dir(p)); err != nil {
			return err
		}
		eno = n.fs.Mkdir(mctx, p, 0777, n.gConf.Umask)
	}
	if eno != 0 && fs.IsExist(eno) {
		eno = 0
	}
	if eno == 0 {
		return nil
	}
	return eno
}

func (n *jfsObjects) putObject(ctx context.Context, bucket, object string, r *minio.PutObjReader, opts minio.ObjectOptions, applyObjTaggingFunc func(tmpName string)) (err error) {
	uuid := minio.MustGetUUID()
	tmpname := n.tpath(bucket, "tmp", uuid[:subDirPrefix], uuid)
	f, eno := n.fs.Create(mctx, tmpname, 0666, n.gConf.Umask)
	if eno == syscall.ENOENT {
		_ = n.mkdirAll(ctx, path.Dir(tmpname))
		f, eno = n.fs.Create(mctx, tmpname, 0666, n.gConf.Umask)
	}
	if eno != 0 {
		logger.Errorf("create %s: %s", tmpname, eno)
		err = eno
		return
	}
	defer func() {
		if err != nil {
			_ = n.fs.Delete(mctx, tmpname)
		}
	}()
	var buf = buffPool.Get().(*[]byte)
	defer buffPool.Put(buf)
	for {
		var n int
		n, err = io.ReadFull(r, *buf)
		if n == 0 {
			if err == io.EOF {
				err = nil
			}
			break
		}
		_, eno := f.Write(mctx, (*buf)[:n])
		if eno != 0 {
			err = eno
			break
		}
	}
	if err == nil {
		eno = f.Close(mctx)
		if eno != 0 {
			err = eno
		}
	} else {
		_ = f.Close(mctx)
	}
	if err != nil {
		return
	}

	applyObjTaggingFunc(tmpname)

	eno = n.fs.Rename(mctx, tmpname, object, 0)
	if eno == syscall.ENOENT {
		if err = n.mkdirAll(ctx, path.Dir(object)); err != nil {
			logger.Errorf("mkdirAll %s: %s", path.Dir(object), err)
			err = jfsToObjectErr(ctx, err, bucket, object)
			return
		}
		eno = n.fs.Rename(mctx, tmpname, object, 0)
	}
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, bucket, object)
	}
	return
}

func (n *jfsObjects) PutObject(ctx context.Context, bucket string, object string, r *minio.PutObjReader, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	var tagStr string
	var etag string
	p := n.path(bucket, object)
	if strings.HasSuffix(object, sep) {
		if err = n.mkdirAll(ctx, p); err != nil {
			err = jfsToObjectErr(ctx, err, bucket, object)
			return
		}
		if r.Size() > 0 {
			err = minio.ObjectExistsAsDirectory{
				Bucket: bucket,
				Object: object,
				Err:    syscall.EEXIST,
			}
			return
		}
		// if the put object is a directory, set its atime to 0
		n.setFileAtime(p, 0)
	} else {
		if err = n.putObject(ctx, bucket, p, r, opts, func(tmpName string) {
			etag = r.MD5CurrentHexString()
			if n.gConf.KeepEtag && !strings.HasSuffix(object, sep) {
				if eno := n.fs.SetXattr(mctx, tmpName, s3Etag, []byte(etag), 0); eno != 0 {
					logger.Errorf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", tmpName, s3Etag, etag, 0)
				}
			}
			// tags: key1=value1&key2=value2&key3=value3
			if n.gConf.ObjTag && opts.UserDefined != nil {
				if tagStr = opts.UserDefined[xhttp.AmzObjectTagging]; tagStr != "" {
					if eno := n.fs.SetXattr(mctx, tmpName, s3Tags, []byte(tagStr), 0); eno != 0 {
						logger.Errorf("set object tags error, path: %s, value: %s error: %s", tmpName, tagStr, eno)
					}
				}
			}
			err = n.setObjMeta(tmpName, opts.UserDefined)
			if err != nil {
				logger.Errorf("set object metadata error, path: %s error %s", p, err)
			}
		}); err != nil {
			return
		}
	}
	fi, eno := n.fs.Stat(mctx, p)
	if eno != 0 {
		return objInfo, jfsToObjectErr(ctx, eno, bucket, object)
	}

	return minio.ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ETag:        etag,
		ModTime:     fi.ModTime(),
		Size:        fi.Size(),
		IsDir:       fi.IsDir(),
		AccTime:     fi.ModTime(),
		UserTags:    tagStr,
		UserDefined: minio.CleanMetadata(opts.UserDefined),
		IsLatest:    true,
	}, nil
}

func (n *jfsObjects) NewMultipartUpload(ctx context.Context, bucket string, object string, opts minio.ObjectOptions) (uploadID string, err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	uploadID = minio.MustGetUUID()
	p := n.upath(bucket, uploadID)
	err = n.mkdirAll(ctx, p)
	if err == nil {
		eno := n.fs.SetXattr(mctx, p, uploadKeyName, []byte(object), 0)
		if eno != 0 {
			logger.Warnf("set object %s on upload %s: %s", object, uploadID, eno)
		}
		if n.gConf.ObjTag && opts.UserDefined != nil {
			if tagStr := opts.UserDefined[xhttp.AmzObjectTagging]; tagStr != "" {
				if eno := n.fs.SetXattr(mctx, p, s3Tags, []byte(tagStr), 0); eno != 0 {
					logger.Errorf("set object tags error, path: %s, value: %s errors: %s", p, tagStr, eno)
				}
			}
		}
		err = n.setObjMeta(p, opts.UserDefined)
		if err != nil {
			logger.Errorf("set object metadata error, path: %s  error %s", p, err)
		}
	}
	return
}

const uploadKeyName = "s3-object"
const s3Etag = "s3-etag"

// less than 64k ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions
const s3Tags = "s3-tags"

// S3 object metadata
const s3Meta = "s3-meta"
const amzMeta = "x-amz-meta-"

var s3UserControlledSystemMeta = []string{
	"cache-control",
	"content-disposition",
	"content-type",
}

func (n *jfsObjects) getObjMeta(p string) (objMeta map[string]string, err error) {
	if n.gConf.ObjMeta {
		var errno syscall.Errno
		var metadataStr []byte
		if metadataStr, errno = n.fs.GetXattr(mctx, p, s3Meta); errno != 0 && errno != meta.ENOATTR {
			return objMeta, errno
		}
		if len(metadataStr) > 0 {
			err = json.Unmarshal(metadataStr, &objMeta)
			return objMeta, err
		}
	} else {
		objMeta = make(map[string]string)
	}
	return objMeta, nil
}

func (n *jfsObjects) setObjMeta(p string, metadata map[string]string) error {
	if n.gConf.ObjMeta && metadata != nil {
		meta := make(map[string]string)
		for k, v := range metadata {
			k = strings.ToLower(k)
			if strings.HasPrefix(k, amzMeta) {
				meta[k] = v
			} else {
				for _, systemMetaKey := range s3UserControlledSystemMeta {
					if k == systemMetaKey {
						meta[k] = v
						break
					}
				}
			}
		}
		if len(meta) > 0 {
			s3MetadataValue, err := json.Marshal(meta)
			if err != nil {
				return err
			}
			if eno := n.fs.SetXattr(mctx, p, s3Meta, s3MetadataValue, 0); eno != 0 {
				logger.Errorf("set object metadata error, path: %s,value: %s error: %s", p, string(s3Meta), eno)
			}
		}
	}
	return nil
}

func (n *jfsObjects) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi minio.ListMultipartsInfo, err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	f, eno := n.fs.Open(mctx, n.tpath(bucket, "uploads"), 0)
	if eno != 0 {
		return // no found
	}
	defer f.Close(mctx)
	parents, eno := f.ReaddirPlus(mctx, 0)
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, bucket)
		return
	}
	lmi.Prefix = prefix
	lmi.KeyMarker = keyMarker
	lmi.UploadIDMarker = uploadIDMarker
	lmi.MaxUploads = maxUploads
	lmi.Delimiter = delimiter
	commPrefixSet := make(map[string]struct{})
	for _, p := range parents {
		f, eno := n.fs.Open(mctx, n.tpath(bucket, "uploads", string(p.Name)), 0)
		if eno != 0 {
			return
		}
		defer f.Close(mctx)
		entries, eno := f.ReaddirPlus(mctx, 0)
		if eno != 0 {
			err = jfsToObjectErr(ctx, eno, bucket)
			return
		}

		for _, e := range entries {
			if len(e.Name) != 36 {
				continue // not an uuid
			}
			uploadID := string(e.Name)
			// todo: parallel
			object_, eno := n.fs.GetXattr(mctx, n.upath(bucket, uploadID), uploadKeyName)
			if eno != 0 {
				logger.Warnf("get object xattr error %s: %s, ignore this item", n.upath(bucket, uploadID), eno)
				continue
			}
			object := string(object_)
			if strings.HasPrefix(object, prefix) {
				if keyMarker != "" && object+uploadID > keyMarker+uploadIDMarker || keyMarker == "" {
					lmi.Uploads = append(lmi.Uploads, minio.MultipartInfo{
						Object:    object,
						UploadID:  uploadID,
						Initiated: time.Unix(e.Attr.Atime, int64(e.Attr.Atimensec)),
					})
				}
			}
		}
	}

	sort.Slice(lmi.Uploads, func(i, j int) bool {
		if lmi.Uploads[i].Object == lmi.Uploads[j].Object {
			return lmi.Uploads[i].UploadID < lmi.Uploads[j].UploadID
		} else {
			return lmi.Uploads[i].Object < lmi.Uploads[j].Object
		}
	})

	if delimiter != "" {
		var tmp []minio.MultipartInfo
		for _, info := range lmi.Uploads {
			if maxUploads == 0 {
				lmi.IsTruncated = true
				break
			}
			index := strings.Index(strings.TrimPrefix(info.Object, prefix), delimiter)
			if index == -1 {
				tmp = append(tmp, info)
				maxUploads--
			} else {
				commPrefix := info.Object[:index+1]
				if _, ok := commPrefixSet[commPrefix]; ok {
					continue
				}
				commPrefixSet[commPrefix] = struct{}{}
				maxUploads--
			}
		}
		lmi.Uploads = tmp
		for prefix := range commPrefixSet {
			lmi.CommonPrefixes = append(lmi.CommonPrefixes, prefix)
		}
		sort.Strings(lmi.CommonPrefixes)
	} else if len(lmi.Uploads) > maxUploads {
		lmi.IsTruncated = true
		lmi.Uploads = lmi.Uploads[:maxUploads]
	}

	if len(lmi.Uploads) != 0 {
		lmi.NextKeyMarker = lmi.Uploads[len(lmi.Uploads)-1].Object
		lmi.NextUploadIDMarker = lmi.Uploads[len(lmi.Uploads)-1].UploadID
	}
	return lmi, jfsToObjectErr(ctx, err, bucket)
}

func (n *jfsObjects) checkUploadIDExists(ctx context.Context, bucket, object, uploadID string) (err error) {
	if err = n.checkBucket(ctx, bucket); err != nil {
		return
	}
	_, eno := n.fs.Stat(mctx, n.upath(bucket, uploadID))
	return jfsToObjectErr(ctx, eno, bucket, object, uploadID)
}

func (n *jfsObjects) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int, opts minio.ObjectOptions) (result minio.ListPartsInfo, err error) {
	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return result, err
	}
	f, e := n.fs.Open(mctx, n.upath(bucket, uploadID), 0)
	if e != 0 {
		err = jfsToObjectErr(ctx, e, bucket, object, uploadID)
		return
	}
	defer func() { _ = f.Close(mctx) }()
	entries, e := f.ReaddirPlus(mctx, 0)
	if e != 0 {
		err = jfsToObjectErr(ctx, e, bucket, object, uploadID)
		return
	}
	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	result.PartNumberMarker = partNumberMarker
	result.MaxParts = maxParts
	for _, entry := range entries {
		num, er := strconv.Atoi(string(entry.Name))
		if er == nil && num > partNumberMarker {
			etag, _ := n.fs.GetXattr(mctx, n.ppath(bucket, uploadID, string(entry.Name)), s3Etag)
			result.Parts = append(result.Parts, minio.PartInfo{
				PartNumber:   num,
				Size:         int64(entry.Attr.Length),
				LastModified: time.Unix(entry.Attr.Mtime, 0),
				ETag:         string(etag),
			})
		}
	}
	sort.Slice(result.Parts, func(i, j int) bool {
		return result.Parts[i].PartNumber < result.Parts[j].PartNumber
	})
	if len(result.Parts) > maxParts {
		result.IsTruncated = true
		result.Parts = result.Parts[:maxParts]
		result.NextPartNumberMarker = result.Parts[maxParts-1].PartNumber
	}
	return
}

func (n *jfsObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int,
	startOffset int64, length int64, srcInfo minio.ObjectInfo, srcOpts, dstOpts minio.ObjectOptions) (result minio.PartInfo, err error) {
	if err = n.isValidBucketName(srcBucket); err != nil {
		return
	}
	if err = n.checkUploadIDExists(ctx, dstBucket, dstObject, uploadID); err != nil {
		return
	}
	// TODO: use CopyFileRange
	return n.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, srcInfo.PutObjReader, dstOpts)
}

func (n *jfsObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, r *minio.PutObjReader, opts minio.ObjectOptions) (info minio.PartInfo, err error) {
	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return
	}
	p := n.ppath(bucket, uploadID, strconv.Itoa(partID))
	var etag string
	if err = n.putObject(ctx, bucket, p, r, opts, func(tmpName string) {
		etag = r.MD5CurrentHexString()
		if n.fs.SetXattr(mctx, tmpName, s3Etag, []byte(etag), 0) != 0 {
			logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", tmpName, s3Etag, etag, 0)
		}
	}); err != nil {
		err = jfsToObjectErr(ctx, err, bucket, object)
		return
	}
	info.PartNumber = partID
	info.ETag = etag
	info.LastModified = minio.UTCNow()
	info.Size = r.Reader.Size()
	return
}

func (n *jfsObjects) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts minio.ObjectOptions) (result minio.MultipartInfo, err error) {
	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return
	}
	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	return
}

func (n *jfsObjects) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, parts []minio.CompletePart, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return
	}
	g, ectx := errgroup.WithContext(ctx)
	g.SetLimit(10)
	for i := 0; i < len(parts); i++ {
		i := i
		g.Go(func() error {
			select {
			case <-ectx.Done():
				return ectx.Err()
			default:
			}
			ppath := n.ppath(bucket, uploadID, strconv.Itoa(parts[i].PartNumber))
			etag, _ := n.fs.GetXattr(mctx, ppath, s3Etag)
			if string(etag) != "" && string(etag) != parts[i].ETag {
				logger.Warnf("path: %s,expect etag: %s,but got: %s", ppath, etag, parts[i].ETag)
				return minio.ErrInvalidEtag
			}
			return nil
		})
	}
	if err = g.Wait(); err != nil {
		return objInfo, err
	}
	tmp := n.ppath(bucket, uploadID, "complete")
	_ = n.fs.Delete(mctx, tmp)
	f, eno := n.fs.Create(mctx, tmp, 0666, n.gConf.Umask)
	if eno != 0 {
		err = jfsToObjectErr(ctx, eno, bucket, object, uploadID)
		logger.Errorf("create complete: %s", err)
		return
	}
	defer func() {
		_ = f.Close(mctx)
	}()
	var total uint64
	for _, part := range parts {
		p := n.ppath(bucket, uploadID, strconv.Itoa(part.PartNumber))
		copied, eno := n.fs.CopyFileRange(mctx, p, 0, tmp, total, 5<<30)
		if eno == syscall.ENOENT { // try lookup from old path
			p = n.ppathFlat(bucket, uploadID, strconv.Itoa(part.PartNumber))
			copied, eno = n.fs.CopyFileRange(mctx, p, 0, tmp, total, 5<<30)
		}
		if eno != 0 {
			err = jfsToObjectErr(ctx, eno, bucket, object, uploadID)
			logger.Errorf("merge parts: %s", err)
			return
		}
		total += copied
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5 := minio.ComputeCompleteMultipartMD5(parts)
	if n.gConf.KeepEtag {
		eno = n.fs.SetXattr(mctx, tmp, s3Etag, []byte(s3MD5), 0)
		if eno != 0 {
			logger.Warnf("set xattr error, path: %s,xattr: %s,value: %s,flags: %d", tmp, s3Etag, s3MD5, 0)
		}
	}

	var tagStr []byte
	if n.gConf.ObjTag {
		var eno syscall.Errno
		if tagStr, eno = n.fs.GetXattr(mctx, n.upath(bucket, uploadID), s3Tags); eno != 0 {
			if eno != meta.ENOATTR {
				logger.Errorf("get object tags error, path: %s, error: %s", n.upath(bucket, uploadID), eno)
			}
		} else if len(tagStr) > 0 {
			if eno = n.fs.SetXattr(mctx, tmp, s3Tags, tagStr, 0); eno != 0 {
				logger.Errorf("set object tags error, path: %s, tags: %s, error: %s", tmp, string(tagStr), eno)
			}
		}
	}

	var objMeta map[string]string
	if n.gConf.ObjMeta {
		if objMeta, err = n.getObjMeta(n.upath(bucket, uploadID)); err != nil {
			logger.Errorf("get object meta error, path: %s, error: %s", n.upath(bucket, uploadID), err)
		} else if err = n.setObjMeta(tmp, objMeta); err != nil {
			logger.Errorf("set object meta error, path: %s, error: %s", tmp, err)
		}
	}

	name := n.path(bucket, object)
	eno = n.fs.Rename(mctx, tmp, name, 0)
	if eno == syscall.ENOENT {
		if err = n.mkdirAll(ctx, path.Dir(name)); err != nil {
			logger.Errorf("mkdirAll %s: %s", path.Dir(name), err)
			_ = n.fs.Delete(mctx, tmp)
			err = jfsToObjectErr(ctx, err, bucket, object, uploadID)
			return
		}
		eno = n.fs.Rename(mctx, tmp, name, 0)
	}
	if eno != 0 {
		_ = n.fs.Delete(mctx, tmp)
		err = jfsToObjectErr(ctx, eno, bucket, object, uploadID)
		logger.Errorf("Rename %s -> %s: %s", tmp, name, err)
		return
	}

	fi, eno := n.fs.Stat(mctx, name)
	if eno != 0 {
		_ = n.fs.Delete(mctx, name)
		err = jfsToObjectErr(ctx, eno, bucket, object, uploadID)
		return
	}

	// remove parts
	_ = n.fs.Rmr(mctx, n.upath(bucket, uploadID), true, meta.RmrDefaultThreads)
	return minio.ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ETag:        s3MD5,
		ModTime:     fi.ModTime(),
		Size:        fi.Size(),
		IsDir:       fi.IsDir(),
		AccTime:     fi.ModTime(),
		UserTags:    string(tagStr),
		UserDefined: minio.CleanMetadata(opts.UserDefined),
		IsLatest:    true,
	}, nil
}

func (n *jfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, option minio.ObjectOptions) (err error) {
	if err = n.checkUploadIDExists(ctx, bucket, object, uploadID); err != nil {
		return
	}
	eno := n.fs.Rmr(mctx, n.upath(bucket, uploadID), true, meta.RmrDefaultThreads)
	return jfsToObjectErr(ctx, eno, bucket, object, uploadID)
}

func (n *jfsObjects) cleanup() {
	for range time.Tick(24 * time.Hour) {
		// default bucket tmp dirs
		tmpDirs := []string{".sys/tmp/", ".sys/uploads/"}
		if n.gConf.MultiBucket {
			buckets, err := n.ListBuckets(context.Background())
			if err != nil {
				logger.Errorf("list buckets error: %v", err)
				continue
			}
			for _, bucket := range buckets {
				tmpDirs = append(tmpDirs, fmt.Sprintf(".sys/%s/tmp", bucket.Name))
				tmpDirs = append(tmpDirs, fmt.Sprintf(".sys/%s/uploads", bucket.Name))
			}
		}
		for _, dir := range tmpDirs {
			n.cleanupDir(dir)
		}
	}
}

func (n *jfsObjects) cleanupDir(dir string) bool {
	f, errno := n.fs.Open(mctx, dir, 0)
	if errno != 0 {
		return false
	}
	defer f.Close(mctx)
	entries, _ := f.ReaddirPlus(mctx, 0)
	now := time.Now()
	deleted := 0
	for _, entry := range entries {
		dirPath := n.path(dir, string(entry.Name))
		if entry.Attr.Typ == meta.TypeDirectory && len(entry.Name) == subDirPrefix {
			if !n.cleanupDir(strings.TrimPrefix(dirPath, "/")) {
				continue
			}
		} else if _, err := uuid.Parse(string(entry.Name)); err != nil {
			logger.Warnf("unexpected file path: %s", dirPath)
			continue
		}
		if now.Sub(time.Unix(entry.Attr.Mtime, 0)) > 7*24*time.Hour {
			if errno = n.fs.Rmr(mctx, dirPath, true, meta.RmrDefaultThreads); errno != 0 {
				logger.Errorf("failed to delete expired temporary files path: %s, err: %s", dirPath, errno)
			} else {
				deleted += 1
				logger.Infof("delete expired temporary files path: %s, mtime: %s", dirPath, time.Unix(entry.Attr.Mtime, 0).Format(time.RFC3339))
			}
		}
	}
	return deleted == len(entries)
}

type jfsFLock struct {
	inode     meta.Ino
	owner     uint64
	meta      meta.Meta
	localLock sync.RWMutex
	readonly  bool
}

func (j *jfsFLock) GetLock(ctx context.Context, timeout *minio.DynamicTimeout) (newCtx context.Context, timedOutErr error) {
	return j.getFlockWithTimeOut(ctx, meta.F_WRLCK, timeout)
}

func (j *jfsFLock) getFlockWithTimeOut(ctx context.Context, ltype uint32, timeout *minio.DynamicTimeout) (context.Context, error) {
	if j.readonly || j.inode == 0 {
		return ctx, nil
	}
	start := time.Now()
	deadline := start.Add(timeout.Timeout())
	lockStr := "write"

	var getLockFunc func() bool
	var unlockFunc func()
	var getLock bool
	if ltype == meta.F_RDLCK {
		getLockFunc = j.localLock.TryRLock
		unlockFunc = j.localLock.RUnlock
		lockStr = "read"
	} else {
		getLockFunc = j.localLock.TryLock
		unlockFunc = j.localLock.Unlock
	}

	for {
		getLock = getLockFunc()
		if getLock {
			break
		}
		if time.Now().After(deadline) {
			timeout.LogFailure()
			logger.Errorf("get %s lock timed out ino:%d", lockStr, j.inode)
			return ctx, minio.OperationTimedOut{}
		}
		time.Sleep(5 * time.Millisecond)
	}

	for {
		if errno := j.meta.Flock(mctx, j.inode, j.owner, ltype, false); errno != 0 {
			if !errors.Is(errno, syscall.EAGAIN) {
				logger.Errorf("failed to get %s lock for inode %d by owner %d, error : %s", lockStr, j.inode, j.owner, errno)
			}
		} else {
			timeout.LogSuccess(time.Since(start))
			return ctx, nil
		}

		if time.Now().After(deadline) {
			unlockFunc()
			timeout.LogFailure()
			logger.Errorf("get %s lock timed out ino:%d", lockStr, j.inode)
			return ctx, minio.OperationTimedOut{}
		}
		time.Sleep(5 * time.Millisecond)
	}
}

func (j *jfsFLock) Unlock() {
	if j.inode == 0 || j.readonly {
		return
	}
	if errno := j.meta.Flock(mctx, j.inode, j.owner, meta.F_UNLCK, true); errno != 0 {
		logger.Errorf("failed to release lock for inode %d by owner %d, error : %s", j.inode, j.owner, errno)
	}
	j.localLock.Unlock()
}

func (j *jfsFLock) GetRLock(ctx context.Context, timeout *minio.DynamicTimeout) (newCtx context.Context, timedOutErr error) {
	return j.getFlockWithTimeOut(ctx, meta.F_RDLCK, timeout)
}

func (j *jfsFLock) RUnlock() {
	if j.inode == 0 || j.readonly {
		return
	}
	if errno := j.meta.Flock(mctx, j.inode, j.owner, meta.F_UNLCK, true); errno != 0 {
		logger.Errorf("failed to release lock for inode %d by owner %d, error : %s", j.inode, j.owner, errno)
	}
	j.localLock.RUnlock()
}

func (n *jfsObjects) NewNSLock(bucket string, objects ...string) minio.RWLocker {
	if n.gConf.ReadOnly {
		return &jfsFLock{readonly: true}
	}
	if len(objects) != 1 {
		panic(fmt.Errorf("jfsObjects.NewNSLock: the length of the objects parameter must be 1, current %s", objects))
	}

	lockfile := path.Join(minio.MinioMetaBucket, minio.MinioMetaLockFile)
	var file *fs.File
	var errno syscall.Errno
	file, errno = n.fs.Open(mctx, lockfile, vfs.MODE_MASK_W)
	if errno != 0 && !errors.Is(errno, syscall.ENOENT) {
		logger.Errorf("failed to open the file to be locked: %s error %s", lockfile, errno)
		return &jfsFLock{}
	}
	if errors.Is(errno, syscall.ENOENT) {
		if file, errno = n.fs.Create(mctx, lockfile, 0666, n.gConf.Umask); errno != 0 {
			if errors.Is(errno, syscall.EEXIST) {
				if file, errno = n.fs.Open(mctx, lockfile, vfs.MODE_MASK_W); errno != 0 {
					logger.Errorf("failed to open the file to be locked: %s error %s", lockfile, errno)
					return &jfsFLock{}
				}
			} else {
				logger.Errorf("failed to create gateway lock file err %s", errno)
				return &jfsFLock{}
			}
		}
	}
	defer file.Close(mctx)
	return &jfsFLock{owner: n.conf.Meta.Sid, inode: file.Inode(), meta: n.fs.Meta()}
}

func (n *jfsObjects) BackendInfo() madmin.BackendInfo {
	return madmin.BackendInfo{Type: madmin.FS}
}

func (n *jfsObjects) LocalStorageInfo(ctx context.Context) (minio.StorageInfo, []error) {
	return n.StorageInfo(ctx)
}

func (n *jfsObjects) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (loi minio.ListObjectVersionsInfo, err error) {
	objs, err := n.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
	if err == nil {
		loi.Objects = objs.Objects
		loi.Prefixes = objs.Prefixes
	}
	return loi, err
}

func (n *jfsObjects) getObjectInfoNoFSLock(ctx context.Context, bucket, object string, info any) (oi minio.ObjectInfo, e error) {
	if info != nil {
		fi := info.(*fs.FileStat)
		oi = minio.ObjectInfo{
			Bucket:   bucket,
			Name:     object,
			ModTime:  fi.ModTime(),
			Size:     fi.Size(),
			IsDir:    fi.IsDir(),
			AccTime:  fi.ModTime(),
			IsLatest: true,
		}
		if fi.IsDir() {
			oi.Size = 0
		}
		return
	}
	return n.GetObjectInfo(ctx, bucket, object, minio.ObjectOptions{})
}

func (n *jfsObjects) Walk(ctx context.Context, bucket, prefix string, results chan<- minio.ObjectInfo, opts minio.ObjectOptions) error {
	return minio.FsWalk(ctx, n, bucket, prefix, n.listDirFactory(), n.isLeaf, n.isLeafDir, results, n.getObjectInfoNoFSLock, n.getObjectInfoNoFSLock)
}

func (n *jfsObjects) SetBucketPolicy(ctx context.Context, bucket string, policy *policy.Policy) error {
	meta, err := minio.LoadBucketMetadata(ctx, n, bucket)
	if err != nil {
		return err
	}

	json := jsoniter.ConfigCompatibleWithStandardLibrary
	configData, err := json.Marshal(policy)
	if err != nil {
		return err
	}
	meta.PolicyConfigJSON = configData

	return meta.Save(ctx, n)
}

func (n *jfsObjects) GetBucketPolicy(ctx context.Context, bucket string) (*policy.Policy, error) {
	meta, err := minio.LoadBucketMetadata(ctx, n, bucket)
	if err != nil {
		return nil, err
	}
	if meta.PolicyConfig == nil {
		return nil, minio.BucketPolicyNotFound{Bucket: bucket}
	}
	return meta.PolicyConfig, nil
}

func (n *jfsObjects) DeleteBucketPolicy(ctx context.Context, bucket string) error {
	meta, err := minio.LoadBucketMetadata(ctx, n, bucket)
	if err != nil {
		return err
	}
	meta.PolicyConfigJSON = nil
	return meta.Save(ctx, n)
}

func (n *jfsObjects) SetDriveCounts() []int {
	return nil
}

func (n *jfsObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
	return madmin.HealResultItem{}, minio.NotImplemented{}
}

func (n *jfsObjects) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
	return madmin.HealResultItem{}, minio.NotImplemented{}
}

func (n *jfsObjects) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (res madmin.HealResultItem, err error) {
	return res, minio.NotImplemented{}
}

func (n *jfsObjects) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, fn minio.HealObjectFn) error {
	return minio.NotImplemented{}
}

func (n *jfsObjects) GetMetrics(ctx context.Context) (*minio.BackendMetrics, error) {
	return &minio.BackendMetrics{}, minio.NotImplemented{}
}

func (n *jfsObjects) Health(ctx context.Context, opts minio.HealthOptions) minio.HealthResult {
	if _, errno := n.fs.Stat(mctx, minio.MinioMetaBucket); errno != 0 {
		return minio.HealthResult{}
	}
	return minio.HealthResult{
		Healthy: true,
	}
}

func (n *jfsObjects) ReadHealth(ctx context.Context) bool {
	_, errno := n.fs.Stat(mctx, minio.MinioMetaBucket)
	return errno == 0
}

func (n *jfsObjects) PutObjectTags(ctx context.Context, bucket, object string, tags string, opts minio.ObjectOptions) (minio.ObjectInfo, error) {
	if !n.gConf.ObjTag {
		return minio.ObjectInfo{}, minio.NotImplemented{}
	}
	if eno := n.fs.SetXattr(mctx, n.path(bucket, object), s3Tags, []byte(tags), 0); eno != 0 {
		return minio.ObjectInfo{}, eno
	}
	return n.GetObjectInfo(ctx, bucket, object, opts)
}

func (n *jfsObjects) GetObjectTags(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (*tags.Tags, error) {
	if !n.gConf.ObjTag {
		return nil, minio.NotImplemented{}
	}
	oi, err := n.GetObjectInfo(ctx, bucket, object, minio.ObjectOptions{})
	if err != nil {
		return nil, err
	}

	return tags.ParseObjectTags(oi.UserTags)
}

func (n *jfsObjects) DeleteObjectTags(ctx context.Context, bucket, object string, opts minio.ObjectOptions) (minio.ObjectInfo, error) {
	if !n.gConf.ObjTag {
		return minio.ObjectInfo{}, minio.NotImplemented{}
	}
	if errno := n.fs.RemoveXattr(mctx, n.path(bucket, object), s3Tags); errno != 0 && errno != meta.ENOATTR {
		return minio.ObjectInfo{}, errno
	}
	return n.GetObjectInfo(ctx, bucket, object, opts)
}

func (n *jfsObjects) IsNotificationSupported() bool {
	return true
}

func (n *jfsObjects) IsListenSupported() bool {
	return true
}

func (n *jfsObjects) IsTaggingSupported() bool {
	return true
}


================================================
FILE: pkg/gateway/gateway_test.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gateway

import (
	"context"
	"errors"
	"os"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/vfs"
	minio "github.com/minio/minio/cmd"
)

func TestGatewayLock(t *testing.T) {
	m := meta.NewClient("memkv://", nil)
	format := &meta.Format{
		Name:      "test",
		BlockSize: 4096,
		Capacity:  1 << 30,
		DirStats:  true,
	}
	_ = m.Init(format, true)
	var conf = vfs.Config{
		Meta: meta.DefaultConf(),
		Chunk: &chunk.Config{
			BlockSize:   format.BlockSize << 10,
			MaxUpload:   1,
			MaxDownload: 200,
			BufferSize:  100 << 20,
		},
		DirEntryTimeout: time.Millisecond * 100,
		EntryTimeout:    time.Millisecond * 100,
		AttrTimeout:     time.Millisecond * 100,
	}
	objStore, _ := object.CreateStorage("mem", "", "", "", "")
	store := chunk.NewCachedStore(objStore, *conf.Chunk, nil)
	jfs, err := fs.NewFileSystem(&conf, m, store, nil)
	if err != nil {
		t.Fatalf("initialize  failed: %s", err)
	}
	jfsObj := &jfsObjects{fs: jfs, conf: &conf, listPool: minio.NewTreeWalkPool(time.Minute * 30), gConf: &Config{Umask: 022}, nsMutex: minio.NewNSLock(false)}
	mctx = meta.NewContext(uint32(os.Getpid()), uint32(os.Getuid()), []uint32{uint32(os.Getgid())})
	if err := jfs.Mkdir(mctx, minio.MinioMetaBucket, 0777, 022); err != 0 {
		t.Fatalf("mkdir failed: %s", err)
	}

	rwLocker := jfsObj.NewNSLock(minio.MinioMetaBucket, minio.MinioMetaLockFile)

	if _, err := rwLocker.GetLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); err != nil {
		t.Fatalf("get lock failed: %s", err)
	}
	if _, err := rwLocker.GetLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); !errors.As(err, &minio.OperationTimedOut{}) {
		t.Fatalf("GetLock should return timeout error: %s", err)
	}
	rwLocker.Unlock()

	if _, err := rwLocker.GetRLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); err != nil {
		t.Fatalf("get lock failed: %s", err)
	}
	if _, err := rwLocker.GetRLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); err != nil {
		t.Fatalf("GetRLock should return nil: %s", err)
	}
	rwLocker.RUnlock()
	rwLocker.RUnlock()

	if _, err := rwLocker.GetLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); err != nil {
		t.Fatalf("get lock failed: %s", err)
	}
	if _, err := rwLocker.GetRLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); !errors.As(err, &minio.OperationTimedOut{}) {
		t.Fatalf("GetRLock should return timeout error: %s", err)
	}
	rwLocker.Unlock()

	if _, err := rwLocker.GetRLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); err != nil {
		t.Fatalf("GetRLock failed: %s", err)
	}
	if _, err := rwLocker.GetLock(context.Background(), minio.NewDynamicTimeout(2*time.Second, 1*time.Second)); !errors.As(err, &minio.OperationTimedOut{}) {
		t.Fatalf("GetRLock should return timeout error: %s", err)
	}
	rwLocker.RUnlock()

}


================================================
FILE: pkg/meta/backup.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"unsafe"

	"github.com/juicedata/juicefs/pkg/meta/pb"
	"google.golang.org/protobuf/encoding/protojson"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/reflect/protoreflect"
	"google.golang.org/protobuf/reflect/protoregistry"
)

const (
	BakMagic   = 0x747083
	BakVersion = 1
	BakEOS     = BakMagic // end of segments
)

const (
	segTypeUnknown = iota
	segTypeFormat
	segTypeCounter
	segTypeNode
	segTypeEdge
	segTypeChunk
	segTypeSliceRef
	segTypeSymlink
	segTypeSustained
	segTypeDelFile
	segTypeXattr
	segTypeAcl
	segTypeStat
	segTypeQuota
	segTypeParent // for redis/tkv only
	segTypeMax
)

var SegType2Name = map[int]string{
	segTypeFormat:    "format",
	segTypeCounter:   "counter",
	segTypeNode:      "node",
	segTypeEdge:      "edge",
	segTypeChunk:     "chunk",
	segTypeSliceRef:  "sliceRef",
	segTypeSymlink:   "symlink",
	segTypeSustained: "sustained",
	segTypeDelFile:   "delFile",
	segTypeXattr:     "xattr",
	segTypeAcl:       "acl",
	segTypeStat:      "stat",
	segTypeQuota:     "quota",
	segTypeParent:    "parent",
}

var errBakEOF = fmt.Errorf("reach backup EOF")

func getMessageFromType(typ int) (proto.Message, error) {
	var name protoreflect.FullName
	if typ == segTypeFormat {
		name = proto.MessageName(&pb.Format{})
	} else if typ < segTypeMax {
		name = proto.MessageName(&pb.Batch{})
	}
	if name == "" {
		return nil, fmt.Errorf("unknown message type %d", typ)
	}
	return createMessageByName(name)
}

func createMessageByName(name protoreflect.FullName) (proto.Message, error) {
	typ, err := protoregistry.GlobalTypes.FindMessageByName(name)
	if err != nil {
		return nil, fmt.Errorf("failed to find message %s's type: %v", name, err)
	}
	return typ.New().Interface(), nil
}

// BakFormat: BakSegment... + BakEOS + BakFooter
type BakFormat struct {
	Pos    uint64
	Footer *BakFooter
}

func newBakFormat() *BakFormat {
	return &BakFormat{
		Footer: &BakFooter{
			Msg: &pb.Footer{
				Magic:   BakMagic,
				Version: BakVersion,
				Infos:   make(map[string]*pb.Footer_SegInfo),
			},
		},
	}
}

func (f *BakFormat) writeSegment(w io.Writer, seg *BakSegment) error {
	if seg == nil {
		return nil
	}

	n, err := seg.Marshal(w)
	if err != nil {
		return fmt.Errorf("failed to marshal segment %s: %v", seg, err)
	}

	name := seg.Name()
	info, ok := f.Footer.Msg.Infos[name]
	if !ok {
		info = &pb.Footer_SegInfo{Offset: []uint64{}, Num: 0}
		f.Footer.Msg.Infos[name] = info
	}

	info.Offset = append(info.Offset, f.Pos)
	info.Num += seg.num()
	f.Pos += uint64(n)
	return nil
}

func (f *BakFormat) ReadSegment(r io.Reader) (*BakSegment, error) {
	seg := &BakSegment{}
	if err := seg.Unmarshal(r); err != nil {
		return nil, err
	}
	return seg, nil
}

func (f *BakFormat) writeFooter(w io.Writer) error {
	if err := f.writeEOS(w); err != nil {
		return err
	}
	return f.Footer.Marshal(w)
}

func (f *BakFormat) writeEOS(w io.Writer) error {
	if n, err := w.Write(binary.BigEndian.AppendUint32(nil, BakEOS)); err != nil && n != 4 {
		return fmt.Errorf("failed to write EOS: err %w, write len %d, expect len 4", err, n)
	}
	return nil
}

func (f *BakFormat) ReadFooter(r io.ReadSeeker) (*BakFooter, error) { // nolint:unused
	footer := &BakFooter{}
	if err := footer.Unmarshal(r); err != nil {
		return nil, err
	}
	if footer.Msg.Magic != BakMagic {
		return nil, fmt.Errorf("invalid magic number %d, expect %d", footer.Msg.Magic, BakMagic)
	}
	f.Footer = footer
	return footer, nil
}

type BakFooter struct {
	Msg *pb.Footer
	Len uint64
}

func (h *BakFooter) Marshal(w io.Writer) error {
	data, err := proto.Marshal(h.Msg)
	if err != nil {
		return fmt.Errorf("failed to marshal footer: %w", err)
	}

	if n, err := w.Write(data); err != nil && n != len(data) {
		return fmt.Errorf("failed to write footer data: err %w, write len %d, expect len %d", err, n, len(data))
	}

	h.Len = uint64(len(data))
	if n, err := w.Write(binary.BigEndian.AppendUint64(nil, h.Len)); err != nil && n != 8 {
		return fmt.Errorf("failed to write footer length: err %w, write len %d, expect len 8", err, n)
	}
	return nil
}

func (h *BakFooter) Unmarshal(r io.ReadSeeker) error {
	lenSize := int64(unsafe.Sizeof(h.Len))
	_, _ = r.Seek(-lenSize, io.SeekEnd)

	data := make([]byte, lenSize)
	if n, err := r.Read(data); err != nil && n != int(lenSize) {
		return fmt.Errorf("failed to read footer length: err %w, read len %d, expect len %d", err, n, lenSize)
	}

	h.Len = binary.BigEndian.Uint64(data)
	_, _ = r.Seek(-int64(h.Len)-lenSize, io.SeekEnd)
	data = make([]byte, h.Len)
	if n, err := r.Read(data); err != nil && n != int(h.Len) {
		return fmt.Errorf("failed to read footer: err %w, read len %d, expect len %d", err, n, h.Len)
	}

	h.Msg = &pb.Footer{}
	if err := proto.Unmarshal(data, h.Msg); err != nil {
		return fmt.Errorf("failed to unmarshal footer: %w", err)
	}
	return nil
}

type BakSegment struct {
	typ uint32
	len uint64
	val proto.Message
}

func (s *BakSegment) Name() string {
	if name, ok := SegType2Name[int(s.typ)]; ok {
		return name
	}
	return fmt.Sprintf("type-%d", s.typ)
}

func (s *BakSegment) String() string {
	switch s.val.(type) {
	case *pb.Format:
		return string(s.val.(*pb.Format).Data)
	case *pb.Batch:
		return protojson.Format(s.val)
	}
	return "unknown segment"
}

func newBakSegment(val proto.Message) *BakSegment {
	s := &BakSegment{val: val}
	switch v := s.val.(type) {
	case *pb.Format:
		s.typ = uint32(segTypeFormat)
	case *pb.Batch:
		if v.Counters != nil {
			s.typ = uint32(segTypeCounter)
		} else if v.Sustained != nil {
			s.typ = uint32(segTypeSustained)
		} else if v.Delfiles != nil {
			s.typ = uint32(segTypeDelFile)
		} else if v.Acls != nil {
			s.typ = uint32(segTypeAcl)
		} else if v.Xattrs != nil {
			s.typ = uint32(segTypeXattr)
		} else if v.Quotas != nil {
			s.typ = uint32(segTypeQuota)
		} else if v.Dirstats != nil {
			s.typ = uint32(segTypeStat)
		} else if v.Nodes != nil {
			s.typ = uint32(segTypeNode)
		} else if v.Chunks != nil {
			s.typ = uint32(segTypeChunk)
		} else if v.SliceRefs != nil {
			s.typ = uint32(segTypeSliceRef)
		} else if v.Edges != nil {
			s.typ = uint32(segTypeEdge)
		} else if v.Symlinks != nil {
			s.typ = uint32(segTypeSymlink)
		} else if v.Parents != nil {
			s.typ = uint32(segTypeParent)
		} else {
			return nil
		}
	}
	return s
}

func (s *BakSegment) num() uint64 {
	switch s.typ {
	case segTypeFormat:
		return 1
	default:
		b := s.val.(*pb.Batch)
		switch s.typ {
		case segTypeCounter:
			return uint64(len(b.Counters))
		case segTypeNode:
			return uint64(len(b.Nodes))
		case segTypeEdge:
			return uint64(len(b.Edges))
		case segTypeChunk:
			return uint64(len(b.Chunks))
		case segTypeSliceRef:
			return uint64(len(b.SliceRefs))
		case segTypeSymlink:
			return uint64(len(b.Symlinks))
		case segTypeSustained:
			return uint64(len(b.Sustained))
		case segTypeDelFile:
			return uint64(len(b.Delfiles))
		case segTypeXattr:
			return uint64(len(b.Xattrs))
		case segTypeAcl:
			return uint64(len(b.Acls))
		case segTypeStat:
			return uint64(len(b.Dirstats))
		case segTypeQuota:
			return uint64(len(b.Quotas))
		case segTypeParent:
			return uint64(len(b.Parents))
		}
		return 0
	}
}

func (s *BakSegment) Marshal(w io.Writer) (int, error) {
	if s == nil || s.val == nil {
		return 0, fmt.Errorf("segment %s is nil", s)
	}

	if err := binary.Write(w, binary.BigEndian, s.typ); err != nil {
		return 0, fmt.Errorf("failed to write segment type %s : %w", s, err)
	}
	data, err := proto.Marshal(s.val)
	if err != nil {
		return 0, fmt.Errorf("failed to marshal segment message %s : %w", s, err)
	}
	s.len = uint64(len(data))
	if err := binary.Write(w, binary.BigEndian, s.len); err != nil {
		return 0, fmt.Errorf("failed to write segment length %s: %w", s, err)
	}

	if n, err := w.Write(data); err != nil || n != len(data) {
		return 0, fmt.Errorf("failed to write segment data %s: err %w, write len %d, expect len %d", s, err, n, len(data))
	}

	return binary.Size(s.typ) + binary.Size(s.len) + len(data), nil
}

func (s *BakSegment) Unmarshal(r io.Reader) error {
	if err := binary.Read(r, binary.BigEndian, &s.typ); err != nil {
		return fmt.Errorf("failed to read segment type: %v", err)
	}

	if s.typ == BakEOS {
		return errBakEOF
	}

	if err := binary.Read(r, binary.BigEndian, &s.len); err != nil {
		return fmt.Errorf("failed to read segment %s length: %v", s, err)
	}
	data := make([]byte, s.len)
	n, err := r.Read(data)
	if err != nil && n != int(s.len) {
		return fmt.Errorf("failed to read segment value: err %v, read len %d, expect len %d", err, n, s.len)
	}

	msg, err := getMessageFromType(int(s.typ))
	if err != nil {
		return fmt.Errorf("failed to create message by type %d: %w", s.typ, err)
	}
	if err = proto.Unmarshal(data, msg); err != nil {
		return fmt.Errorf("failed to unmarshal segment msg %d: %w", s.typ, err)
	}
	s.val = msg
	return nil
}

type DumpOption struct {
	KeepSecret bool
	Threads    int
	Progress   func(name string, cnt int)
}

func (opt *DumpOption) check() *DumpOption {
	if opt == nil {
		opt = &DumpOption{}
	}
	if opt.Threads < 1 {
		opt.Threads = 10
	}
	return opt
}

func (m *baseMeta) dumpFormat(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	f := m.GetFormat()
	if !opt.KeepSecret {
		f.RemoveSecret()
	}
	data, err := json.MarshalIndent(f, "", "")
	if err != nil {
		logger.Errorf("failed to marshal format %s: %v", f.Name, err)
		return nil
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Format{Data: data}})
}

type dumpedResult struct {
	msg     proto.Message
	release func(m proto.Message)
}

func dumpResult(ctx context.Context, ch chan<- *dumpedResult, res *dumpedResult) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case ch <- res:
		return nil
	}
}

type LoadOption struct {
	Threads  int
	Progress func(name string, cnt int)
}

func (opt *LoadOption) check() {
	if opt.Threads < 1 {
		opt.Threads = 10
	}
}

// transaction

type txSessionKey struct{}
type txMaxRetryKey struct{}


================================================
FILE: pkg/meta/base.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"os"
	"path"
	"reflect"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
)

const (
	inodeBatch     = 1 << 10
	sliceIdBatch   = 4 << 10
	nlocks         = 1024
	maxSymCacheNum = int32(10000)
	unknownUsage   = -1
)

var (
	DirBatchNum = map[string]int{
		"redis": 4096,
		"kv":    4096,
		"db":    40960,
	}
	maxCompactSlices  = 1000
	maxSlices         = 2500
	inodeNeedPrefetch = uint64(utils.JitterIt(inodeBatch * 0.1)) // Add jitter to reduce probability of txn conflicts
)

func checkInodeName(name string) syscall.Errno {
	if len(name) == 0 || strings.ContainsAny(name, "/\x00") {
		return syscall.EINVAL
	}
	return 0
}

type engine interface {
	// Get the value of counter name.
	getCounter(name string) (int64, error)
	// Increase counter name by value. Do not use this if value is 0, use getCounter instead.
	incrCounter(name string, value int64) (int64, error)
	// Set counter name to value if old <= value - diff.
	setIfSmall(name string, value, diff int64) (bool, error)
	updateStats(space int64, inodes int64)
	doFlushStats()

	doLoad() ([]byte, error)

	doNewSession(sinfo []byte, update bool) error
	doRefreshSession() error
	doFindStaleSessions(limit int) ([]uint64, error) // limit < 0 means all
	doCleanStaleSession(sid uint64) error
	doInit(format *Format, force bool) error

	scanAllChunks(ctx Context, ch chan<- cchunk, bar *utils.Bar) error
	doDeleteSustainedInode(sid uint64, inode Ino) error
	doFindDeletedFiles(ts int64, limit int) (map[Ino]uint64, error) // limit < 0 means all
	doDeleteFileData(inode Ino, length uint64)
	doCleanupSlices(ctx Context, count *uint64) error
	doCleanupDelayedSlices(ctx Context, edge int64) (int, error)
	doDeleteSlice(id uint64, size uint32) error

	doCloneEntry(ctx Context, srcIno Ino, parent Ino, name string, ino Ino, attr *Attr, cmode uint8, cumask uint16, top bool) syscall.Errno
	doBatchClone(ctx Context, srcParent Ino, dstParent Ino, entries []*Entry, cmode uint8, cumask uint16, result *batchCloneResult) syscall.Errno
	doAttachDirNode(ctx Context, parent Ino, dstIno Ino, name string) syscall.Errno
	doFindDetachedNodes(t time.Time) []Ino
	doCleanupDetachedNode(ctx Context, detachedNode Ino) syscall.Errno

	doGetQuota(ctx Context, qtype uint32, key uint64) (*Quota, error)
	// set quota, return true if there is no quota exists before
	doSetQuota(ctx Context, qtype uint32, key uint64, quota *Quota) (created bool, err error)
	doDelQuota(ctx Context, qtype uint32, key uint64) error
	doLoadQuotas(ctx Context) (map[uint64]*Quota, map[uint64]*Quota, map[uint64]*Quota, error)
	doFlushQuotas(ctx Context, quotas []*iQuota) error

	doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno
	doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr, oldAttr *Attr) syscall.Errno
	doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno
	doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, path string, inode *Ino, attr *Attr) syscall.Errno
	doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno
	doUnlink(ctx Context, parent Ino, name string, attr *Attr, skipCheckTrash ...bool) syscall.Errno
	doRmdir(ctx Context, parent Ino, name string, inode *Ino, attr *Attr, skipCheckTrash ...bool) syscall.Errno
	doBatchUnlink(ctx Context, parent Ino, entries []*Entry, delta *dirStat, skipCheckTrash ...bool) syscall.Errno
	doReadlink(ctx Context, inode Ino, noatime bool) (int64, []byte, error)
	doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry, limit int) syscall.Errno
	doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode, tinode *Ino, attr, tattr *Attr) syscall.Errno
	doSetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno
	doRemoveXattr(ctx Context, inode Ino, name string) syscall.Errno
	doRepair(ctx Context, inode Ino, attr *Attr) syscall.Errno
	doTouchAtime(ctx Context, inode Ino, attr *Attr, ts time.Time) (bool, error)
	doRead(ctx Context, inode Ino, indx uint32) ([]*slice, syscall.Errno)
	doList(ctx Context, inode Ino) ([]*slice, syscall.Errno)
	doWrite(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time, numSlices *int, delta *dirStat, attr *Attr) syscall.Errno
	doTruncate(ctx Context, inode Ino, flags uint8, length uint64, delta *dirStat, attr *Attr, skipPermCheck bool) syscall.Errno
	doFallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, delta *dirStat, attr *Attr) syscall.Errno
	doCompactChunk(inode Ino, indx uint32, origin []byte, ss []*slice, skipped int, pos uint32, id uint64, size uint32, delayed []byte) syscall.Errno

	doGetParents(ctx Context, inode Ino) map[Ino]int
	doUpdateDirStat(ctx Context, batch map[Ino]dirStat) error
	// @trySync: try sync dir stat if broken or not existed
	doGetDirStat(ctx Context, ino Ino, trySync bool) (*dirStat, syscall.Errno)
	doSyncDirStat(ctx Context, ino Ino) (*dirStat, syscall.Errno)
	doSyncVolumeStat(ctx Context) error

	scanTrashSlices(Context, trashSliceScan) error
	scanPendingSlices(Context, pendingSliceScan) error
	scanPendingFiles(Context, pendingFileScan) error

	GetSession(sid uint64, detail bool) (*Session, error)

	doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno
	doGetFacl(ctx Context, ino Ino, aclType uint8, aclId uint32, rule *aclAPI.Rule) syscall.Errno
	cacheACLs(ctx Context) error

	// kerberos delegation token
	doStoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno)
	doUpdateToken(ctx Context, id uint32, token []byte) syscall.Errno
	doLoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno)
	doDeleteTokens(ctx Context, ids []uint32) syscall.Errno
	doListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno)

	newDirHandler(inode Ino, plus bool, entries []*Entry) DirHandler

	dump(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error
	load(ctx Context, typ int, opt *LoadOption, val proto.Message) error
	prepareLoad(ctx Context, opt *LoadOption) error
}

type trashSliceScan func(ss []Slice, ts int64) (clean bool, err error)
type pendingSliceScan func(id uint64, size uint32) (clean bool, err error)
type trashFileScan func(inode Ino, size uint64, ts time.Time) (clean bool, err error)
type pendingFileScan func(ino Ino, size uint64, ts int64) (clean bool, err error)

// fsStat aligned for atomic operations
// nolint:structcheck
type fsStat struct {
	newSpace   int64
	newInodes  int64
	usedSpace  int64
	usedInodes int64
}

// chunk for compaction
type cchunk struct {
	inode  Ino
	indx   uint32
	slices int
}

type symlinkCache struct {
	*sync.Map
	size atomic.Int32
	cap  int32
}

// ugQuotaDelta represents quota changes for a specific user and group.
type ugQuotaDelta struct {
	Uid    uint32
	Gid    uint32
	Space  int64
	Inodes int64
}

type ugQuotaDeltas map[uint64]*ugQuotaDelta

func (ds ugQuotaDeltas) add(delta *ugQuotaDelta) {
	key := ugKey(delta.Uid, delta.Gid)
	if existing, ok := ds[key]; ok {
		existing.Space += delta.Space
		existing.Inodes += delta.Inodes
	} else {
		ds[key] = delta
	}
}

type batchCloneResult struct {
	length int64
	space  int64
	inodes int64
	deltas ugQuotaDeltas
}

func ugKey(uid, gid uint32) uint64 {
	return (uint64(uid) << 32) | uint64(gid)
}

func newSymlinkCache(cap int32) *symlinkCache {
	return &symlinkCache{
		Map: &sync.Map{},
		cap: cap,
	}
}

func (symCache *symlinkCache) Store(inode Ino, path []byte) {
	if _, loaded := symCache.Swap(inode, path); !loaded {
		symCache.size.Add(1)
	}
}

func (symCache *symlinkCache) clean(ctx Context, wg *sync.WaitGroup) {
	defer wg.Done()
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			symCache.doClean()
		}
	}
}

func (symCache *symlinkCache) doClean() {
	if symCache.size.Load() < int32(float64(symCache.cap)*0.75) {
		return
	}

	todo := symCache.size.Load() / 5
	cnt := int32(0)
	symCache.Range(func(key, value interface{}) bool {
		symCache.Delete(key)
		symCache.size.Add(-1)
		cnt++
		return cnt < todo
	})
}

type baseMeta struct {
	sync.Mutex
	addr string
	conf *Config
	fmt  *Format

	root         Ino
	txlocks      [nlocks]sync.Mutex // Pessimistic locks to reduce conflict
	subTrash     internalNode
	sid          uint64
	of           *openfiles
	removedFiles map[Ino]bool
	compacting   map[uint64]bool
	maxDeleting  chan struct{}
	dslices      chan Slice // slices to delete
	symlinks     *symlinkCache
	msgCallbacks *msgCallbacks
	reloadCb     []func(*Format)
	umounting    bool
	sesMu        sync.Mutex
	aclCache     aclAPI.Cache

	sessCtx Context
	sessWG  sync.WaitGroup

	dSliceMu sync.Mutex
	dSliceWG sync.WaitGroup

	dirStatsLock sync.Mutex
	dirStats     map[Ino]dirStat

	fsStatsLock sync.Mutex
	*fsStat

	parentMu    sync.Mutex        // protect dirParents
	quotaMu     sync.RWMutex      // protect dirQuotas
	dirParents  map[Ino]Ino       // directory inode -> parent inode
	dirQuotas   map[uint64]*Quota // directory inode -> quota
	userQuotas  map[uint64]*Quota // uid -> quota
	groupQuotas map[uint64]*Quota // gid -> quota

	quotaMetricMu        sync.Mutex
	dirQuotaMetricKeys   map[uint64]bool
	userQuotaMetricKeys  map[uint64]bool
	groupQuotaMetricKeys map[uint64]bool

	freeMu           sync.Mutex
	freeInodes       freeID
	freeSlices       freeID
	prefetchMu       sync.Mutex
	prefetchedInodes freeID

	usedSpaceG   prometheus.Gauge
	usedInodesG  prometheus.Gauge
	totalSpaceG  prometheus.Gauge
	totalInodesG prometheus.Gauge
	txDist       prometheus.Histogram
	txRestart    *prometheus.CounterVec
	opDist       prometheus.Histogram
	opCount      *prometheus.CounterVec
	opDuration   *prometheus.CounterVec

	// Subdir info metric
	subdirInfoG *prometheus.GaugeVec

	// Quota metrics
	dirQuotaMaxSpaceG   *prometheus.GaugeVec
	dirQuotaMaxInodesG  *prometheus.GaugeVec
	dirQuotaUsedSpaceG  *prometheus.GaugeVec
	dirQuotaUsedInodesG *prometheus.GaugeVec

	userQuotaMaxSpaceG   *prometheus.GaugeVec
	userQuotaMaxInodesG  *prometheus.GaugeVec
	userQuotaUsedSpaceG  *prometheus.GaugeVec
	userQuotaUsedInodesG *prometheus.GaugeVec

	groupQuotaMaxSpaceG   *prometheus.GaugeVec
	groupQuotaMaxInodesG  *prometheus.GaugeVec
	groupQuotaUsedSpaceG  *prometheus.GaugeVec
	groupQuotaUsedInodesG *prometheus.GaugeVec

	bgjobDels     *prometheus.CounterVec
	bgjobDuration *prometheus.HistogramVec

	en engine
}

func newBaseMeta(addr string, conf *Config) *baseMeta {
	return &baseMeta{
		addr:         utils.RemovePassword(addr),
		conf:         conf,
		sid:          conf.Sid,
		root:         RootInode,
		of:           newOpenFiles(conf.OpenCache, conf.OpenCacheLimit),
		removedFiles: make(map[Ino]bool),
		compacting:   make(map[uint64]bool),
		maxDeleting:  make(chan struct{}, 100),
		symlinks:     newSymlinkCache(maxSymCacheNum),
		fsStat: &fsStat{
			usedSpace:  unknownUsage,
			usedInodes: unknownUsage,
		},
		dirStats:    make(map[Ino]dirStat),
		dirParents:  make(map[Ino]Ino),
		dirQuotas:   make(map[uint64]*Quota),
		userQuotas:  make(map[uint64]*Quota),
		groupQuotas: make(map[uint64]*Quota),
		msgCallbacks: &msgCallbacks{
			callbacks: make(map[uint32]MsgCallback),
		},
		aclCache: aclAPI.NewCache(),

		usedSpaceG: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "used_space",
			Help: "Total used space in bytes.",
		}),
		usedInodesG: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "used_inodes",
			Help: "Total used number of inodes.",
		}),
		totalSpaceG: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "total_space",
			Help: "Total space in bytes.",
		}),
		totalInodesG: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "total_inodes",
			Help: "Total number of inodes.",
		}),
		txDist: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "transaction_durations_histogram_seconds",
			Help:    "Transactions latency distributions.",
			Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30),
		}),
		txRestart: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "transaction_restart",
			Help: "The number of times a transaction is restarted.",
		}, []string{"method"}),
		opDist: prometheus.NewHistogram(prometheus.HistogramOpts{
			Name:    "meta_ops_durations_histogram_seconds",
			Help:    "Operation latency distributions.",
			Buckets: prometheus.ExponentialBuckets(0.0001, 1.5, 30),
		}),
		opCount: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "meta_ops_total",
			Help: "Meta operation count",
		}, []string{"method"}),
		opDuration: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "meta_ops_duration_seconds",
			Help: "Meta operation duration in seconds.",
		}, []string{"method"}),

		// Subdir info metric
		subdirInfoG: prometheus.NewGaugeVec(prometheus.GaugeOpts{
			Name: "subdir_info",
			Help: "Subdir configuration for JuiceFS mount (empty string means root mount)",
		}, []string{"subdir"}),

		// quota metrics
		dirQuotaMaxSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "dir_quota_max_space_bytes",
				Help: "Directory quota maximum space in bytes.",
			},
			[]string{"inode"},
		),
		dirQuotaMaxInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "dir_quota_max_inodes",
				Help: "Directory quota maximum number of inodes.",
			},
			[]string{"inode"},
		),
		dirQuotaUsedSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "dir_quota_used_space_bytes",
				Help: "Directory quota used space in bytes.",
			},
			[]string{"inode"},
		),
		dirQuotaUsedInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "dir_quota_used_inodes",
				Help: "Directory quota used number of inodes.",
			},
			[]string{"inode"},
		),
		userQuotaMaxSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "user_quota_max_space_bytes",
				Help: "User quota maximum space in bytes.",
			},
			[]string{"uid"},
		),
		userQuotaMaxInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "user_quota_max_inodes",
				Help: "User quota maximum number of inodes.",
			},
			[]string{"uid"},
		),
		userQuotaUsedSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "user_quota_used_space_bytes",
				Help: "User quota used space in bytes.",
			},
			[]string{"uid"},
		),
		userQuotaUsedInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "user_quota_used_inodes",
				Help: "User quota used number of inodes.",
			},
			[]string{"uid"},
		),
		groupQuotaMaxSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "group_quota_max_space_bytes",
				Help: "Group quota maximum space in bytes.",
			},
			[]string{"gid"},
		),
		groupQuotaMaxInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "group_quota_max_inodes",
				Help: "Group quota maximum number of inodes.",
			},
			[]string{"gid"},
		),
		groupQuotaUsedSpaceG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "group_quota_used_space_bytes",
				Help: "Group quota used space in bytes.",
			},
			[]string{"gid"},
		),
		groupQuotaUsedInodesG: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "group_quota_used_inodes",
				Help: "Group quota used number of inodes.",
			},
			[]string{"gid"},
		),

		bgjobDuration: prometheus.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "juicefs_bgjob_duration_seconds",
				Help:    "Background job duration in seconds.",
				Buckets: prometheus.ExponentialBuckets(1, 2, 13),
			},
			[]string{"job", "status"},
		),
		bgjobDels: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "juicefs_bgjob_deletions_total",
				Help: "Number of deletions (files or slices) by background jobs.",
			},
			[]string{"job"},
		),

		dirQuotaMetricKeys:   make(map[uint64]bool),
		userQuotaMetricKeys:  make(map[uint64]bool),
		groupQuotaMetricKeys: make(map[uint64]bool),
	}
}

// InitSharedMetrics initialize the metrics that are same for all clients.
func (m *baseMeta) InitSharedMetrics(reg prometheus.Registerer) {
	if reg == nil {
		return
	}

	reg.MustRegister(m.usedSpaceG)
	reg.MustRegister(m.usedInodesG)
	reg.MustRegister(m.totalSpaceG)
	reg.MustRegister(m.totalInodesG)
	reg.MustRegister(m.dirQuotaMaxSpaceG)
	reg.MustRegister(m.dirQuotaMaxInodesG)
	reg.MustRegister(m.dirQuotaUsedSpaceG)
	reg.MustRegister(m.dirQuotaUsedInodesG)
	reg.MustRegister(m.userQuotaMaxSpaceG)
	reg.MustRegister(m.userQuotaMaxInodesG)
	reg.MustRegister(m.userQuotaUsedSpaceG)
	reg.MustRegister(m.userQuotaUsedInodesG)
	reg.MustRegister(m.groupQuotaMaxSpaceG)
	reg.MustRegister(m.groupQuotaMaxInodesG)
	reg.MustRegister(m.groupQuotaUsedSpaceG)
	reg.MustRegister(m.groupQuotaUsedInodesG)
	reg.MustRegister(m.bgjobDuration)
	reg.MustRegister(m.bgjobDels)
	reg.MustRegister(m.subdirInfoG)

	// Initialize subdir info metric
	subdir := m.conf.Subdir
	if subdir == "/" {
		subdir = ""
	}
	m.subdirInfoG.WithLabelValues(subdir).Set(1)

	go func() {
		for {
			if m.sessCtx != nil && m.sessCtx.Canceled() {
				return
			}
			var totalSpace, availSpace, iused, iavail uint64
			err := m.StatFS(Background(), m.root, &totalSpace, &availSpace, &iused, &iavail)
			if err == 0 {
				m.usedSpaceG.Set(float64(totalSpace - availSpace))
				m.usedInodesG.Set(float64(iused))
				m.totalSpaceG.Set(float64(totalSpace))
				m.totalInodesG.Set(float64(iused + iavail))
			}
			m.updateQuotaMetrics()
			utils.SleepWithJitter(time.Second * 10)
		}
	}()

	go func() {
		for {
			if m.sessCtx != nil && m.sessCtx.Canceled() {
				return
			}
			m.cleanupQuotaMetrics()
			utils.SleepWithJitter(time.Hour)
		}
	}()
}

func (m *baseMeta) InitMetrics(reg prometheus.Registerer) {
	if reg == nil {
		return
	}
	reg.MustRegister(m.txDist)
	reg.MustRegister(m.txRestart)
	reg.MustRegister(m.opDist)
	reg.MustRegister(m.opCount)
	reg.MustRegister(m.opDuration)
}

func (m *baseMeta) timeit(method string, start time.Time) {
	used := time.Since(start).Seconds()
	m.opDist.Observe(used)
	m.opCount.WithLabelValues(method).Inc()
	m.opDuration.WithLabelValues(method).Add(used)
}

func (m *baseMeta) getBase() *baseMeta {
	return m
}

func (m *baseMeta) checkRoot(inode Ino) Ino {
	switch inode {
	case 0:
		return RootInode // force using Root inode
	case RootInode:
		return m.root
	default:
		return inode
	}
}

func (r *baseMeta) txLock(idx uint) {
	r.txlocks[idx%nlocks].Lock()
}

func (r *baseMeta) txUnlock(idx uint) {
	r.txlocks[idx%nlocks].Unlock()
}

func (r *baseMeta) txBatchLock(inodes ...Ino) func() {
	switch len(inodes) {
	case 0:
		return func() {}
	case 1: // most cases
		r.txLock(uint(inodes[0]))
		return func() { r.txUnlock(uint(inodes[0])) }
	default: // for rename and more
		inodeSlots := make([]int, len(inodes))
		for i, ino := range inodes {
			inodeSlots[i] = int(ino % nlocks)
		}
		sort.Ints(inodeSlots)
		uniqInodeSlots := inodeSlots[:0]
		for i := 0; i < len(inodeSlots); i++ { // Go does not support recursive locks
			if i == 0 || inodeSlots[i] != inodeSlots[i-1] {
				uniqInodeSlots = append(uniqInodeSlots, inodeSlots[i])
			}
		}
		for _, idx := range uniqInodeSlots {
			r.txlocks[idx].Lock()
		}
		return func() {
			for _, idx := range uniqInodeSlots {
				r.txlocks[idx].Unlock()
			}
		}
	}
}

func (r *baseMeta) OnMsg(mtype uint32, cb MsgCallback) {
	r.msgCallbacks.Lock()
	defer r.msgCallbacks.Unlock()
	r.msgCallbacks.callbacks[mtype] = cb
}

func (r *baseMeta) newMsg(mid uint32, args ...interface{}) error {
	r.msgCallbacks.Lock()
	cb, ok := r.msgCallbacks.callbacks[mid]
	r.msgCallbacks.Unlock()
	if ok {
		return cb(args...)
	}
	return fmt.Errorf("message %d is not supported", mid)
}

func (m *baseMeta) Load(checkVersion bool) (*Format, error) {
	body, err := m.en.doLoad()
	if err == nil && len(body) == 0 {
		err = fmt.Errorf("database is not formatted, please run `juicefs format ...` first")
	}
	if err != nil {
		return nil, err
	}
	var format = new(Format)
	if err = json.Unmarshal(body, format); err != nil {
		return nil, fmt.Errorf("json: %s", err)
	}
	if checkVersion {
		if err = format.CheckVersion(); err != nil {
			return nil, fmt.Errorf("check version: %s", err)
		}
	}
	m.Lock()
	m.fmt = format
	m.Unlock()
	return format, nil
}

func (m *baseMeta) newSessionInfo() []byte {
	host, err := os.Hostname()
	if err != nil {
		logger.Warnf("Failed to get hostname: %s", err)
	}
	ips, err := utils.FindLocalIPs(m.conf.NetworkInterfaces...)
	if err != nil {
		logger.Warnf("Failed to get local IP: %s", err)
	}
	addrs := make([]string, 0, len(ips))
	for _, i := range ips {
		if ip := i.String(); ip[0] == '?' {
			logger.Warnf("Invalid IP address: %s", ip)
		} else {
			addrs = append(addrs, ip)
		}
	}
	buf, err := json.Marshal(&SessionInfo{
		Version:    version.Version(),
		HostName:   host,
		IPAddrs:    addrs,
		MountPoint: m.conf.MountPoint,
		MountTime:  time.Now(),
		ProcessID:  os.Getpid(),
	})
	if err != nil {
		panic(err) // marshal SessionInfo should never fail
	}
	return buf
}

func (m *baseMeta) NewSession(record bool) error {
	m.sessCtx = Background()
	ctx := m.sessCtx
	go m.refresh(ctx)

	if err := m.en.cacheACLs(ctx); err != nil {
		return err
	}

	if m.conf.ReadOnly {
		logger.Infof("Create read-only session OK with version: %s", version.Version())
		return nil
	}

	if record {
		// use the original sid if it's not 0
		action := "Update"
		if m.sid == 0 {
			v, err := m.en.incrCounter("nextSession", 1)
			if err != nil {
				return fmt.Errorf("get session ID: %s", err)
			}
			m.sid = uint64(v)
			m.conf.Sid = m.sid
			action = "Create"
		}
		if err := m.en.doNewSession(m.newSessionInfo(), action == "Update"); err != nil {
			return fmt.Errorf("create session: %s", err)
		}
		logger.Infof("%s session %d OK with version: %s", action, m.sid, version.Version())
	}

	m.loadQuotas()

	m.sessWG.Add(3)
	go m.flushStats(ctx)
	go m.flushDirStat(ctx)
	go m.flushQuotas(ctx)
	m.startDeleteSliceTasks() // start MaxDeletes tasks

	if !m.conf.NoBGJob {
		m.sessWG.Add(4)
		go m.cleanupDeletedFiles(ctx)
		go m.cleanupSlices(ctx)
		go m.cleanupTrash(ctx)
		go m.symlinks.clean(ctx, &m.sessWG)
	}
	return nil
}

const (
	bgJobSucc     = "success"
	bgJobFail     = "failed"
	bgJobCanceled = "canceled"
)

func (m *baseMeta) startDeleteSliceTasks() {
	m.Lock()
	defer m.Unlock()
	if m.conf.MaxDeletes <= 0 || m.dslices != nil {
		return
	}
	m.sessWG.Add(m.conf.MaxDeletes)
	m.dSliceWG.Add(m.conf.MaxDeletes)
	m.dslices = make(chan Slice, m.conf.MaxDeletes*10240)
	for i := 0; i < m.conf.MaxDeletes; i++ {
		go func(dslices chan Slice) {
			defer m.sessWG.Done()
			defer m.dSliceWG.Done()
			for {
				select {
				case <-m.sessCtx.Done():
					return
				case s, ok := <-dslices:
					if !ok {
						return
					}
					m.deleteSlice_(s.Id, s.Size)
				}
			}
		}(m.dslices)
	}
}

func (m *baseMeta) stopDeleteSliceTasks() {
	m.dSliceMu.Lock()
	if m.conf.MaxDeletes <= 0 || m.dslices == nil {
		m.dSliceMu.Unlock()
		return
	}
	close(m.dslices)
	m.dslices = nil
	m.dSliceMu.Unlock()
	m.dSliceWG.Wait()
}

func (m *baseMeta) expireTime() int64 {
	if m.conf.Heartbeat > 0 {
		return time.Now().Add(m.conf.Heartbeat * 5).Unix()
	} else {
		return time.Now().Add(time.Hour * 24 * 365).Unix()
	}
}

func (m *baseMeta) OnReload(fn func(f *Format)) {
	m.msgCallbacks.Lock()
	defer m.msgCallbacks.Unlock()
	m.reloadCb = append(m.reloadCb, fn)
}

const UmountCode = 11

func (m *baseMeta) refresh(ctx Context) {
	for {
		if ctx.Canceled() {
			return
		}
		if m.conf.Heartbeat > 0 {
			utils.SleepWithJitter(m.conf.Heartbeat)
		} else { // use default value
			utils.SleepWithJitter(time.Second * 12)
		}
		m.sesMu.Lock()
		if m.umounting {
			m.sesMu.Unlock()
			return
		}
		if !m.conf.ReadOnly && m.conf.Heartbeat > 0 && m.sid > 0 {
			if err := m.en.doRefreshSession(); err != nil {
				logger.Errorf("Refresh session %d: %s", m.sid, err)
			}
		}
		m.sesMu.Unlock()

		old := m.getFormat()
		if format, err := m.Load(false); err != nil {
			if strings.HasPrefix(err.Error(), "database is not formatted") {
				logger.Errorf("reload setting: %s", err)
				os.Exit(UmountCode)
			}
			logger.Warnf("reload setting: %s", err)
		} else if format.MetaVersion > MaxVersion {
			logger.Errorf("incompatible metadata version %d > max version %d", format.MetaVersion, MaxVersion)
			os.Exit(UmountCode)
		} else if format.UUID != old.UUID {
			logger.Errorf("UUID changed from %s to %s", old.UUID, format.UUID)
			os.Exit(UmountCode)
		} else if !reflect.DeepEqual(format, old) {
			m.msgCallbacks.Lock()
			cbs := m.reloadCb
			m.msgCallbacks.Unlock()
			for _, cb := range cbs {
				cb(format)
			}
		}

		if v, err := m.en.getCounter(usedSpace); err == nil {
			atomic.StoreInt64(&m.usedSpace, v)
		} else {
			logger.Warnf("Get counter %s: %s", usedSpace, err)
		}
		if v, err := m.en.getCounter(totalInodes); err == nil {
			atomic.StoreInt64(&m.usedInodes, v)
		} else {
			logger.Warnf("Get counter %s: %s", totalInodes, err)
		}
		m.loadQuotas()

		if m.conf.ReadOnly || m.conf.NoBGJob || m.conf.Heartbeat == 0 {
			continue
		}
		if ok, err := m.en.setIfSmall("lastCleanupSessions", time.Now().Unix(), int64((m.conf.Heartbeat * 9 / 10).Seconds())); err != nil {
			logger.Warnf("checking counter lastCleanupSessions: %s", err)
		} else if ok {
			go m.CleanStaleSessions(ctx)
		}
	}
}

func (m *baseMeta) CleanStaleSessions(ctx Context) {
	sids, err := m.en.doFindStaleSessions(1000)
	if err != nil {
		logger.Warnf("scan stale sessions: %s", err)
		return
	}
	for _, sid := range sids {
		if ctx.Canceled() {
			return
		}
		s, err := m.en.GetSession(sid, false)
		if err != nil {
			logger.Warnf("Get session info %d: %v", sid, err)
			s = &Session{Sid: sid}
		}
		logger.Infof("clean up stale session %d %+v: %v", sid, s.SessionInfo, m.en.doCleanStaleSession(sid))
	}
}

func (m *baseMeta) CloseSession() error {
	m.FlushSession()
	m.sesMu.Lock()
	m.umounting = true
	m.sesMu.Unlock()
	var err error
	if m.sid > 0 {
		err = m.en.doCleanStaleSession(m.sid)
	}
	m.sessCtx.Cancel()
	m.sessWG.Wait()
	m.stopDeleteSliceTasks()
	logger.Infof("close session %d: %v", m.sid, err)
	return err
}

func (m *baseMeta) FlushSession() {
	if m.conf.ReadOnly {
		return
	}
	m.doFlushStats()
	m.doFlushDirStat()
	m.doFlushQuotas()
	logger.Infof("flush session %d:", m.sid)
}

func (m *baseMeta) Init(format *Format, force bool) error {
	return m.en.doInit(format, force)
}

func (m *baseMeta) cleanupDeletedFiles(ctx Context) {
	defer m.sessWG.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(utils.JitterIt(time.Hour)):
		}
		if ok, err := m.en.setIfSmall("lastCleanupFiles", time.Now().Unix(), int64(time.Hour.Seconds())*9/10); err != nil {
			logger.Warnf("checking counter lastCleanupFiles: %s", err)
		} else if ok {
			job := "cleanupDeletedFiles"
			jobStart := time.Now()
			files, err := m.en.doFindDeletedFiles(time.Now().Add(-time.Hour).Unix(), 6e5)
			if err != nil {
				logger.Warnf("scan deleted files: %s", err)
				m.bgjobDuration.WithLabelValues(job, bgJobFail).Observe(time.Since(jobStart).Seconds())
				continue
			}
			var processed int64
			status := bgJobSucc
			for inode, length := range files {
				logger.Debugf("cleanup chunks of inode %d with %d bytes", inode, length)
				m.en.doDeleteFileData(inode, length)
				processed++
				if time.Since(jobStart) > 50*time.Minute { // Yield my time slice to avoid conflicts with other clients
					status = bgJobCanceled
					break
				}
			}
			m.bgjobDuration.WithLabelValues(job, status).Observe(time.Since(jobStart).Seconds())
			m.bgjobDels.WithLabelValues(job).Add(float64(processed))
		}
	}
}

func (m *baseMeta) cleanupSlices(ctx Context) {
	defer m.sessWG.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(utils.JitterIt(time.Hour)):
		}
		if ok, err := m.en.setIfSmall("nextCleanupSlices", time.Now().Unix(), int64(time.Hour.Seconds())*9/10); err != nil {
			logger.Warnf("checking counter nextCleanupSlices: %s", err)
		} else if ok {
			func() {
				cCtx := WrapWithTimeout(ctx, time.Minute*50)
				defer cCtx.Cancel()
				jobStart := time.Now()
				status := bgJobSucc
				var cnt uint64
				if err := m.en.doCleanupSlices(cCtx, &cnt); err != nil {
					if errors.Is(err, context.DeadlineExceeded) {
						status = bgJobCanceled
					} else {
						status = bgJobFail
					}
				}
				m.bgjobDuration.WithLabelValues("cleanupSlices", status).Observe(time.Since(jobStart).Seconds())
				m.bgjobDels.WithLabelValues("cleanupSlices").Add(float64(cnt))
			}()
		}
	}
}

func (m *baseMeta) StatFS(ctx Context, ino Ino, totalspace, availspace, iused, iavail *uint64) syscall.Errno {
	defer m.timeit("StatFS", time.Now())
	if st := m.statRootFs(ctx, totalspace, availspace, iused, iavail); st != 0 {
		return st
	}
	ino = m.checkRoot(ino)
	var usage, quota *Quota
	for ino >= RootInode {
		ino, quota = m.getQuotaParent(ctx, ino)
		if quota == nil {
			break
		}
		q := quota.snap()
		q.sanitize()
		if usage == nil {
			usage = &q
		}
		if q.MaxSpace > 0 {
			ls := uint64(q.MaxSpace - q.UsedSpace)
			if ls < *availspace {
				*availspace = ls
			}
		}
		if q.MaxInodes > 0 {
			li := uint64(q.MaxInodes - q.UsedInodes)
			if li < *iavail {
				*iavail = li
			}
		}
		if ino == RootInode {
			break
		}
		if parent, st := m.getDirParent(ctx, ino); st != 0 {
			logger.Warnf("Get directory parent of inode %d: %s", ino, st)
			break
		} else {
			ino = parent
		}
	}
	if usage != nil {
		*totalspace = uint64(usage.UsedSpace) + *availspace
		*iused = uint64(usage.UsedInodes)
	}
	return 0
}

func (m *baseMeta) statRootFs(ctx Context, totalspace, availspace, iused, iavail *uint64) syscall.Errno {
	used, inodes := atomic.LoadInt64(&m.usedSpace), atomic.LoadInt64(&m.usedInodes)
	var err error
	if !m.conf.FastStatfs || used == unknownUsage || inodes == unknownUsage {
		var remoteUsed int64 // using an additional variable here to ensure the assignment inside `utils.WithTimeout` does not change the `used` variable again after a timeout.
		err = utils.WithTimeout(ctx, func(context.Context) error {
			remoteUsed, err = m.en.getCounter(usedSpace)
			return err
		}, time.Millisecond*150)
		if err == nil {
			used = remoteUsed
		}
		var remoteInodes int64
		err = utils.WithTimeout(ctx, func(context.Context) error {
			remoteInodes, err = m.en.getCounter(totalInodes)
			return err
		}, time.Millisecond*150)
		if err == nil {
			inodes = remoteInodes
		}
	}

	used += atomic.LoadInt64(&m.newSpace)
	inodes += atomic.LoadInt64(&m.newInodes)
	if used < 0 {
		used = 0
	}
	format := m.getFormat()
	if format.Capacity > 0 {
		*totalspace = format.Capacity
		if *totalspace < uint64(used) {
			*totalspace = uint64(used)
		}
	} else {
		*totalspace = 1 << 50
		const maxVal = math.MaxUint64 >> 1
		for *totalspace*8 < uint64(used)*10 {
			if *totalspace >= maxVal {
				*totalspace = math.MaxUint64
				break
			}
			*totalspace <<= 1
		}
	}
	*availspace = *totalspace - uint64(used)
	if inodes < 0 {
		inodes = 0
	}
	*iused = uint64(inodes)
	if format.Inodes > 0 {
		if *iused > format.Inodes {
			*iavail = 0
		} else {
			*iavail = format.Inodes - *iused
		}
	} else {
		*iavail = 10 << 20
		const maxVal = math.MaxUint64 >> 1
		for *iused > *iavail*4 {
			if *iavail >= maxVal {
				break
			}
			*iavail <<= 1
		}
	}
	return 0
}

func (m *baseMeta) resolveCase(ctx Context, parent Ino, name string) *Entry {
	var entries []*Entry
	_ = m.en.doReaddir(ctx, parent, 0, &entries, -1)
	for _, e := range entries {
		n := string(e.Name)
		if strings.EqualFold(name, n) {
			return e
		}
	}
	return nil
}

func (m *baseMeta) Lookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr, checkPerm bool) syscall.Errno {
	if inode == nil || attr == nil {
		return syscall.EINVAL // bad request
	}
	defer m.timeit("Lookup", time.Now())
	parent = m.checkRoot(parent)
	if checkPerm {
		if st := m.Access(ctx, parent, MODE_MASK_X, nil); st != 0 {
			return st
		}
	}
	if name == ".." {
		if parent == m.root {
			name = "."
		} else {
			if st := m.GetAttr(ctx, parent, attr); st != 0 {
				return st
			}
			if attr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			*inode = attr.Parent
			return m.GetAttr(ctx, *inode, attr)
		}
	}
	if name == "." {
		if st := m.GetAttr(ctx, parent, attr); st != 0 {
			return st
		}
		*inode = parent
		return 0
	}
	if parent == RootInode && name == TrashName {
		if st := m.GetAttr(ctx, TrashInode, attr); st != 0 {
			return st
		}
		*inode = TrashInode
		return 0
	}
	st := m.en.doLookup(ctx, parent, name, inode, attr)
	if st == syscall.ENOENT && m.conf.CaseInsensi {
		if e := m.resolveCase(ctx, parent, name); e != nil {
			*inode = e.Inode
			if st = m.GetAttr(ctx, *inode, attr); st == syscall.ENOENT {
				logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, e.Name)
				*attr = *e.Attr
				st = 0
			}
		}
	}
	if st == 0 && attr.Typ == TypeDirectory && !parent.IsTrash() {
		m.parentMu.Lock()
		m.dirParents[*inode] = parent
		m.parentMu.Unlock()
	}
	return st
}

func (attr *Attr) reset() {
	attr.Flags = 0
	attr.Mode = 0
	attr.Typ = 0
	attr.Uid = 0
	attr.Gid = 0
	attr.Atime = 0
	attr.Atimensec = 0
	attr.Mtime = 0
	attr.Mtimensec = 0
	attr.Ctime = 0
	attr.Ctimensec = 0
	attr.Nlink = 0
	attr.Length = 0
	attr.Rdev = 0
	attr.Parent = 0
	attr.AccessACL = aclAPI.None
	attr.DefaultACL = aclAPI.None
	attr.Full = false
}

func (m *baseMeta) parseAttr(buf []byte, attr *Attr) {
	attr.Unmarshal(buf)
}

func (m *baseMeta) marshal(attr *Attr) []byte {
	return attr.Marshal()
}

func (m *baseMeta) encodeDelayedSlice(id uint64, size uint32) []byte {
	w := utils.NewBuffer(8 + 4)
	w.Put64(id)
	w.Put32(size)
	return w.Bytes()
}

func (m *baseMeta) decodeDelayedSlices(buf []byte, ss *[]Slice) {
	if len(buf) == 0 || len(buf)%12 != 0 {
		return
	}
	for rb := utils.FromBuffer(buf); rb.HasMore(); {
		*ss = append(*ss, Slice{Id: rb.Get64(), Size: rb.Get32()})
	}
}

func clearSUGID(ctx Context, cur *Attr, set *Attr) {
	switch runtime.GOOS {
	case "darwin":
		if ctx.Uid() != 0 {
			// clear SUID and SGID
			cur.Mode &= 01777
			set.Mode &= 01777
		}
	case "linux":
		// same as ext
		if cur.Typ != TypeDirectory {
			if ctx.Uid() != 0 || (cur.Mode>>3)&1 != 0 {
				// clear SUID and SGID
				cur.Mode &= 01777
				set.Mode &= 01777
			} else {
				// keep SGID if the file is non-group-executable
				cur.Mode &= 03777
				set.Mode &= 03777
			}
		}
	}
}

func (r *baseMeta) Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno {
	return syscall.ENOTSUP
}

func (m *baseMeta) Access(ctx Context, inode Ino, mmask uint8, attr *Attr) syscall.Errno {
	if ctx.Uid() == 0 {
		return 0
	}
	if !ctx.CheckPermission() {
		return 0
	}

	if attr == nil || !attr.Full {
		if attr == nil {
			attr = &Attr{}
		}
		err := m.GetAttr(ctx, inode, attr)
		if err != 0 {
			return err
		}
	}

	// ref: https://github.com/torvalds/linux/blob/e5eb28f6d1afebed4bb7d740a797d0390bd3a357/fs/namei.c#L352-L357
	// dont check acl if mask is 0
	if attr.AccessACL != aclAPI.None && (attr.Mode&00070) != 0 {
		rule := &aclAPI.Rule{}
		if st := m.en.doGetFacl(ctx, inode, aclAPI.TypeAccess, attr.AccessACL, rule); st != 0 {
			return st
		}
		if rule.CanAccess(ctx.Uid(), ctx.Gids(), attr.Uid, attr.Gid, mmask) {
			return 0
		}
		return syscall.EACCES
	}

	mode := accessMode(attr, ctx.Uid(), ctx.Gids())
	if mode&mmask != mmask {
		logger.Debugf("Access inode %d %o, mode %o, request mode %o", inode, attr.Mode, mode, mmask)
		return syscall.EACCES
	}
	return 0
}

func (m *baseMeta) GetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	inode = m.checkRoot(inode)
	if m.conf.OpenCache > 0 && m.of.Check(inode, attr) {
		return 0
	}
	defer m.timeit("GetAttr", time.Now())
	var err syscall.Errno
	if inode == RootInode || inode == TrashInode {
		// doGetAttr could overwrite the `attr` after timeout
		var a Attr
		e := utils.WithTimeout(ctx, func(context.Context) error {
			err = m.en.doGetAttr(ctx, inode, &a)
			return nil
		}, time.Millisecond*300)
		if e == nil && err == 0 {
			*attr = a
		} else {
			err = 0
			attr.Typ = TypeDirectory
			attr.Mode = 0777
			attr.Nlink = 2
			attr.Length = 4 << 10
			if inode == TrashInode {
				attr.Mode = 0555
			}
			attr.Parent = RootInode
			attr.Full = true
		}
	} else {
		err = m.en.doGetAttr(ctx, inode, attr)
	}
	if err == 0 {
		m.of.Update(inode, attr)
		if attr.Typ == TypeDirectory && inode != RootInode && !attr.Parent.IsTrash() {
			m.parentMu.Lock()
			m.dirParents[inode] = attr.Parent
			m.parentMu.Unlock()
		}
	}
	return err
}

func (m *baseMeta) SetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr) syscall.Errno {
	defer m.timeit("SetAttr", time.Now())
	inode = m.checkRoot(inode)
	var oldAttr Attr

	err := m.en.doSetAttr(ctx, inode, set, sugidclearmode, attr, &oldAttr)
	if err == 0 {
		m.of.InvalidateChunk(inode, invalidateAttrOnly)
		m.of.Update(inode, attr)

		uidChanged := oldAttr.Uid != attr.Uid
		gidChanged := oldAttr.Gid != attr.Gid
		if uidChanged || gidChanged {
			var space, inodes int64
			if attr.Typ == TypeFile {
				space = align4K(attr.Length)
				inodes = 1
			} else if attr.Typ == TypeDirectory {
				space = align4K(0)
				inodes = 1
			}

			if uidChanged {
				m.updateUserGroupStat(ctx, oldAttr.Uid, 0, -space, -inodes)
				m.updateUserGroupStat(ctx, attr.Uid, 0, space, inodes)
			}
			if gidChanged {
				m.updateUserGroupStat(ctx, 0, oldAttr.Gid, -space, -inodes)
				m.updateUserGroupStat(ctx, 0, attr.Gid, space, inodes)
			}
		}
	}
	return err
}

func (m *baseMeta) nextInode() (Ino, error) {
	m.freeMu.Lock()
	defer m.freeMu.Unlock()
	if m.freeInodes.next >= m.freeInodes.maxid {

		m.prefetchMu.Lock() // Wait until prefetchInodes() is done
		if m.prefetchedInodes.maxid > m.freeInodes.maxid {
			m.freeInodes = m.prefetchedInodes
			m.prefetchedInodes = freeID{}
		}
		m.prefetchMu.Unlock()

		if m.freeInodes.next >= m.freeInodes.maxid { // Prefetch missed, try again
			nextInodes, err := m.allocateInodes()
			if err != nil {
				return 0, err
			}
			m.freeInodes = nextInodes
		}
	}
	n := m.freeInodes.next
	m.freeInodes.next++
	for n <= 1 {
		n = m.freeInodes.next
		m.freeInodes.next++
	}
	if m.freeInodes.maxid-m.freeInodes.next == inodeNeedPrefetch {
		go m.prefetchInodes()
	}
	return Ino(n), nil
}

func (m *baseMeta) prefetchInodes() {
	m.prefetchMu.Lock()
	defer m.prefetchMu.Unlock()
	if m.prefetchedInodes.maxid > m.freeInodes.maxid {
		return // Someone else has done the job
	}
	nextInodes, err := m.allocateInodes()
	if err == nil {
		m.prefetchedInodes = nextInodes
	} else {
		logger.Warnf("Failed to prefetch inodes: %s, current limit: %d", err, m.freeInodes.maxid)
	}
}

func (m *baseMeta) allocateInodes() (freeID, error) {
	v, err := m.en.incrCounter("nextInode", inodeBatch)
	if err != nil {
		return freeID{}, err
	}
	return freeID{next: uint64(v) - inodeBatch, maxid: uint64(v)}, nil
}

func (m *baseMeta) Mknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno {
	if _type < TypeFile || _type > TypeSocket {
		return syscall.EINVAL
	}
	if parent.IsTrash() {
		return syscall.EPERM
	}
	if parent == RootInode && name == TrashName {
		return syscall.EPERM
	}
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if name == "." || name == ".." {
		return syscall.EEXIST
	}
	if errno := checkInodeName(name); errno != 0 {
		return errno
	}

	defer m.timeit("Mknod", time.Now())
	parent = m.checkRoot(parent)
	var space, inodes int64 = align4K(0), 1
	if err := m.checkQuota(ctx, space, inodes, ctx.Uid(), ctx.Gid(), parent); err != 0 {
		return err
	}

	ino, err := m.nextInode()
	if err != nil {
		return errno(err)
	}
	if inode == nil {
		inode = &ino
	}
	*inode = ino
	if attr == nil {
		attr = &Attr{}
	}
	attr.Typ = _type
	attr.Uid = ctx.Uid()
	attr.Gid = ctx.Gid()
	if _type == TypeDirectory {
		attr.Nlink = 2
		attr.Length = 4 << 10
	} else {
		attr.Nlink = 1
		if _type == TypeSymlink {
			attr.Length = uint64(len(path))
		} else {
			attr.Length = 0
			attr.Rdev = rdev
		}
	}
	attr.Parent = parent
	attr.Full = true
	st := m.en.doMknod(ctx, parent, name, _type, mode, cumask, path, inode, attr)
	if st == 0 {
		m.en.updateStats(space, inodes)
		m.updateDirStat(ctx, parent, 0, space, inodes)
		m.updateDirQuota(ctx, parent, space, inodes)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, space, inodes)
	}
	return st
}

func (m *baseMeta) Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32, inode *Ino, attr *Attr) syscall.Errno {
	if attr == nil {
		attr = &Attr{}
	}
	eno := m.Mknod(ctx, parent, name, TypeFile, mode, cumask, 0, "", inode, attr)
	if eno == syscall.EEXIST && (flags&syscall.O_EXCL) == 0 && attr.Typ == TypeFile {
		eno = 0
	}
	if eno == 0 && inode != nil {
		m.of.Open(*inode, attr)
	}
	return eno
}

func (m *baseMeta) Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16, copysgid uint8, inode *Ino, attr *Attr) syscall.Errno {
	st := m.Mknod(ctx, parent, name, TypeDirectory, mode, cumask, 0, "", inode, attr)
	if st == 0 {
		m.parentMu.Lock()
		m.dirParents[*inode] = parent
		m.parentMu.Unlock()
	}
	return st
}

func (m *baseMeta) Symlink(ctx Context, parent Ino, name string, path string, inode *Ino, attr *Attr) syscall.Errno {
	if len(path) == 0 || len(path) > MaxSymlink {
		return syscall.EINVAL
	}
	for _, c := range path {
		if c == 0 {
			return syscall.EINVAL
		}
	}
	// mode of symlink is ignored in POSIX
	return m.Mknod(ctx, parent, name, TypeSymlink, 0777, 0, 0, path, inode, attr)
}

func (m *baseMeta) Link(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno {
	if parent.IsTrash() {
		return syscall.EPERM
	}
	if parent == RootInode && name == TrashName {
		return syscall.EPERM
	}
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if errno := checkInodeName(name); errno != 0 {
		return errno
	}
	if name == "." || name == ".." {
		return syscall.EEXIST
	}

	defer m.timeit("Link", time.Now())
	if attr == nil {
		attr = &Attr{}
	}
	parent = m.checkRoot(parent)
	if st := m.GetAttr(ctx, inode, attr); st != 0 {
		return st
	}
	if attr.Typ == TypeDirectory {
		return syscall.EPERM
	}

	if m.checkUserQuota(ctx, uint64(attr.Uid), 0, 1) {
		return syscall.EDQUOT
	}
	if m.checkGroupQuota(ctx, uint64(attr.Gid), 0, 1) {
		return syscall.EDQUOT
	}
	if m.checkDirQuota(ctx, parent, align4K(attr.Length), 1) {
		return syscall.EDQUOT
	}

	defer func() { m.of.InvalidateChunk(inode, invalidateAttrOnly) }()
	err := m.en.doLink(ctx, inode, parent, name, attr)
	if err == 0 {
		m.updateDirStat(ctx, parent, int64(attr.Length), align4K(attr.Length), 1)
		m.updateDirQuota(ctx, parent, align4K(attr.Length), 1)
	}
	return err
}

func (m *baseMeta) ReadLink(ctx Context, inode Ino, path *[]byte) syscall.Errno {
	noatime := m.conf.AtimeMode == NoAtime || m.conf.ReadOnly
	if target, ok := m.symlinks.Load(inode); ok {
		if noatime {
			*path = target.([]byte)
			return 0
		} else {
			buf := target.([]byte)
			// ctime and mtime are ignored since symlink can't be modified
			atime := int64(binary.BigEndian.Uint64(buf[:8]))
			attr := &Attr{Atime: atime / int64(time.Second), Atimensec: uint32(atime % int64(time.Second))}
			if !m.atimeNeedsUpdate(attr, time.Now()) {
				*path = buf[8:]
				return 0
			}
		}
	}
	defer m.timeit("ReadLink", time.Now())
	atime, target, err := m.en.doReadlink(ctx, inode, noatime)
	if err != nil {
		return errno(err)
	}
	if len(target) == 0 {
		var attr Attr
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			return st
		}
		if attr.Typ != TypeSymlink {
			return syscall.EINVAL
		}
		return syscall.EIO
	}
	*path = target
	if noatime {
		m.symlinks.Store(inode, target)
	} else {
		buf := make([]byte, 8+len(target))
		binary.BigEndian.PutUint64(buf[:8], uint64(atime))
		copy(buf[8:], target)
		m.symlinks.Store(inode, buf)
	}
	return 0
}

func (m *baseMeta) Unlink(ctx Context, parent Ino, name string, skipCheckTrash ...bool) syscall.Errno {
	if parent == RootInode && name == TrashName || parent.IsTrash() && ctx.Uid() != 0 {
		return syscall.EPERM
	}
	if m.conf.ReadOnly {
		return syscall.EROFS
	}

	defer m.timeit("Unlink", time.Now())
	parent = m.checkRoot(parent)
	var attr Attr
	err := m.en.doUnlink(ctx, parent, name, &attr, skipCheckTrash...)
	if err == 0 {
		var diffLength uint64
		if attr.Typ == TypeFile {
			diffLength = attr.Length
		}
		m.updateDirStat(ctx, parent, -int64(diffLength), -align4K(diffLength), -1)
		if !parent.IsTrash() {
			m.updateDirQuota(ctx, parent, -align4K(diffLength), -1)
		}
	}
	return err
}

func (m *baseMeta) Rmdir(ctx Context, parent Ino, name string, skipCheckTrash ...bool) syscall.Errno {
	if name == "." {
		return syscall.EINVAL
	}
	if name == ".." {
		return syscall.ENOTEMPTY
	}
	if parent == RootInode && name == TrashName || parent == TrashInode || parent.IsTrash() && ctx.Uid() != 0 {
		return syscall.EPERM
	}
	if m.conf.ReadOnly {
		return syscall.EROFS
	}

	defer m.timeit("Rmdir", time.Now())
	parent = m.checkRoot(parent)
	var inode Ino
	var oldAttr Attr
	st := m.en.doRmdir(ctx, parent, name, &inode, &oldAttr, skipCheckTrash...)
	if st == 0 {
		if !parent.IsTrash() {
			m.parentMu.Lock()
			delete(m.dirParents, inode)
			m.parentMu.Unlock()
		}
		m.updateDirStat(ctx, parent, 0, -align4K(0), -1)
		m.updateDirQuota(ctx, parent, -align4K(0), -1)
	}
	return st
}

// BatchUnlink delete multiple files in the same directory (case-sensitive filenames)
func (m *baseMeta) BatchUnlink(ctx Context, parent Ino, entries []*Entry, count *uint64, skipCheckTrash bool) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}
	var delta dirStat
	st := m.en.doBatchUnlink(ctx, parent, entries, &delta, skipCheckTrash)
	if st == 0 {
		m.updateDirStat(ctx, parent, delta.length, delta.space, delta.inodes)
		if !parent.IsTrash() {
			m.updateDirQuota(ctx, parent, delta.space, delta.inodes)
		}
		if count != nil && len(entries) > 0 {
			atomic.AddUint64(count, uint64(len(entries)))
		}
	}
	return st
}

func (m *baseMeta) BatchClone(ctx Context, srcParent Ino, dstParent Ino, entries []*Entry, cmode uint8, cumask uint16, count *uint64) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}
	var r batchCloneResult
	st := m.en.doBatchClone(ctx, srcParent, dstParent, entries, cmode, cumask, &r)
	if st == 0 {
		m.en.updateStats(r.space, r.inodes)
		m.updateDirQuota(ctx, dstParent, r.space, r.inodes)
		// TODO
		for _, q := range r.deltas {
			m.updateUserGroupStat(ctx, q.Uid, q.Gid, q.Space, q.Inodes)
		}
		if count != nil {
			atomic.AddUint64(count, uint64(r.inodes))
		}
	}
	return st
}

func (m *baseMeta) Rename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno {
	if parentSrc == RootInode && nameSrc == TrashName || parentDst == RootInode && nameDst == TrashName {
		return syscall.EPERM
	}
	if parentDst.IsTrash() || parentSrc.IsTrash() && ctx.Uid() != 0 {
		return syscall.EPERM
	}
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if errno := checkInodeName(nameDst); errno != 0 {
		return errno
	}

	switch flags {
	case 0, RenameNoReplace, RenameExchange, RenameNoReplace | RenameRestore:
	case RenameWhiteout, RenameNoReplace | RenameWhiteout:
		return syscall.ENOTSUP
	default:
		return syscall.EINVAL
	}

	defer m.timeit("Rename", time.Now())
	if inode == nil {
		inode = new(Ino)
	}
	if attr == nil {
		attr = &Attr{}
	}
	parentSrc = m.checkRoot(parentSrc)
	parentDst = m.checkRoot(parentDst)
	var quotaSrc, quotaDst Ino
	if !parentSrc.IsTrash() {
		quotaSrc, _ = m.getQuotaParent(ctx, parentSrc)
	}
	if parentSrc == parentDst {
		quotaDst = quotaSrc
	} else {
		quotaDst, _ = m.getQuotaParent(ctx, parentDst)
	}
	var space, inodes int64
	if quotaSrc != quotaDst {
		if st := m.Lookup(ctx, parentSrc, nameSrc, inode, attr, false); st != 0 {
			return st
		}
		if attr.Typ == TypeDirectory {
			m.quotaMu.RLock()
			q := m.dirQuotas[uint64(*inode)]
			m.quotaMu.RUnlock()
			if q != nil {
				space, inodes = q.UsedSpace+align4K(0), q.UsedInodes+1
			} else {
				var sum Summary
				logger.Debugf("Start to get summary of inode %d", *inode)
				if st := m.GetSummary(ctx, *inode, &sum, true, false); st != 0 {
					logger.Warnf("Get summary of inode %d: %s", *inode, st)
					return st
				}
				space, inodes = int64(sum.Size), int64(sum.Dirs+sum.Files)
			}
		} else {
			space, inodes = align4K(attr.Length), 1
		}
		// TODO: dst exists and is replaced or exchanged
		if quotaDst > 0 && m.checkDirQuota(ctx, parentDst, space, inodes) {
			return syscall.EDQUOT
		}
	}
	tinode := new(Ino)
	tattr := new(Attr)
	st := m.en.doRename(ctx, parentSrc, nameSrc, parentDst, nameDst, flags, inode, tinode, attr, tattr)
	if st == 0 {
		var diffLength uint64
		if attr.Typ == TypeDirectory {
			m.parentMu.Lock()
			m.dirParents[*inode] = parentDst
			m.parentMu.Unlock()
		} else if attr.Typ == TypeFile {
			diffLength = attr.Length
		}
		if parentSrc != parentDst {
			m.updateDirStat(ctx, parentSrc, -int64(diffLength), -align4K(diffLength), -1)
			m.updateDirStat(ctx, parentDst, int64(diffLength), align4K(diffLength), 1)
			if quotaSrc != quotaDst {
				if quotaSrc > 0 {
					m.updateDirQuota(ctx, parentSrc, -space, -inodes)
				}
				if quotaDst > 0 {
					m.updateDirQuota(ctx, parentDst, space, inodes)
				}
			}
		}
		if *tinode > 0 && flags != RenameExchange {
			diffLength = 0
			if tattr.Typ == TypeDirectory {
				m.parentMu.Lock()
				delete(m.dirParents, *tinode)
				m.parentMu.Unlock()
			} else if attr.Typ == TypeFile {
				diffLength = tattr.Length
			}
			m.updateDirStat(ctx, parentDst, -int64(diffLength), -align4K(diffLength), -1)
			if quotaDst > 0 {
				m.updateDirQuota(ctx, parentDst, -align4K(diffLength), -1)
			}
		}
	}
	return st
}

// caller makes sure inode is not special inode.
func (m *baseMeta) touchAtime(ctx Context, inode Ino, attr *Attr) {
	if m.conf.AtimeMode == NoAtime || m.conf.ReadOnly {
		return
	}

	if attr == nil {
		attr = new(Attr)
		if of := m.of.find(inode); of != nil {
			*attr = of.attr
		}
	}
	now := time.Now()
	if attr.Full && !m.atimeNeedsUpdate(attr, now) {
		return
	}

	updated, err := m.en.doTouchAtime(ctx, inode, attr, now)
	if updated {
		m.of.Update(inode, attr)
	} else if err != nil {
		logger.Warnf("Update atime of inode %d: %s", inode, err)
	}
}

func (m *baseMeta) Open(ctx Context, inode Ino, flags uint32, attr *Attr) (st syscall.Errno) {
	if m.conf.ReadOnly && flags&(syscall.O_WRONLY|syscall.O_RDWR|syscall.O_TRUNC|syscall.O_APPEND) != 0 {
		return syscall.EROFS
	}
	defer func() {
		if st == 0 {
			m.touchAtime(ctx, inode, attr)
		}
	}()
	if m.conf.OpenCache > 0 && m.of.OpenCheck(inode, attr) {
		return 0
	}
	// attr may be valid, see fs.Open()
	if attr != nil && !attr.Full {
		if st = m.GetAttr(ctx, inode, attr); st != 0 {
			return
		}
	}
	var mmask uint8 = 0
	switch flags & (syscall.O_RDONLY | syscall.O_WRONLY | syscall.O_RDWR) {
	case syscall.O_RDONLY:
		mmask = MODE_MASK_R
		// 0x20 means O_FMODE_EXEC
		if (flags & 0x20) != 0 {
			mmask = MODE_MASK_X
		}
	case syscall.O_WRONLY:
		mmask = MODE_MASK_W
	case syscall.O_RDWR:
		mmask = MODE_MASK_R | MODE_MASK_W
	}
	if st = m.Access(ctx, inode, mmask, attr); st != 0 {
		return
	}

	if attr.Flags&FlagImmutable != 0 || attr.Parent > TrashInode {
		if flags&(syscall.O_WRONLY|syscall.O_RDWR) != 0 {
			return syscall.EPERM
		}
	}
	if attr.Flags&FlagAppend != 0 {
		if (flags&(syscall.O_WRONLY|syscall.O_RDWR)) != 0 && (flags&syscall.O_APPEND) == 0 {
			return syscall.EPERM
		}
		if flags&syscall.O_TRUNC != 0 {
			return syscall.EPERM
		}
	}
	m.of.Open(inode, attr)
	return 0
}

func (m *baseMeta) InvalidateChunkCache(ctx Context, inode Ino, indx uint32) syscall.Errno {
	m.of.InvalidateChunk(inode, indx)
	return 0
}

func (m *baseMeta) Read(ctx Context, inode Ino, indx uint32, slices *[]Slice) (st syscall.Errno) {
	defer func() {
		if st == 0 {
			m.touchAtime(ctx, inode, nil)
		}
	}()

	f := m.of.find(inode)
	if f != nil {
		f.RLock()
		defer f.RUnlock()
	}
	if ss, ok := m.of.ReadChunk(inode, indx); ok {
		*slices = ss
		return 0
	}

	*slices = nil
	defer m.timeit("Read", time.Now())
	ss, st := m.en.doRead(ctx, inode, indx)
	if st != 0 {
		return st
	}
	if ss == nil {
		return syscall.EIO
	}
	if len(ss) == 0 {
		var attr Attr
		if st = m.en.doGetAttr(ctx, inode, &attr); st != 0 {
			return st
		}
		if attr.Typ != TypeFile {
			return syscall.EPERM
		}
		return 0
	}

	*slices = buildSlice(ss)
	m.of.CacheChunk(inode, indx, *slices)
	if !m.conf.ReadOnly && (len(ss) >= 5 || len(*slices) >= 5) {
		go m.compactChunk(inode, indx, false, false)
	}
	return 0
}

func (m *baseMeta) NewSlice(ctx Context, id *uint64) syscall.Errno {
	m.freeMu.Lock()
	defer m.freeMu.Unlock()
	if m.freeSlices.next >= m.freeSlices.maxid {
		v, err := m.en.incrCounter("nextChunk", sliceIdBatch)
		if err != nil {
			return errno(err)
		}
		m.freeSlices.next = uint64(v) - sliceIdBatch
		m.freeSlices.maxid = uint64(v)
	}
	*id = m.freeSlices.next
	m.freeSlices.next++
	return 0
}

func (m *baseMeta) Close(ctx Context, inode Ino) syscall.Errno {
	if m.of.Close(inode) {
		m.Lock()
		_, removed := m.removedFiles[inode]
		if removed {
			delete(m.removedFiles, inode)
		}
		m.Unlock()
		if removed {
			_ = m.en.doDeleteSustainedInode(m.sid, inode)
		}
	}
	return 0
}

func (m *baseMeta) Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time) syscall.Errno {
	defer m.timeit("Write", time.Now())
	f := m.of.find(inode)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	defer func() { m.of.InvalidateChunk(inode, indx) }()
	var numSlices int
	var delta dirStat
	var attr Attr
	st := m.en.doWrite(ctx, inode, indx, off, slice, mtime, &numSlices, &delta, &attr)
	if st == 0 {
		m.updateParentStat(ctx, inode, attr.Parent, delta.length, delta.space)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, delta.space, 0)
		if numSlices%100 == 99 || numSlices > 350 {
			if numSlices < maxSlices {
				go m.compactChunk(inode, indx, false, false)
			} else {
				m.compactChunk(inode, indx, true, false)
			}
		}
	}
	return st
}

func (m *baseMeta) Truncate(ctx Context, inode Ino, flags uint8, length uint64, attr *Attr, skipPermCheck bool) syscall.Errno {
	defer m.timeit("Truncate", time.Now())
	f := m.of.find(inode)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	defer func() { m.of.InvalidateChunk(inode, invalidateAllChunks) }()
	if attr == nil {
		attr = &Attr{}
	}
	var delta dirStat
	st := m.en.doTruncate(ctx, inode, flags, length, &delta, attr, skipPermCheck)
	if st == 0 {
		m.updateParentStat(ctx, inode, attr.Parent, delta.length, delta.space)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, delta.space, 0)
	}
	return st
}

func (m *baseMeta) Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, flength *uint64) syscall.Errno {
	if mode&fallocCollapesRange != 0 && mode != fallocCollapesRange {
		return syscall.EINVAL
	}
	if mode&fallocInsertRange != 0 && mode != fallocInsertRange {
		return syscall.EINVAL
	}
	if mode == fallocInsertRange || mode == fallocCollapesRange {
		return syscall.ENOTSUP
	}
	if mode&fallocPunchHole != 0 && mode&fallocKeepSize == 0 {
		return syscall.EINVAL
	}
	if size == 0 {
		return syscall.EINVAL
	}
	defer m.timeit("Fallocate", time.Now())
	f := m.of.find(inode)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	defer func() { m.of.InvalidateChunk(inode, invalidateAllChunks) }()
	var delta dirStat
	var attr Attr
	st := m.en.doFallocate(ctx, inode, mode, off, size, &delta, &attr)
	if st == 0 {
		if flength != nil {
			*flength = attr.Length
		}
		m.updateParentStat(ctx, inode, attr.Parent, delta.length, delta.space)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, delta.space, 0)
	}
	return st
}

func (m *baseMeta) Readdir(ctx Context, inode Ino, plus uint8, entries *[]*Entry) (rerr syscall.Errno) {
	var attr Attr
	defer func() {
		if rerr == 0 {
			m.touchAtime(ctx, inode, &attr)
		}
	}()
	inode = m.checkRoot(inode)
	if err := m.GetAttr(ctx, inode, &attr); err != 0 {
		return err
	}
	defer m.timeit("Readdir", time.Now())
	var mmask uint8 = MODE_MASK_R
	if plus != 0 {
		mmask |= MODE_MASK_X
	}
	if st := m.Access(ctx, inode, mmask, &attr); st != 0 {
		return st
	}
	if inode == m.root {
		attr.Parent = m.root
	}
	*entries = []*Entry{
		{
			Inode: inode,
			Name:  []byte("."),
			Attr:  &Attr{Typ: TypeDirectory},
		},
	}
	*entries = append(*entries, &Entry{
		Inode: attr.Parent,
		Name:  []byte(".."),
		Attr:  &Attr{Typ: TypeDirectory},
	})
	st := m.en.doReaddir(ctx, inode, plus, entries, -1)
	if st == syscall.ENOENT && inode == TrashInode {
		st = 0
	}
	return st
}

func (m *baseMeta) SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if name == "" {
		return syscall.EINVAL
	}
	switch flags {
	case 0, XattrCreate, XattrReplace:
	default:
		return syscall.EINVAL
	}

	defer m.timeit("SetXattr", time.Now())
	return m.en.doSetXattr(ctx, m.checkRoot(inode), name, value, flags)
}

func (m *baseMeta) RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if name == "" {
		return syscall.EINVAL
	}

	defer m.timeit("RemoveXattr", time.Now())
	return m.en.doRemoveXattr(ctx, m.checkRoot(inode), name)
}

func (m *baseMeta) GetParents(ctx Context, inode Ino) map[Ino]int {
	if inode == RootInode || inode == TrashInode {
		return map[Ino]int{1: 1}
	}
	var attr Attr
	if st := m.GetAttr(ctx, inode, &attr); st != 0 {
		logger.Warnf("GetAttr inode %d: %s", inode, st)
		return nil
	}
	if attr.Parent > 0 {
		return map[Ino]int{attr.Parent: 1}
	} else {
		return m.en.doGetParents(ctx, inode)
	}
}

func (m *baseMeta) GetPaths(ctx Context, inode Ino) []string {
	if inode == RootInode {
		return []string{"/"}
	}

	if inode == TrashInode {
		return []string{"/.trash"}
	}

	outside := "path not shown because it's outside of the mounted root"
	getDirPath := func(ino Ino) (string, error) {
		var names []string
		var attr Attr
		for ino != RootInode && ino != m.root {
			if st := m.en.doGetAttr(ctx, ino, &attr); st != 0 {
				return "", fmt.Errorf("getattr inode %d: %s", ino, st)
			}
			if attr.Typ != TypeDirectory {
				return "", fmt.Errorf("inode %d is not a directory", ino)
			}
			var entries []*Entry
			if st := m.en.doReaddir(ctx, attr.Parent, 0, &entries, -1); st != 0 {
				return "", fmt.Errorf("readdir inode %d: %s", ino, st)
			}
			var name string
			for _, e := range entries {
				if e.Inode == ino {
					name = string(e.Name)
					break
				}
			}
			if attr.Parent == RootInode && ino == TrashInode {
				name = TrashName
			}
			if name == "" {
				return "", fmt.Errorf("entry %d/%d not found", attr.Parent, ino)
			}
			names = append(names, name)
			ino = attr.Parent
		}
		if m.root != RootInode && ino == RootInode {
			return outside, nil
		}
		names = append(names, "/") // add root

		for i, j := 0, len(names)-1; i < j; i, j = i+1, j-1 { // reverse
			names[i], names[j] = names[j], names[i]
		}
		return path.Join(names...), nil
	}

	var paths []string
	// inode != RootInode, parent is the real parent inode
	for parent, count := range m.GetParents(ctx, inode) {
		if count <= 0 {
			continue
		}
		dir, err := getDirPath(parent)
		if err != nil {
			logger.Warnf("Get directory path of %d: %s", parent, err)
			continue
		} else if dir == outside {
			paths = append(paths, outside)
			continue
		}
		var entries []*Entry
		if st := m.en.doReaddir(ctx, parent, 0, &entries, -1); st != 0 {
			logger.Warnf("Readdir inode %d: %s", parent, st)
			continue
		}
		var c int
		for _, e := range entries {
			if e.Inode == inode {
				c++
				paths = append(paths, path.Join(dir, string(e.Name)))
			}
		}
		if c != count {
			logger.Warnf("Expect to find %d entries under parent %d, but got %d", count, parent, c)
		}
	}
	return paths
}

func (m *baseMeta) countDirNlink(ctx Context, inode Ino) (uint32, syscall.Errno) {
	var entries []*Entry
	if st := m.en.doReaddir(ctx, inode, 0, &entries, -1); st != 0 {
		return 0, st
	}
	var dirCounter uint32 = 2
	for _, e := range entries {
		if e.Attr.Typ == TypeDirectory {
			dirCounter++
		}
	}
	return dirCounter, 0
}

type metaWalkFunc func(ctx Context, inode Ino, p string, attr *Attr)

func (m *baseMeta) walk(ctx Context, inode Ino, p string, attr *Attr, walkFn metaWalkFunc) syscall.Errno {
	walkFn(ctx, inode, p, attr)
	if attr.Full && attr.Typ != TypeDirectory {
		return 0
	}
	var entries []*Entry
	st := m.en.doReaddir(ctx, inode, 1, &entries, -1)
	if st != 0 && st != syscall.ENOENT {
		logger.Errorf("list %s: %s", p, st)
		return st
	}
	for _, entry := range entries {
		if ctx.Canceled() {
			return syscall.EINTR
		}
		if !entry.Attr.Full {
			entry.Attr.Parent = inode
		}
		if st := m.walk(ctx, entry.Inode, path.Join(p, string(entry.Name)), entry.Attr, walkFn); st != 0 {
			return st
		}
	}
	return 0
}

func (m *baseMeta) Check(ctx Context, fpath string, opt *CheckOpt) error {
	var attr Attr
	var inode = RootInode
	var parent = RootInode
	attr.Typ = TypeDirectory
	if fpath == "/" {
		if st := m.GetAttr(ctx, inode, &attr); st != 0 && st != syscall.ENOENT {
			logger.Errorf("GetAttr inode %d: %s", inode, st)
			return st
		}
	} else {
		ps := strings.FieldsFunc(fpath, func(r rune) bool {
			return r == '/'
		})
		for i, name := range ps {
			parent = inode
			if st := m.Lookup(ctx, parent, name, &inode, &attr, false); st != 0 {
				logger.Errorf("Lookup parent %d name %s: %s", parent, name, st)
				return st
			}
			if !attr.Full && i < len(ps)-1 {
				// missing attribute
				p := "/" + path.Join(ps[:i+1]...)
				if attr.Typ != TypeDirectory { // TODO: determine file size?
					logger.Warnf("Attribute of %s (inode %d type %d) is missing and cannot be auto-repaired, please repair it manually or remove it", p, inode, attr.Typ)
				} else {
					logger.Warnf("Attribute of %s (inode %d) is missing, please re-run with '--path %s --repair' to fix it", p, inode, p)
				}
			}
		}
	}
	if !attr.Full {
		attr.Parent = parent
	}

	progress := utils.NewProgress(false)
	defer progress.Done()
	nodeBar := progress.AddCountBar("Checked nodes", 0)

	var hasError bool
	type node struct {
		inode Ino
		path  string
		attr  *Attr
	}
	nodes := make(chan *node, 1000)
	go func() {
		defer close(nodes)
		var count int64
		if opt.Recursive {
			if st := m.walk(ctx, inode, fpath, &attr, func(ctx Context, inode Ino, path string, attr *Attr) {
				nodes <- &node{inode, path, attr}
				atomic.AddInt64(&count, 1)
			}); st != 0 {
				hasError = true
				logger.Errorf("Walk %s: %s", fpath, st)
			}
		} else {
			nodes <- &node{inode, fpath, &attr}
			count = 1
		}
		nodeBar.SetTotal(count)
	}()

	format, err := m.Load(false)
	if err != nil {
		return errors.Wrap(err, "load meta format")
	}
	if opt.SyncDirStat && !format.DirStats {
		logger.Warn("dir stats is disabled, flag '--sync-dir-stat' will be ignored")
	}
	var lock sync.Mutex
	listSlices := func(inode Ino, path string) {
		lock.Lock()
		if _, ok := opt.Slices[inode]; ok {
			lock.Unlock()
			return
		}
		opt.Slices[inode] = []Slice{}
		lock.Unlock()
		rawSlices, st := m.en.doList(ctx, inode)
		if st != 0 {
			logger.Errorf("dolist %s: %s", path, st)
			return
		}
		ss := make([]Slice, 0, len(rawSlices))
		for _, rs := range rawSlices {
			if rs.id > 0 {
				ss = append(ss, Slice{Id: rs.id, Size: rs.size})
			}
		}
		lock.Lock()
		opt.Slices[inode] = ss
		if opt.ShowProgress != nil {
			opt.ShowProgress(len(opt.Slices[inode]))
		}
		lock.Unlock()
	}
	var wg sync.WaitGroup
	for i := 0; i < 20; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for e := range nodes {
				inode := e.inode
				path := e.path
				attr := e.attr
				if attr.Typ != TypeDirectory {
					if attr.Typ == TypeFile {
						listSlices(inode, path)
						nodeBar.Increment()
					}
					continue
				}

				var attrBroken, statBroken bool
				if attr.Full {
					nlink, st := m.countDirNlink(ctx, inode)
					if st == syscall.ENOENT {
						continue
					}
					if st != 0 {
						hasError = true
						logger.Errorf("Count nlink for inode %d: %s", inode, st)
						continue
					}
					if attr.Nlink != nlink {
						logger.Warnf("nlink of %s should be %d, but got %d", path, nlink, attr.Nlink)
						attrBroken = true
					}
				} else {
					logger.Warnf("attribute of %s is missing", path)
					attrBroken = true
				}

				if attrBroken {
					if opt.Repair {
						if !attr.Full {
							now := time.Now().Unix()
							attr.Mode = opt.RepairDirMode
							attr.Uid = ctx.Uid()
							attr.Gid = ctx.Gid()
							attr.Atime = now
							attr.Mtime = now
							attr.Ctime = now
							attr.Length = 4 << 10
						}
						if st1 := m.en.doRepair(ctx, inode, attr); st1 == 0 || st1 == syscall.ENOENT {
							logger.Debugf("Path %s (inode %d) is successfully repaired", path, inode)
						} else {
							hasError = true
							logger.Errorf("Repair path %s inode %d: %s", path, inode, st1)
						}
					} else {
						logger.Warnf("Path %s (inode %d) can be repaired, please re-run with '--path %s --repair' to fix it", path, inode, path)
						hasError = true
					}
				}

				if format.DirStats {
					stat, st := m.en.doGetDirStat(ctx, inode, false)
					if st == syscall.ENOENT {
						continue
					}
					if st != 0 {
						hasError = true
						logger.Errorf("get dir stat for inode %d: %v", inode, st)
						continue
					}
					if stat == nil || stat.space < 0 || stat.inodes < 0 {
						logger.Warnf("usage stat of %s is missing or broken", path)
						statBroken = true
					}

					if !opt.Repair && opt.SyncDirStat {
						s, st := m.calcDirStat(ctx, inode)
						if st != 0 {
							hasError = true
							logger.Errorf("calc dir stat for inode %d: %v", inode, st)
							continue
						}
						if stat.space != s.space || stat.inodes != s.inodes {
							logger.Warnf("usage stat of %s should be %v, but got %v", path, s, stat)
							statBroken = true
						}
					}

					if opt.Repair {
						if statBroken || opt.SyncDirStat {
							if _, st := m.en.doSyncDirStat(ctx, inode); st == 0 || st == syscall.ENOENT {
								logger.Debugf("Stat of path %s (inode %d) is successfully synced", path, inode)
							} else {
								hasError = true
								logger.Errorf("Sync stat of path %s inode %d: %s", path, inode, st)
							}
						}
					} else if statBroken {
						logger.Warnf("Stat of path %s (inode %d) should be synced, please re-run with '--path %s --repair --sync-dir-stat' to fix it", path, inode, path)
						hasError = true
					}
				}
				nodeBar.Increment()
			}
		}()
	}
	wg.Wait()
	if fpath == "/" && opt.Repair && opt.Recursive && opt.SyncDirStat {
		if err := m.syncVolumeStat(ctx); err != nil {
			logger.Errorf("Sync used space: %s", err)
			hasError = true
		}
	}
	if hasError {
		return errors.New("some errors occurred, please check the log of fsck")
	}

	if progress.Quiet {
		logger.Infof("Checked %d nodes", nodeBar.Current())
	}

	return nil
}

func (m *baseMeta) Chroot(ctx Context, subdir string) syscall.Errno {
	for subdir != "" {
		ps := strings.SplitN(subdir, "/", 2)
		if ps[0] != "" {
			var attr Attr
			var inode Ino
			r := m.Lookup(ctx, m.root, ps[0], &inode, &attr, true)
			if r == syscall.ENOENT {
				r = m.Mkdir(ctx, m.root, ps[0], 0777, 0, 0, &inode, &attr)
			}
			if r != 0 {
				return r
			}
			if attr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			m.chroot(inode)
		}
		if len(ps) == 1 {
			break
		}
		subdir = ps[1]
	}
	return 0
}

func (m *baseMeta) chroot(inode Ino) {
	m.root = inode
}

func (m *baseMeta) resolve(ctx Context, dpath string, inode *Ino, create bool) syscall.Errno {
	var attr Attr
	*inode = RootInode
	umask := utils.GetUmask()
	for dpath != "" {
		ps := strings.SplitN(dpath, "/", 2)
		if ps[0] != "" {
			r := m.en.doLookup(ctx, *inode, ps[0], inode, &attr)
			if errors.Is(r, syscall.ENOENT) && create {
				r = m.Mkdir(ctx, *inode, ps[0], 0777, uint16(umask), 0, inode, &attr)
			}
			if r != 0 {
				return r
			}
			if attr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
		}
		if len(ps) == 1 {
			break
		}
		dpath = ps[1]
	}
	return 0
}

func (m *baseMeta) getFormat() *Format {
	m.Lock()
	defer m.Unlock()
	return m.fmt
}

func (m *baseMeta) GetFormat() Format {
	return *m.getFormat()
}

func (m *baseMeta) CompactAll(ctx Context, threads int, bar *utils.Bar) syscall.Errno {
	var wg sync.WaitGroup
	ch := make(chan cchunk, 1000000)
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			for c := range ch {
				logger.Debugf("Compacting chunk %d:%d (%d slices)", c.inode, c.indx, c.slices)
				m.compactChunk(c.inode, c.indx, false, true)
				bar.Increment()
			}
			wg.Done()
		}()
	}

	err := m.en.scanAllChunks(ctx, ch, bar)
	close(ch)
	wg.Wait()
	if err != nil {
		logger.Warnf("Scan chunks: %s", err)
		return errno(err)
	}
	return 0
}

func (m *baseMeta) compactChunk(inode Ino, indx uint32, once, force bool) {
	// avoid too many or duplicated compaction
	k := uint64(inode) + (uint64(indx) << 40)
	m.Lock()
	if m.sessCtx != nil && m.sessCtx.Canceled() {
		m.Unlock()
		return
	}
	if once || force {
		for m.compacting[k] {
			m.Unlock()
			time.Sleep(time.Millisecond * 10)
			m.Lock()
		}
	} else if len(m.compacting) > 10 || m.compacting[k] {
		m.Unlock()
		return
	}
	m.compacting[k] = true
	m.Unlock()
	defer func() {
		m.Lock()
		delete(m.compacting, k)
		m.Unlock()
	}()

	ss, st := m.en.doRead(Background(), inode, indx)
	if st != 0 {
		return
	}
	if ss == nil {
		logger.Errorf("Corrupt value for inode %d chunk indx %d", inode, indx)
		return
	}
	if once && len(ss) < maxSlices {
		return
	}
	if len(ss) > maxCompactSlices {
		ss = ss[:maxCompactSlices]
	}
	skipped := skipSome(ss)
	compacted := ss[skipped:]
	pos, size, slices := compactChunk(compacted)
	if len(compacted) < 2 || size == 0 {
		return
	}
	for _, s := range ss[:skipped] {
		if pos+size > s.pos && s.pos+s.len > pos {
			var sstring string
			for _, s := range ss {
				sstring += fmt.Sprintf("\n%+v", *s)
			}
			panic(fmt.Sprintf("invalid compaction skipped %d, pos %d, size %d; slices: %s", skipped, pos, size, sstring))
		}
	}

	var id uint64
	if st = m.NewSlice(Background(), &id); st != 0 {
		return
	}
	logger.Debugf("compact %d:%d: skipped %d slices (%d bytes) %d slices (%d bytes)", inode, indx, skipped, pos, len(compacted), size)
	err := m.newMsg(CompactChunk, slices, id)
	if err != nil {
		if !strings.Contains(err.Error(), "not exist") && !strings.Contains(err.Error(), "not found") {
			logger.Warnf("compact %d %d with %d slices: %s", inode, indx, len(compacted), err)
		}
		return
	}

	var dsbuf []byte
	trash := m.toTrash(0)
	if trash {
		dsbuf = make([]byte, 0, len(compacted)*12)
		for _, s := range compacted {
			if s.id > 0 {
				dsbuf = append(dsbuf, m.encodeDelayedSlice(s.id, s.size)...)
			}
		}
	}
	origin := make([]byte, 0, len(ss)*sliceBytes)
	for _, s := range ss {
		origin = append(origin, marshalSlice(s.pos, s.id, s.size, s.off, s.len)...)
	}
	st = m.en.doCompactChunk(inode, indx, origin, compacted, skipped, pos, id, size, dsbuf)
	if st == syscall.EINVAL {
		logger.Infof("compaction for %d:%d is wasted, delete slice %d (%d bytes)", inode, indx, id, size)
		m.deleteSlice(id, size)
	} else if st == 0 {
		m.of.InvalidateChunk(inode, indx)
	} else {
		logger.Warnf("compact %d %d: %s", inode, indx, err)
	}

	if force {
		m.Lock()
		delete(m.compacting, k)
		m.Unlock()
		m.compactChunk(inode, indx, once, force)
	}
}

func (m *baseMeta) Compact(ctx Context, inode Ino, concurrency int, preFunc, postFunc func()) syscall.Errno {
	var attr Attr
	if st := m.GetAttr(ctx, inode, &attr); st != 0 {
		logger.Errorf("get attr error [inode %v]: %v", inode, st)
		return st
	}

	var wg sync.WaitGroup
	// compact
	chunkChan := make(chan cchunk, 10000)
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for c := range chunkChan {
				m.compactChunk(c.inode, c.indx, false, true)
				postFunc()
				if ctx.Canceled() {
					return
				}
			}
		}()
	}

	// scan
	st := m.walk(ctx, inode, "", &attr, func(ctx Context, fIno Ino, path string, fAttr *Attr) {
		if fAttr.Typ != TypeFile {
			return
		}
		// calc chunk index in local
		chunkCnt := uint32((fAttr.Length + ChunkSize - 1) / ChunkSize)
		for i := uint32(0); i < chunkCnt; i++ {
			select {
			case <-ctx.Done():
				return
			case chunkChan <- cchunk{inode: fIno, indx: i}:
				preFunc()
			}
		}
	})

	// finish
	close(chunkChan)
	wg.Wait()

	if st != 0 {
		logger.Errorf("walk error [inode %v]: %v", inode, st)
	}
	return st
}

func (m *baseMeta) fileDeleted(opened, force bool, inode Ino, length uint64) {
	if opened {
		m.Lock()
		m.removedFiles[inode] = true
		m.Unlock()
	} else {
		m.tryDeleteFileData(inode, length, force)
	}
}

func (m *baseMeta) tryDeleteFileData(inode Ino, length uint64, force bool) {
	if force {
		m.maxDeleting <- struct{}{}
	} else {
		select {
		case m.maxDeleting <- struct{}{}:
		default:
			return // will be cleanup later
		}
	}
	go func() {
		m.en.doDeleteFileData(inode, length)
		<-m.maxDeleting
	}()
}

func (m *baseMeta) deleteSlice_(id uint64, size uint32) {
	if err := m.newMsg(DeleteSlice, id, size); err != nil {
		logger.Warnf("Delete data blocks of slice %d (%d bytes): %s", id, size, err)
		return
	}
	if err := m.en.doDeleteSlice(id, size); err != nil {
		logger.Errorf("Delete meta entry of slice %d (%d bytes): %s", id, size, err)
	}
}

func (m *baseMeta) deleteSlice(id uint64, size uint32) {
	if id == 0 || m.conf.MaxDeletes == 0 {
		return
	}
	m.dSliceMu.Lock()
	if m.dslices == nil {
		m.dSliceMu.Unlock()
		m.deleteSlice_(id, size)
		return
	}
	select {
	case <-m.sessCtx.Done():
	case m.dslices <- Slice{Id: id, Size: size}:
	}
	m.dSliceMu.Unlock()
}

func (m *baseMeta) toTrash(parent Ino) bool {
	if parent.IsTrash() {
		return false
	}
	return m.getFormat().TrashDays > 0
}

func (m *baseMeta) checkTrash(parent Ino, trash *Ino) syscall.Errno {
	if !m.toTrash(parent) {
		return 0
	}
	name := time.Now().UTC().Format("2006-01-02-15")
	m.Lock()
	defer m.Unlock()
	if name == m.subTrash.name {
		*trash = m.subTrash.inode
		return 0
	}
	m.Unlock()

	st := m.en.doLookup(Background(), TrashInode, name, trash, nil)
	if st == syscall.ENOENT {
		attr := Attr{Typ: TypeDirectory, Nlink: 2, Length: 4 << 10, Parent: TrashInode, Full: true}
		st = m.en.doMknod(Background(), TrashInode, name, TypeDirectory, 0555, 0, "", trash, &attr)
		m.en.updateStats(align4K(0), 1)
	}

	m.Lock()
	if st != 0 && st != syscall.EEXIST {
		logger.Warnf("create subTrash %s: %s", name, st)
	} else if *trash <= TrashInode {
		logger.Warnf("invalid trash inode: %d", *trash)
		st = syscall.EBADF
	} else {
		m.subTrash.inode = *trash
		m.subTrash.name = name
		st = 0
	}
	return st
}

func (m *baseMeta) trashEntry(parent, inode Ino, name string) string {
	s := fmt.Sprintf("%d-%d-%s", parent, inode, name)
	if len(s) > MaxName {
		s = s[:MaxName]
		logger.Warnf("File name is too long as a trash entry, truncating it: %s -> %s", name, s)
	}
	return s
}

func (m *baseMeta) cleanupTrash(ctx Context) {
	defer m.sessWG.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(utils.JitterIt(time.Hour)):
		}
		if st := m.en.doGetAttr(ctx, TrashInode, nil); st != 0 {
			if st != syscall.ENOENT {
				logger.Warnf("getattr inode %d: %s", TrashInode, st)
			}
			continue
		}
		if ok, err := m.en.setIfSmall("lastCleanupTrash", time.Now().Unix(), int64(time.Hour.Seconds())*9/10); err != nil {
			logger.Warnf("checking counter lastCleanupTrash: %s", err)
		} else if ok {
			func() {
				cCtx := WrapWithTimeout(ctx, 50*time.Minute)
				defer cCtx.Cancel()
				jobStart := time.Now()
				days := m.getFormat().TrashDays
				var wg sync.WaitGroup
				wg.Add(2)
				defer wg.Wait()
				go func() {
					defer wg.Done()
					stats := &CleanupTrashStats{}
					status := bgJobSucc
					if st := m.doCleanupTrash(cCtx, days, false, stats); st != 0 {
						if st == syscall.ETIMEDOUT {
							status = bgJobCanceled
						} else {
							status = bgJobFail
						}
					}
					m.bgjobDuration.WithLabelValues("cleanTrashFile", status).Observe(time.Since(jobStart).Seconds())
					m.bgjobDels.WithLabelValues("cleanTrashFile").Add(float64(atomic.LoadInt64(&stats.DeletedFiles)))
				}()
				go func() {
					defer wg.Done()
					status := bgJobSucc
					var cnt uint64
					if err := m.cleanupDelayedSlices(cCtx, days, &cnt); err != nil {
						if errors.Is(err, context.DeadlineExceeded) {
							status = bgJobCanceled
						} else {
							status = bgJobFail
						}
					}
					m.bgjobDuration.WithLabelValues("cleanDelayedSlice", status).Observe(time.Since(jobStart).Seconds())
					m.bgjobDels.WithLabelValues("cleanDelayedSlice").Add(float64(cnt))
				}()
			}()
		}
	}
}

func (m *baseMeta) CleanupDetachedNodesBefore(ctx Context, edge time.Time, increProgress func()) {
	for _, inode := range m.en.doFindDetachedNodes(edge) {
		if eno := m.en.doCleanupDetachedNode(Background(), inode); eno != 0 {
			logger.Errorf("cleanupDetachedNode: remove detached tree (%d) error: %s", inode, eno)
		} else {
			if increProgress != nil {
				increProgress()
			}
		}
	}
}

func (m *baseMeta) CleanupTrashBefore(ctx Context, edge time.Time, increProgress func(int), stats *CleanupTrashStats) syscall.Errno {
	logger.Debugf("cleanup trash: started")
	now := time.Now()
	var st syscall.Errno
	var entries []*Entry
	if st = m.en.doReaddir(ctx, TrashInode, 0, &entries, -1); st != 0 {
		logger.Warnf("readdir trash %d: %s", TrashInode, st)
		return st
	}
	sort.Slice(entries, func(i, j int) bool { return entries[i].Inode < entries[j].Inode })
	var count uint64
	done := make(chan struct{})
	defer func() {
		close(done)
		if count > 0 {
			logger.Infof("cleanup trash: deleted %d files in %v", count, time.Since(now))
			if stats != nil {
				atomic.StoreInt64(&stats.DeletedFiles, int64(count))
			}
		} else {
			logger.Debugf("cleanup trash: nothing to delete")
		}
	}()

	if increProgress != nil {
		go func() {
			var last uint64
			ticker := time.NewTicker(time.Second)
			defer ticker.Stop()
			for {
				select {
				case <-done:
					return
				case <-ticker.C:
					curr := atomic.LoadUint64(&count)
					if curr != last {
						increProgress(int(curr - last))
						last = curr
					}
				}
			}
		}()
	}

	concurrent := make(chan int, 1) // no effect for flatterned trash dirs
	for len(entries) > 0 {
		if ctx.Canceled() {
			return errno(ctx.Err())
		}
		e := entries[0]
		ts, err := time.Parse("2006-01-02-15", string(e.Name))
		if err != nil {
			logger.Warnf("bad entry as a subTrash: %s", e.Name)
			entries = entries[1:]
			continue
		}
		if !ts.Before(edge) {
			break
		}
		if st = m.emptyDir(ctx, e.Inode, true, &count, concurrent); st != 0 {
			if st != syscall.ETIMEDOUT && st != syscall.EINTR {
				logger.Warnf("empty subTrash %d/%s: %s", e.Inode, e.Name, st)
			}
		} else {
			entries = entries[1:]
			if st = m.en.doRmdir(ctx, TrashInode, string(e.Name), nil, nil); st != 0 {
				logger.Warnf("rmdir subTrash %s: %s", e.Name, st)
			}
		}
	}
	return 0
}

func (m *baseMeta) scanTrashEntry(ctx Context, scan func(inode Ino, size uint64)) error {
	var st syscall.Errno
	var entries []*Entry
	if st = m.en.doReaddir(ctx, TrashInode, 1, &entries, -1); st != 0 {
		return errors.Wrap(st, "read trash")
	}

	var subEntries []*Entry
	for _, entry := range entries {
		scan(entry.Inode, entry.Attr.Length)
		subEntries = subEntries[:0]
		if st = m.en.doReaddir(ctx, entry.Inode, 1, &subEntries, -1); st != 0 {
			logger.Warnf("readdir subEntry %d: %s", entry.Inode, st)
			continue
		}
		for _, se := range subEntries {
			scan(se.Inode, se.Attr.Length)
		}
	}
	return nil
}

func (m *baseMeta) scanTrashFiles(ctx Context, scan trashFileScan) error {
	var st syscall.Errno
	var entries []*Entry
	if st = m.en.doReaddir(ctx, TrashInode, 1, &entries, -1); st != 0 {
		return errors.Wrap(st, "read trash")
	}

	var subEntries []*Entry
	for _, entry := range entries {
		ts, err := time.Parse("2006-01-02-15", string(entry.Name))
		if err != nil {
			logger.Warnf("bad entry as a subTrash: %s", entry.Name)
			continue
		}
		subEntries = subEntries[:0]
		if st = m.en.doReaddir(ctx, entry.Inode, 1, &subEntries, -1); st != 0 {
			logger.Warnf("readdir subEntry %d: %s", entry.Inode, st)
			continue
		}
		for _, se := range subEntries {
			if se.Attr.Typ == TypeFile {
				clean, err := scan(se.Inode, se.Attr.Length, ts)
				if err != nil {
					return errors.Wrap(err, "scan trash files")
				}
				if clean {
					// TODO: m.en.doUnlink(ctx, entry.Attr.Parent, string(entry.Name))
					// avoid lint warning
					_ = clean
				}
			}
		}
	}
	return nil
}

func (m *baseMeta) doCleanupTrash(ctx Context, days int, force bool, stats *CleanupTrashStats) syscall.Errno {
	edge := time.Now().Add(-time.Duration(24*days+2) * time.Hour)
	if force {
		edge = time.Now()
	}
	return m.CleanupTrashBefore(ctx, edge, nil, stats)
}

func (m *baseMeta) cleanupDelayedSlices(ctx Context, days int, count *uint64) error {
	now := time.Now()
	edge := now.Unix() - int64(days)*24*3600
	logger.Debugf("Cleanup delayed slices: started with edge %d", edge)
	var err error
	var cnt int
	if cnt, err = m.en.doCleanupDelayedSlices(ctx, edge); err != nil && !errors.Is(err, context.DeadlineExceeded) {
		logger.Warnf("Cleanup delayed slices: deleted %d slices in %v, but got error: %s", count, time.Since(now), err)
		return err
	} else if cnt > 0 {
		logger.Infof("Cleanup delayed slices: deleted %d slices in %v", cnt, time.Since(now))
		if count != nil {
			atomic.AddUint64(count, uint64(cnt))
		}
	}
	return err
}

func (m *baseMeta) ScanDeletedObject(ctx Context, tss trashSliceScan, pss pendingSliceScan, tfs trashFileScan, pfs pendingFileScan) error {
	eg := errgroup.Group{}
	if tss != nil {
		eg.Go(func() error {
			return m.en.scanTrashSlices(ctx, tss)
		})
	}
	if pss != nil {
		eg.Go(func() error {
			return m.en.scanPendingSlices(ctx, pss)
		})
	}
	if tfs != nil {
		eg.Go(func() error {
			return m.scanTrashFiles(ctx, tfs)
		})
	}
	if pfs != nil {
		eg.Go(func() error {
			concurrency := m.conf.MaxDeletes
			cleanChan := make(chan struct {
				ino  Ino
				size uint64
			}, concurrency)
			var wg sync.WaitGroup

			for i := 0; i < concurrency; i++ {
				wg.Add(1)
				go func() {
					defer wg.Done()
					for p := range cleanChan {
						m.en.doDeleteFileData(p.ino, p.size)
					}
				}()
			}

			cpfs := func(ino Ino, size uint64, ts int64) (bool, error) {
				clean, err := pfs(ino, size, ts)
				if err != nil {
					return false, err
				}
				if clean {
					cleanChan <- struct {
						ino  Ino
						size uint64
					}{ino, size}
				}
				return clean, nil
			}

			err := m.en.scanPendingFiles(ctx, cpfs)
			close(cleanChan)
			wg.Wait()
			return err
		})
	}
	return eg.Wait()
}

func (m *baseMeta) Clone(ctx Context, srcParentIno, srcIno, parent Ino, name string, cmode uint8, cumask uint16, concurrency uint8, count, total *uint64) syscall.Errno {

	if srcIno.IsTrash() || srcParentIno.IsTrash() || parent.IsTrash() || (parent == RootInode && name == TrashName) {
		return syscall.EPERM
	}

	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	if name == "" {
		return syscall.ENOENT
	}

	defer m.timeit("Clone", time.Now())
	parent = m.checkRoot(parent)

	var attr Attr
	var eno syscall.Errno
	if eno = m.en.doGetAttr(ctx, srcIno, &attr); eno != 0 {
		return eno
	}
	if eno = m.Access(ctx, srcIno, MODE_MASK_R, &attr); eno != 0 {
		return eno
	}
	if eno = m.Access(ctx, parent, MODE_MASK_X|MODE_MASK_W, nil); eno != 0 {
		return eno
	}
	var dstIno Ino
	var _a Attr
	if eno = m.en.doLookup(ctx, parent, name, &dstIno, &_a); eno == 0 {
		return syscall.EEXIST
	} else if eno != syscall.ENOENT {
		return eno
	}
	var sum Summary
	eno = m.GetSummary(ctx, srcIno, &sum, true, false)
	if eno != 0 {
		return eno
	}
	if err := m.checkQuota(ctx, int64(sum.Size), int64(sum.Dirs)+int64(sum.Files), ctx.Uid(), ctx.Gid(), parent); err != 0 {
		return err
	}
	*total = sum.Dirs + sum.Files
	if concurrency < 1 {
		concurrency = 1
	}
	concurrent := make(chan struct{}, concurrency)
	if attr.Typ == TypeDirectory {
		eno = m.cloneEntry(ctx, srcIno, parent, name, &dstIno, cmode, cumask, count, true, concurrent)
		if eno == 0 {
			eno = m.en.doAttachDirNode(ctx, parent, dstIno, name)
		}
		if eno != 0 && dstIno != 0 {
			if eno := m.en.doCleanupDetachedNode(ctx, dstIno); eno != 0 {
				logger.Errorf("remove detached tree (%d): %s", dstIno, eno)
			}
		}
	} else {
		eno = m.cloneEntry(ctx, srcIno, parent, name, nil, cmode, cumask, count, true, concurrent)
	}
	if eno == 0 {
		m.updateDirStat(ctx, parent, int64(attr.Length), align4K(attr.Length), 1)
		m.updateDirQuota(ctx, parent, int64(sum.Size), int64(sum.Dirs)+int64(sum.Files))
	}
	return eno
}

func (m *baseMeta) cloneEntry(ctx Context, srcIno Ino, parent Ino, name string, dstIno *Ino, cmode uint8, cumask uint16, count *uint64, top bool, concurrent chan struct{}) syscall.Errno {
	ino, err := m.nextInode()
	if err != nil {
		return errno(err)
	}
	if dstIno != nil {
		*dstIno = ino
	}
	var attr Attr
	eno := m.en.doCloneEntry(ctx, srcIno, parent, name, ino, &attr, cmode, cumask, top)
	if eno != 0 {
		return eno
	}
	m.en.updateStats(align4K(attr.Length), 1)
	atomic.AddUint64(count, 1)
	m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, align4K(attr.Length), 1)
	if attr.Typ != TypeDirectory {
		return 0
	}
	if eno = m.Access(ctx, srcIno, MODE_MASK_R|MODE_MASK_X, &attr); eno != 0 {
		return eno
	}
	// Use DirHandler for batch processing to avoid loading all entries at once
	handler, eno := m.NewDirHandler(ctx, srcIno, true, nil)
	if eno == syscall.ENOENT {
		eno = 0 // empty dir
	}
	if eno != 0 {
		return eno
	}
	defer handler.Close()

	cloneCtx := WrapWithCancel(ctx, ctx.Pid(), ctx.Uid(), ctx.Gids())
	defer cloneCtx.Cancel()

	var g errgroup.Group
	var skipped uint32

	cloneChild := func(e *Entry) syscall.Errno {
		childEno := m.cloneEntry(cloneCtx, e.Inode, ino, string(e.Name), nil, cmode, cumask, count, false, concurrent)
		if childEno == syscall.ENOENT {
			logger.Warnf("ignore deleted %s in dir %d", string(e.Name), srcIno)
			if e.Attr.Typ == TypeDirectory {
				atomic.AddUint32(&skipped, 1)
			}
			return 0
		}
		if childEno != 0 {
			cloneCtx.Cancel()
		}
		return childEno
	}

	offset := 0
	for {
		batchEntries, batchEno := handler.List(cloneCtx, offset)
		if batchEno != 0 {
			eno = batchEno
			break
		}
		if len(batchEntries) == 0 {
			break
		}

		var nonDirEntries []*Entry
		for _, e := range batchEntries {
			if string(e.Name) == "." || string(e.Name) == ".." {
				continue
			}

			if e.Attr.Typ == TypeDirectory {
				select {
				case concurrent <- struct{}{}:
					entry := e
					g.Go(func() error {
						defer func() { <-concurrent }()
						if childEno := cloneChild(entry); childEno != 0 {
							return childEno
						}
						return nil
					})
				default:
					// Synchronous fallback when concurrency limit reached
					if childEno := cloneChild(e); childEno != 0 && eno == 0 {
						eno = childEno
					}
				}
			} else {
				nonDirEntries = append(nonDirEntries, e)
			}

			if cloneCtx.Canceled() {
				break
			}
		}

		if eno != 0 || cloneCtx.Canceled() {
			break
		}

		// Batch clone files immediately (don't wait for subdirs to finish)
		if len(nonDirEntries) > 0 {
			batchEno := m.BatchClone(cloneCtx, srcIno, ino, nonDirEntries, cmode, cumask, count)
			if batchEno == syscall.ENOTSUP {
				// Fallback: clone each file concurrently
				for _, e := range nonDirEntries {
					select {
					case concurrent <- struct{}{}:
						entry := e
						g.Go(func() error {
							defer func() { <-concurrent }()
							if childEno := cloneChild(entry); childEno != 0 {
								return childEno
							}
							return nil
						})
					default:
						// Synchronous fallback when concurrency limit reached
						if childEno := cloneChild(e); childEno != 0 && eno == 0 {
							eno = childEno
						}
					}

					if cloneCtx.Canceled() {
						break
					}
				}
				if eno == syscall.ENOTSUP {
					eno = 0
				}
			} else if batchEno != 0 {
				eno = batchEno
				break
			}
		}

		offset += len(batchEntries)
		if cloneCtx.Canceled() {
			break
		}
	}

	// Wait for all goroutines; preserve the first non-cancel error when possible.
	if err := g.Wait(); eno == 0 && err != nil {
		eno = errno(err)
	}
	if eno == 0 && cloneCtx.Canceled() {
		eno = syscall.EINTR
	}

	if eno == 0 && skipped > 0 {
		attr.Nlink -= skipped
		if eno := m.en.doRepair(ctx, ino, &attr); eno != 0 {
			logger.Warnf("fix nlink of %d: %s", ino, eno)
		}
	}
	return eno
}

func (m *baseMeta) mergeAttr(ctx Context, inode Ino, set uint16, cur, attr *Attr, now time.Time, rule *aclAPI.Rule) (*Attr, syscall.Errno) {
	dirtyAttr := *cur
	if (set&(SetAttrUID|SetAttrGID)) != 0 && (set&SetAttrMode) != 0 {
		attr.Mode |= (cur.Mode & 06000)
	}
	var changed bool
	if (cur.Mode&06000) != 0 && (set&(SetAttrUID|SetAttrGID)) != 0 {
		clearSUGID(ctx, &dirtyAttr, attr)
		changed = true
	}
	if set&SetAttrGID != 0 {
		if ctx.Uid() != 0 && ctx.Uid() != cur.Uid {
			return nil, syscall.EPERM
		}
		if cur.Gid != attr.Gid {
			if ctx.CheckPermission() && ctx.Uid() != 0 && !containsGid(ctx, attr.Gid) {
				return nil, syscall.EPERM
			}
			dirtyAttr.Gid = attr.Gid
			changed = true
		}
	}
	if set&SetAttrUID != 0 && cur.Uid != attr.Uid {
		if ctx.CheckPermission() && ctx.Uid() != 0 {
			return nil, syscall.EPERM
		}
		dirtyAttr.Uid = attr.Uid
		changed = true
	}
	if set&SetAttrMode != 0 {
		if ctx.Uid() != 0 && (attr.Mode&02000) != 0 {
			if ctx.Gid() != cur.Gid {
				attr.Mode &= 05777
			}
		}

		if rule != nil {
			rule.SetMode(attr.Mode)
			dirtyAttr.Mode = attr.Mode&07000 | rule.GetMode()
			changed = true
		} else if attr.Mode != cur.Mode {
			if ctx.Uid() != 0 && ctx.Uid() != cur.Uid &&
				(cur.Mode&01777 != attr.Mode&01777 || attr.Mode&02000 > cur.Mode&02000 || attr.Mode&04000 > cur.Mode&04000) {
				return nil, syscall.EPERM
			}
			dirtyAttr.Mode = attr.Mode
			changed = true
		}
	}
	if set&SetAttrAtimeNow != 0 || (set&SetAttrAtime) != 0 && attr.Atime < 0 {
		if st := m.Access(ctx, inode, MODE_MASK_W, cur); ctx.Uid() != cur.Uid && st != 0 {
			return nil, syscall.EACCES
		}
		dirtyAttr.Atime = now.Unix()
		dirtyAttr.Atimensec = uint32(now.Nanosecond())
		changed = true
	} else if set&SetAttrAtime != 0 && (cur.Atime != attr.Atime || cur.Atimensec != attr.Atimensec) {
		if cur.Uid == 0 && ctx.Uid() != 0 {
			return nil, syscall.EPERM
		}
		if st := m.Access(ctx, inode, MODE_MASK_W, cur); ctx.Uid() != cur.Uid && st != 0 {
			return nil, syscall.EACCES
		}
		dirtyAttr.Atime = attr.Atime
		dirtyAttr.Atimensec = attr.Atimensec
		changed = true
	}
	if set&SetAttrMtimeNow != 0 || (set&SetAttrMtime) != 0 && attr.Mtime < 0 {
		if st := m.Access(ctx, inode, MODE_MASK_W, cur); ctx.Uid() != cur.Uid && st != 0 {
			return nil, syscall.EACCES
		}
		dirtyAttr.Mtime = now.Unix()
		dirtyAttr.Mtimensec = uint32(now.Nanosecond())
		changed = true
	} else if set&SetAttrMtime != 0 && (cur.Mtime != attr.Mtime || cur.Mtimensec != attr.Mtimensec) {
		if cur.Uid == 0 && ctx.Uid() != 0 {
			return nil, syscall.EPERM
		}
		if st := m.Access(ctx, inode, MODE_MASK_W, cur); ctx.Uid() != cur.Uid && st != 0 {
			return nil, syscall.EACCES
		}
		dirtyAttr.Mtime = attr.Mtime
		dirtyAttr.Mtimensec = attr.Mtimensec
		changed = true
	}
	if set&SetAttrFlag != 0 {
		dirtyAttr.Flags = attr.Flags
		changed = true
	}
	if !changed {
		*attr = *cur
		return nil, 0
	}
	return &dirtyAttr, 0
}

func (m *baseMeta) CheckSetAttr(ctx Context, inode Ino, set uint16, attr Attr) syscall.Errno {
	var cur Attr
	inode = m.checkRoot(inode)
	if st := m.en.doGetAttr(ctx, inode, &cur); st != 0 {
		return st
	}
	_, st := m.mergeAttr(ctx, inode, set, &cur, &attr, time.Now(), nil)
	return st
}

var errACLNotInCache = errors.New("acl not in cache")

func (m *baseMeta) getFaclFromCache(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) error {
	ino = m.checkRoot(ino)
	cAttr := &Attr{}
	if m.conf.OpenCache > 0 && m.of.Check(ino, cAttr) {
		aclId := getAttrACLId(cAttr, aclType)
		if aclId == aclAPI.None {
			return ENOATTR
		}

		if cRule := m.aclCache.Get(aclId); cRule != nil {
			*rule = *cRule
			return nil
		}
	}
	return errACLNotInCache
}

func setAttrACLId(attr *Attr, aclType uint8, id uint32) {
	switch aclType {
	case aclAPI.TypeAccess:
		attr.AccessACL = id
	case aclAPI.TypeDefault:
		attr.DefaultACL = id
	}
}

func getAttrACLId(attr *Attr, aclType uint8) uint32 {
	switch aclType {
	case aclAPI.TypeAccess:
		return attr.AccessACL
	case aclAPI.TypeDefault:
		return attr.DefaultACL
	}
	return aclAPI.None
}

func setXAttrACL(xattrs *[]byte, accessACL, defaultACL uint32) {
	if accessACL != aclAPI.None {
		*xattrs = append(*xattrs, []byte("system.posix_acl_access")...)
		*xattrs = append(*xattrs, 0)
	}
	if defaultACL != aclAPI.None {
		*xattrs = append(*xattrs, []byte("system.posix_acl_default")...)
		*xattrs = append(*xattrs, 0)
	}
}

func (m *baseMeta) saveACL(rule *aclAPI.Rule, aclMaxId *uint32) uint32 {
	if rule == nil {
		return aclAPI.None
	}
	id := m.aclCache.GetId(rule)
	if id == aclAPI.None {
		(*aclMaxId)++
		id = *aclMaxId
		m.aclCache.Put(id, rule)
	}
	return id
}

func (m *baseMeta) SetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno {
	if aclType != aclAPI.TypeAccess && aclType != aclAPI.TypeDefault {
		return syscall.EINVAL
	}

	if !ino.IsNormal() {
		return syscall.EPERM
	}

	now := time.Now()
	defer func() {
		m.timeit("SetFacl", now)
		m.of.InvalidateChunk(ino, invalidateAttrOnly)
	}()

	return m.en.doSetFacl(ctx, ino, aclType, rule)
}

func (m *baseMeta) GetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno {
	var err error
	if err = m.getFaclFromCache(ctx, ino, aclType, rule); err == nil {
		return 0
	}

	if !errors.Is(err, errACLNotInCache) {
		return errno(err)
	}

	now := time.Now()
	defer m.timeit("GetFacl", now)

	return m.en.doGetFacl(ctx, ino, aclType, aclAPI.None, rule)
}

func (m *baseMeta) StoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno) {
	defer m.timeit("StoreToken", time.Now())
	return m.en.doStoreToken(ctx, token)
}

func (m *baseMeta) UpdateToken(ctx Context, id uint32, token []byte) syscall.Errno {
	defer m.timeit("UpdateToken", time.Now())
	return m.en.doUpdateToken(ctx, id, token)
}

func (m *baseMeta) LoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno) {
	defer m.timeit("LoadToken", time.Now())
	return m.en.doLoadToken(ctx, id)
}

func (m *baseMeta) DeleteTokens(ctx Context, ids []uint32) syscall.Errno {
	defer m.timeit("DeleteTokens", time.Now())
	return m.en.doDeleteTokens(ctx, ids)
}

func (m *baseMeta) ListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno) {
	defer m.timeit("ListTokens", time.Now())
	return m.en.doListTokens(ctx)
}

func inGroup(ctx Context, gid uint32) bool {
	for _, egid := range ctx.Gids() {
		if egid == gid {
			return true
		}
	}
	return false
}

type DirHandler interface {
	List(ctx Context, offset int) ([]*Entry, syscall.Errno)
	Insert(inode Ino, name string, attr *Attr)
	Delete(name string)
	Read(offset int)
	Close()
}

func (m *baseMeta) NewDirHandler(ctx Context, inode Ino, plus bool, initEntries []*Entry) (DirHandler, syscall.Errno) {
	var attr Attr
	var st syscall.Errno
	defer func() {
		if st == 0 {
			m.touchAtime(ctx, inode, &attr)
		}
	}()

	inode = m.checkRoot(inode)
	if st = m.GetAttr(ctx, inode, &attr); st != 0 {
		return nil, st
	}
	defer m.timeit("NewDirHandler", time.Now())
	var mmask uint8 = MODE_MASK_R
	if plus {
		mmask |= MODE_MASK_X
	}

	if st = m.Access(ctx, inode, mmask, &attr); st != 0 {
		return nil, st
	}
	if inode == m.root {
		attr.Parent = m.root
	}

	initEntries = append(initEntries, &Entry{
		Inode: inode,
		Name:  []byte("."),
		Attr:  &attr,
	})

	parent := &Entry{
		Inode: attr.Parent,
		Name:  []byte(".."),
		Attr:  &Attr{Typ: TypeDirectory},
	}
	if plus {
		if attr.Parent == inode {
			parent.Attr = &attr
		} else {
			if st := m.GetAttr(ctx, attr.Parent, parent.Attr); st != 0 {
				return nil, st
			}
		}
	}
	initEntries = append(initEntries, parent)

	return m.en.newDirHandler(inode, plus, initEntries), 0
}

type dirBatch struct {
	isEnd   bool
	offset  int
	cursor  interface{}
	entries []*Entry
	indexes map[string]int
}

func (b *dirBatch) contain(offset int) bool {
	if b == nil {
		return false
	}
	return b.offset <= offset && offset < b.offset+len(b.entries) || (len(b.entries) == 0 && b.offset == offset)
}

func (b *dirBatch) predecessor(offset int) bool {
	return b.offset+len(b.entries) == offset
}

type dirFetcher func(ctx Context, inode Ino, cursor interface{}, offset, limit int, plus bool) (interface{}, []*Entry, error)

type dirHandler struct {
	sync.Mutex
	inode       Ino
	plus        bool
	initEntries []*Entry
	batch       *dirBatch
	fetcher     dirFetcher
	readOff     int
	batchNum    int
}

func (h *dirHandler) fetch(ctx Context, offset int) (*dirBatch, error) {
	var cursor interface{}
	if h.batch != nil && h.batch.predecessor(offset) {
		if h.batch.isEnd {
			return h.batch, nil
		}
		cursor = h.batch.cursor
	}
	nextCursor, entries, err := h.fetcher(ctx, h.inode, cursor, offset, h.batchNum, h.plus)
	if err != nil {
		return nil, err
	}
	if entries == nil {
		entries = []*Entry{}
		nextCursor = cursor
	}
	indexes := make(map[string]int, len(entries))
	for i, e := range entries {
		indexes[string(e.Name)] = i
	}
	return &dirBatch{isEnd: len(entries) < h.batchNum, offset: offset, cursor: nextCursor, entries: entries, indexes: indexes}, nil
}

func (h *dirHandler) List(ctx Context, offset int) ([]*Entry, syscall.Errno) {
	var prefix []*Entry
	if offset < len(h.initEntries) {
		prefix = h.initEntries[offset:]
		offset = 0
	} else {
		offset -= len(h.initEntries)
	}

	var err error
	h.Lock()
	defer h.Unlock()
	if !h.batch.contain(offset) {
		h.batch, err = h.fetch(ctx, offset)
	}

	if err != nil {
		return nil, errno(err)
	}

	h.readOff = h.batch.offset + len(h.batch.entries)
	if len(prefix) > 0 {
		return append(prefix, h.batch.entries...), 0
	}
	return h.batch.entries[offset-h.batch.offset:], 0
}

func (h *dirHandler) Delete(name string) {
	h.Lock()
	defer h.Unlock()
	if h.batch == nil || len(h.batch.entries) == 0 {
		return
	}

	if idx, ok := h.batch.indexes[name]; ok && idx+h.batch.offset >= h.readOff {
		delete(h.batch.indexes, name)
		n := len(h.batch.entries)
		if idx < n-1 {
			// TODO: sorted
			h.batch.entries[idx] = h.batch.entries[n-1]
			h.batch.indexes[string(h.batch.entries[idx].Name)] = idx
		}
		h.batch.entries = h.batch.entries[:n-1]
	}
}

func (h *dirHandler) Insert(inode Ino, name string, attr *Attr) {
	h.Lock()
	defer h.Unlock()
	if h.batch == nil {
		return
	}
	if h.batch.isEnd || bytes.Compare([]byte(name), h.batch.cursor.([]byte)) < 0 {
		// TODO: sorted
		h.batch.entries = append(h.batch.entries, &Entry{Inode: inode, Name: []byte(name), Attr: attr})
		h.batch.indexes[name] = len(h.batch.entries) - 1
	}
}

func (h *dirHandler) Read(offset int) {
	h.readOff = offset - len(h.initEntries) // TODO: what if fuse only reads one entry?
}

func (h *dirHandler) Close() {
	h.Lock()
	h.batch = nil
	h.readOff = 0
	h.Unlock()
}

func (m *baseMeta) DumpMetaV2(ctx Context, w io.Writer, opt *DumpOption) error {
	opt = opt.check()

	bak := newBakFormat()
	ch := make(chan *dumpedResult, 100)
	wg := &sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := m.en.dump(ctx, opt, ch)
		if err != nil {
			logger.Errorf("dump meta err: %v", err)
			ctx.Cancel()
		} else {
			close(ch)
		}
	}()

	var res *dumpedResult
	for {
		select {
		case <-ctx.Done():
			wg.Wait()
			return ctx.Err()
		case res = <-ch:
		}
		if res == nil {
			break
		}
		seg := newBakSegment(res.msg)
		if err := bak.writeSegment(w, seg); err != nil {
			logger.Errorf("write %d err: %v", seg.typ, err)
			ctx.Cancel()
			wg.Wait()
			return err
		}
		if opt.Progress != nil {
			opt.Progress(seg.Name(), int(seg.num()))
		}
		if res.release != nil {
			res.release(res.msg)
		}
	}

	wg.Wait()
	return bak.writeFooter(w)
}

func (m *baseMeta) LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error {
	if opt == nil {
		opt = &LoadOption{}
	}
	if err := m.en.prepareLoad(ctx, opt); err != nil {
		return err
	}

	type task struct {
		typ int
		msg proto.Message
	}

	var wg sync.WaitGroup
	taskCh := make(chan *task, 100)

	workerFunc := func(ctx Context, taskCh <-chan *task) {
		defer wg.Done()
		var task *task
		for {
			select {
			case <-ctx.Done():
				return
			case task = <-taskCh:
			}
			if task == nil {
				break
			}
			err := m.en.load(ctx, task.typ, opt, task.msg)
			if err != nil {
				logger.Errorf("failed to insert %d: %s", task.typ, err)
				ctx.Cancel()
				return
			}
		}
	}

	for i := 0; i < opt.Threads; i++ {
		wg.Add(1)
		go workerFunc(ctx, taskCh)
	}

	bak := &BakFormat{}
	for {
		seg, err := bak.ReadSegment(r)
		if err != nil {
			if errors.Is(err, errBakEOF) {
				close(taskCh)
				break
			}
			ctx.Cancel()
			wg.Wait()
			return err
		}

		select {
		case <-ctx.Done():
			wg.Wait()
			return ctx.Err()
		case taskCh <- &task{int(seg.typ), seg.val}:
			if opt.Progress != nil {
				opt.Progress(seg.Name(), int(seg.num()))
			}
		}
	}
	wg.Wait()
	return nil
}


================================================
FILE: pkg/meta/base_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
//mutate:disable
//nolint:errcheck
package meta

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"math/rand"
	"os"
	"reflect"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"testing"
	"time"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/redis/go-redis/v9"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"xorm.io/xorm"
)

func testConfig() *Config {
	conf := DefaultConf()
	conf.DirStatFlushPeriod = 100 * time.Millisecond
	return conf
}

func testFormat() *Format {
	return &Format{Name: "test", DirStats: true}
}

func TestRedisClient(t *testing.T) {
	m, err := newRedisMeta("redis", "127.0.0.1:6379/10", testConfig())
	if err != nil || m.Name() != "redis" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestKeyDB(t *testing.T) { // skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	// 127.0.0.1:6378 enable flash, 127.0.0.1:6377 disable flash
	for _, addr := range []string{"127.0.0.1:6378/10", "127.0.0.1:6377/10"} {
		m, err := newRedisMeta("redis", addr, testConfig())
		if err != nil || m.Name() != "redis" {
			t.Fatalf("create meta: %s", err)
		}
		if r, ok := m.(*redisMeta); ok {
			rawInfo, err := r.rdb.Info(Background()).Result()
			if err != nil {
				t.Fatalf("parse info: %s", err)
			}
			var storageProvider, maxMemoryPolicy string
			for _, l := range strings.Split(strings.TrimSpace(rawInfo), "\n") {
				l = strings.TrimSpace(l)
				if l == "" || strings.HasPrefix(l, "#") {
					continue
				}
				kvPair := strings.SplitN(l, ":", 2)
				if len(kvPair) < 2 {
					continue
				}
				key, val := kvPair[0], kvPair[1]
				switch key {
				case "maxmemory_policy":
					maxMemoryPolicy = val
				case "storage_provider":
					storageProvider = val
				}
			}
			if storageProvider == "none" && maxMemoryPolicy != "noeviction" {
				t.Fatalf("maxmemory_policy should be noeviction")
			}
			if storageProvider == "flash" && maxMemoryPolicy == "noeviction" {
				t.Fatalf("maxmemory_policy should not be noeviction")
			}
		} else {
			t.Fatalf("should be redisMeta")
		}
	}
}

func TestRedisCluster(t *testing.T) { // skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	m, err := newRedisMeta("redis", "127.0.0.1:7001,127.0.0.1:7002,127.0.0.1:7003/2", testConfig())
	if err != nil {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func testMeta(t *testing.T, m Meta) {
	if err := m.Reset(); err != nil {
		t.Fatalf("reset meta: %s", err)
	}

	testMetaClient(t, m)
	testTruncateAndDelete(t, m)
	testTrash(t, m)
	testParents(t, m)
	testRemove(t, m)
	testResolve(t, m)
	testStickyBit(t, m)
	testLocks(t, m)
	testListLocks(t, m)
	testConcurrentWrite(t, m)
	testCompaction(t, m, false)
	time.Sleep(time.Second)
	testCompaction(t, m, true)
	testCopyFileRange(t, m)
	testCloseSession(t, m)
	testConcurrentDir(t, m)
	testAttrFlags(t, m)
	testQuota(t, m)
	testUserGroupQuota(t, m)
	testAtime(t, m)
	testAccess(t, m)
	base := m.getBase()
	base.conf.OpenCache = time.Second
	base.of.expire = time.Second
	testOpenCache(t, m)
	base.conf.CaseInsensi = true
	testCaseIncensi(t, m)
	testCaseIncensiRename(t, m)
	testCaseIncensiHardlinkRename(t, m)
	testCheckAndRepair(t, m)
	testDirStat(t, m)
	testClone(t, m)
	testBatchClone(t, m)
	testACL(t, m)
	testKerberosToken(t, m)
	base.conf.ReadOnly = true
	testReadOnly(t, m)
}

func testAccess(t *testing.T, m Meta) {
	if err := m.Init(testFormat(), false); err != nil {
		t.Fatalf("init error: %s", err)
	}

	defer m.getBase().aclCache.Clear()

	var testNode Ino = 2
	ctx := NewContext(1, 1, []uint32{2})
	attr := &Attr{
		Mode:       0541,
		Uid:        0,
		Gid:        0,
		AccessACL:  1,
		DefaultACL: 0,
		Full:       true,
	}

	r1 := &aclAPI.Rule{
		Owner: 5,
		Group: 4,
		Mask:  2,
		Other: 1,
		NamedUsers: aclAPI.Entries{
			{
				Id:   1,
				Perm: 6,
			},
		},
		NamedGroups: aclAPI.Entries{
			{
				Id:   2,
				Perm: 6,
			},
		},
	}
	m.getBase().aclCache.Put(1, r1)

	// case: match owner, skip named entries
	st := m.Access(ctx, testNode, MODE_MASK_R|MODE_MASK_W, attr)
	assert.Equal(t, syscall.EACCES, st)

	// case: match named grouped entry, but group perm & mask failed
	ctx = NewContext(1, 2, []uint32{2})
	st = m.Access(ctx, testNode, MODE_MASK_R|MODE_MASK_W, attr)
	assert.Equal(t, syscall.EACCES, st)

	// case: same as above, make mask to pass test
	r2 := &aclAPI.Rule{}
	*r2 = *r1
	r2.Mask = 7
	m.getBase().aclCache.Put(2, r2)
	attr.AccessACL = 2

	ctx = NewContext(1, 2, []uint32{2})
	st = m.Access(ctx, testNode, MODE_MASK_R|MODE_MASK_W, attr)
	assert.Equal(t, syscall.Errno(0), st)
}

func testACL(t *testing.T, m Meta) {
	format := testFormat()
	format.EnableACL = true

	if err := m.Init(format, false); err != nil {
		t.Fatalf("test acl failed: %s", err)
	}

	defer m.getBase().aclCache.Clear()

	ctx := Background()
	testDir := "test_dir"
	var testDirIno Ino
	attr1 := &Attr{}

	if st := m.Mkdir(ctx, RootInode, testDir, 0644, 0, 0, &testDirIno, attr1); st != 0 {
		t.Fatalf("create %s: %s", testDir, st)
	}
	defer m.Rmdir(ctx, RootInode, testDir)

	rule := &aclAPI.Rule{
		Owner: 7,
		Group: 7,
		Mask:  7,
		Other: 7,
		NamedUsers: []aclAPI.Entry{
			{
				Id:   1001,
				Perm: 4,
			},
		},
		NamedGroups: nil,
	}

	// case: setfacl
	if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule); st != 0 {
		t.Fatalf("setfacl error: %s", st)
	}

	// case: getfacl
	rule2 := &aclAPI.Rule{}
	if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule2); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	assert.True(t, rule.IsEqual(rule2))

	// case: setfacl will sync mode (group class is mask)
	attr2 := &Attr{}
	if st := m.GetAttr(ctx, testDirIno, attr2); st != 0 {
		t.Fatalf("getattr error: %s", st)
	}
	assert.Equal(t, uint16(0777), attr2.Mode)

	// case: setattr will sync acl
	set := uint16(0) | SetAttrMode
	attr2 = &Attr{
		Mode: 0555,
	}
	if st := m.SetAttr(ctx, testDirIno, set, 0, attr2); st != 0 {
		t.Fatalf("setattr error: %s", st)
	}

	rule3 := &aclAPI.Rule{}
	if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule3); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	rule2.Owner = 5
	rule2.Mask = 5
	rule2.Other = 5
	assert.True(t, rule3.IsEqual(rule2))

	// case: remove acl
	rule3.Mask = 0xFFFF
	rule3.NamedUsers = nil
	rule3.NamedGroups = nil
	if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeAccess, rule3); st != 0 {
		t.Fatalf("setattr error: %s", st)
	}

	st := m.GetFacl(ctx, testDirIno, aclAPI.TypeAccess, nil)
	assert.Equal(t, ENOATTR, st)

	attr2 = &Attr{}
	if st := m.GetAttr(ctx, testDirIno, attr2); st != 0 {
		t.Fatalf("getattr error: %s", st)
	}
	assert.Equal(t, uint16(0575), attr2.Mode)

	// case: set normal default acl
	if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule); st != 0 {
		t.Fatalf("setfacl error: %s", st)
	}

	// case: get normal default acl
	rule2 = &aclAPI.Rule{}
	if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule2); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	assert.True(t, rule2.IsEqual(rule))

	// case: mk subdir with normal default acl
	subDir := "sub_dir"
	var subDirIno Ino
	attr2 = &Attr{}

	mode := uint16(0222)
	// cumask will be ignored
	if st := m.Mkdir(ctx, testDirIno, subDir, mode, 0022, 0, &subDirIno, attr2); st != 0 {
		t.Fatalf("create %s: %s", subDir, st)
	}
	defer m.Rmdir(ctx, testDirIno, subDir)

	// subdir inherit default acl
	rule3 = &aclAPI.Rule{}
	if st := m.GetFacl(ctx, subDirIno, aclAPI.TypeDefault, rule3); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	assert.True(t, rule3.IsEqual(rule2))

	// subdir access acl
	rule3 = &aclAPI.Rule{}
	if st := m.GetFacl(ctx, subDirIno, aclAPI.TypeAccess, rule3); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	rule2.Owner &= (mode >> 6) & 7
	rule2.Mask &= (mode >> 3) & 7
	rule2.Other &= mode & 7
	assert.True(t, rule3.IsEqual(rule2))

	// case: set minimal default acl
	rule = &aclAPI.Rule{
		Owner:       5,
		Group:       5,
		Mask:        0xFFFF,
		Other:       5,
		NamedUsers:  nil,
		NamedGroups: nil,
	}
	if st := m.SetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule); st != 0 {
		t.Fatalf("setfacl error: %s", st)
	}

	// case: get minimal default acl
	rule2 = &aclAPI.Rule{}
	if st := m.GetFacl(ctx, testDirIno, aclAPI.TypeDefault, rule2); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	assert.True(t, rule2.IsEqual(rule))

	// case: mk subdir with minimal default acl
	subDir2 := "sub_dir2"
	var subDirIno2 Ino
	attr2 = &Attr{}

	mode = uint16(0222)
	if st := m.Mkdir(ctx, testDirIno, subDir2, mode, 0022, 0, &subDirIno2, attr2); st != 0 {
		t.Fatalf("create %s: %s", subDir, st)
	}
	defer m.Rmdir(ctx, testDirIno, subDir2)
	assert.Equal(t, uint16(0), attr2.Mode)

	// subdir inherit default acl
	rule3 = &aclAPI.Rule{}
	if st := m.GetFacl(ctx, subDirIno2, aclAPI.TypeDefault, rule3); st != 0 {
		t.Fatalf("getfacl error: %s", st)
	}
	assert.True(t, rule3.IsEqual(rule2))

	// subdir have no access acl
	rule3 = &aclAPI.Rule{}
	st = m.GetFacl(ctx, subDirIno2, aclAPI.TypeAccess, rule3)
	assert.Equal(t, ENOATTR, st)

	// test cache all
	sz := m.getBase().aclCache.Size()
	err := m.getBase().en.cacheACLs(ctx)
	assert.Nil(t, err)
	assert.Equal(t, sz, m.getBase().aclCache.Size())
}

func testKerberosToken(t *testing.T, m Meta) {
	type token struct {
		User     string
		Renewer  string
		Password string
		Issued   int64
		Expire   int64
	}

	format := testFormat()
	if err := m.Init(format, false); err != nil {
		t.Fatalf("test acl failed: %s", err)
	}
	ctx := Background()

	issueToken := func() (uint32, *token) {
		now := time.Now()
		tk := &token{
			User:     "tom",
			Renewer:  "yarn",
			Password: "password123",
			Issued:   now.Unix(),
			Expire:   now.Add(2 * time.Second).Unix(),
		}
		tb, err := json.Marshal(tk)
		if err != nil {
			t.Fatalf("marshal token failed: %s", err)
		}
		id, eno := m.StoreToken(ctx, tb)
		if eno != 0 {
			t.Fatalf("store token failed: %s", eno)
		}
		return id, tk
	}

	buildToken := func(data []byte) *token {
		tk := &token{}
		if err := json.Unmarshal(data, tk); err != nil {
			t.Fatalf("unmarshal token: %s", err)
		}
		return tk
	}

	id1, tk1 := issueToken()
	retb, eno := m.LoadToken(ctx, id1)
	if eno != 0 {
		t.Fatalf("load token failed: %s", eno)
	}
	var rettk token
	if err := json.Unmarshal(retb, &rettk); err != nil {
		t.Fatalf("unmarshal token: %s", err)
	}
	if !reflect.DeepEqual(tk1, &rettk) {
		t.Fatalf("token mismatch: %+v != %+v", tk1, &rettk)
	}
	tk1.Expire = time.Now().Add(2 * time.Second).Unix()
	tb, err := json.Marshal(tk1)
	if err != nil {
		t.Fatalf("marshal token failed: %s", err)
	}
	eno = m.UpdateToken(ctx, id1, tb)
	if eno != 0 {
		t.Fatalf("update token failed: %s", eno)
	}

	id2, tk2 := issueToken()
	tokens, eno := m.ListTokens(ctx)
	if eno != 0 {
		t.Fatalf("list tokens failed: %s", eno)
	}
	if !reflect.DeepEqual(tk2, buildToken(tokens[id2])) {
		t.Fatalf("token2 mismatch: %+v != %+v", tk2, buildToken(tokens[id2]))
	}
	if !reflect.DeepEqual(tk1, buildToken(tokens[id1])) {
		t.Fatalf("token1 mismatch: %+v != %+v", tk1, buildToken(tokens[id1]))
	}

	eno = m.DeleteTokens(ctx, []uint32{id1, id2})
	if eno != 0 {
		t.Fatalf("delete tokens failed: %s", eno)
	}
	tokens, eno = m.ListTokens(ctx)
	if eno != 0 {
		t.Fatalf("list tokens failed: %s", eno)
	}
	if tokens[id1] != nil || tokens[id2] != nil {
		t.Fatalf("tokens not deleted")
	}
}

func testMetaClient(t *testing.T, m Meta) {
	m.OnMsg(DeleteSlice, func(args ...interface{}) error { return nil })
	ctx := Background()
	var attr = &Attr{}
	if st := m.GetAttr(ctx, 1, attr); st != 0 || attr.Mode != 0777 { // getattr of root always succeed
		t.Fatalf("getattr root: %s", st)
	}

	if err := m.Init(testFormat(), true); err != nil {
		t.Fatalf("initialize failed: %s", err)
	}
	if err := m.Init(&Format{Name: "test2"}, false); err == nil { // not allowed
		t.Fatalf("change name without --force is not allowed")
	}
	format, err := m.Load(true)
	if err != nil {
		t.Fatalf("load failed after initialization: %s", err)
	}
	if format.Name != "test" {
		t.Fatalf("load got volume name %s, expected %s", format.Name, "test")
	}
	if err = m.NewSession(true); err != nil {
		t.Fatalf("new session: %s", err)
	}
	defer m.CloseSession()
	ses, err := m.ListSessions()
	if err != nil || len(ses) != 1 {
		t.Fatalf("list sessions %+v: %s", ses, err)
	}
	base := m.getBase()
	if base.sid != ses[0].Sid {
		t.Fatalf("my sid %d != registered sid %d", base.sid, ses[0].Sid)
	}
	go m.CleanStaleSessions(Background())

	var parent, inode, dummyInode Ino
	if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	defer m.Rmdir(ctx, 1, "d")
	if st := m.Unlink(ctx, 1, "d"); st != syscall.EPERM {
		t.Fatalf("unlink d: %s", st)
	}
	if st := m.Rmdir(ctx, parent, "."); st != syscall.EINVAL {
		t.Fatalf("unlink d.: %s", st)
	}
	if st := m.Rmdir(ctx, parent, ".."); st != syscall.ENOTEMPTY {
		t.Fatalf("unlink d..: %s", st)
	}
	if st := m.Lookup(ctx, 1, "d", &parent, attr, true); st != 0 {
		t.Fatalf("lookup d: %s", st)
	}
	if st := m.Lookup(ctx, 1, "d", &parent, nil, true); st != syscall.EINVAL {
		t.Fatalf("lookup d: %s", st)
	}
	if st := m.Lookup(ctx, 1, "..", &inode, attr, true); st != 0 || inode != 1 {
		t.Fatalf("lookup ..: %s", st)
	}
	if st := m.Lookup(ctx, parent, ".", &inode, attr, true); st != 0 || inode != parent {
		t.Fatalf("lookup .: %s", st)
	}
	if st := m.Lookup(ctx, parent, "..", &inode, attr, true); st != 0 || inode != 1 {
		t.Fatalf("lookup ..: %s", st)
	}
	if attr.Nlink != 3 {
		t.Fatalf("nlink expect 3, but got %d", attr.Nlink)
	}
	if st := m.Access(ctx, parent, 4, attr); st != 0 {
		t.Fatalf("access d: %s", st)
	}
	if st := m.Create(ctx, parent, "f", 0650, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	_ = m.Close(ctx, inode)
	var tino Ino
	if st := m.Lookup(ctx, inode, ".", &tino, attr, true); st != 0 {
		t.Fatalf("lookup /d/f/.: %s", st)
	}
	if st := m.Lookup(ctx, inode, "..", &tino, attr, true); st != syscall.ENOTDIR {
		t.Fatalf("lookup /d/f/..: %s", st)
	}
	defer m.Unlink(ctx, parent, "f")
	if st := m.Rmdir(ctx, parent, "f"); st != syscall.ENOTDIR {
		t.Fatalf("rmdir f: %s", st)
	}
	if st := m.Rmdir(ctx, 1, "d"); st != syscall.ENOTEMPTY {
		t.Fatalf("rmdir d: %s", st)
	}
	if st := m.Mknod(ctx, inode, "df", TypeFile, 0650, 022, 0, "", &dummyInode, nil); st != syscall.ENOTDIR {
		t.Fatalf("create fd: %s", st)
	}
	if st := m.Mknod(ctx, parent, "f", TypeFile, 0650, 022, 0, "", &inode, attr); st != syscall.EEXIST {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Lookup(ctx, parent, "f", &inode, attr, true); st != 0 {
		t.Fatalf("lookup f: %s", st)
	}
	if st := m.Resolve(ctx, 1, "d/f", &inode, attr); st != 0 && st != syscall.ENOTSUP {
		t.Fatalf("resolve d/f: %s", st)
	}
	if st := m.Resolve(ctx, parent, "/f", &inode, attr); st != 0 && st != syscall.ENOTSUP {
		t.Fatalf("resolve f: %s", st)
	}
	var ctx2 = NewContext(0, 1, []uint32{1})
	if st := m.Resolve(ctx2, parent, "/f", &inode, attr); st != syscall.EACCES && st != syscall.ENOTSUP {
		t.Fatalf("resolve f: %s", st)
	}
	if st := m.Resolve(ctx, parent, "/f/c", &inode, attr); st != syscall.ENOTDIR && st != syscall.ENOTSUP {
		t.Fatalf("resolve f: %s", st)
	}
	if st := m.Resolve(ctx, parent, "/f2", &inode, attr); st != syscall.ENOENT && st != syscall.ENOTSUP {
		t.Fatalf("resolve f2: %s", st)
	}
	// check owner permission
	var p1, c1 Ino
	if st := m.Mkdir(ctx2, 1, "d1", 02777, 0, 0, &p1, attr); st != 0 {
		t.Fatalf("mkdir d1: %s", st)
	}
	attr.Gid = 1
	m.SetAttr(ctx, p1, SetAttrGID, 0, attr)
	if attr.Mode&02000 == 0 {
		t.Fatalf("SGID is lost")
	}
	var ctx3 = NewContext(2, 2, []uint32{2})
	if st := m.Mkdir(ctx3, p1, "d2", 0777, 022, 0, &c1, attr); st != 0 {
		t.Fatalf("mkdir d2: %s", st)
	}
	if attr.Gid != ctx2.Gid() {
		t.Fatalf("inherit gid: %d != %d", attr.Gid, ctx2.Gid())
	}
	if runtime.GOOS == "linux" {
		if attr.Mode&02000 == 0 {
			t.Fatalf("not inherit sgid")
		}
		if st := m.Mknod(ctx2, p1, "f1", TypeFile, 02777, 022, 0, "", &dummyInode, attr); st != 0 {
			t.Fatalf("create f1: %s", st)
		} else if attr.Mode&02010 != 02010 {
			t.Fatalf("sgid should not be cleared")
		}
		if st := m.Mknod(ctx3, p1, "f2", TypeFile, 02777, 022, 0, "", &dummyInode, attr); st != 0 {
			t.Fatalf("create f2: %s", st)
		} else if attr.Mode&02010 != 00010 {
			t.Fatalf("sgid should be cleared")
		}

	}
	if st := m.Resolve(ctx2, 1, "/d1/d2", nil, nil); st != 0 && st != syscall.ENOTSUP {
		t.Fatalf("resolve /d1/d2: %s", st)
	}
	if st := m.Remove(ctx, 1, "d1", false, RmrDefaultThreads, nil); st != 0 {
		t.Fatalf("Remove d1: %s", st)
	}
	attr.Atime = 2
	attr.Mtime = 2
	attr.Uid = 1
	attr.Gid = 1
	attr.Mode = 0640
	if st := m.SetAttr(ctx, inode, SetAttrAtime|SetAttrMtime|SetAttrUID|SetAttrGID|SetAttrMode, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.SetAttr(ctx, inode, 0, 0, attr); st != 0 { // changes nothing
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.GetAttr(ctx, inode, attr); st != 0 {
		t.Fatalf("getattr f: %s", st)
	}
	if attr.Atime != 2 || attr.Mtime != 2 || attr.Uid != 1 || attr.Gid != 1 || attr.Mode != 0640 {
		t.Fatalf("atime:%d mtime:%d uid:%d gid:%d mode:%o", attr.Atime, attr.Mtime, attr.Uid, attr.Gid, attr.Mode)
	}
	if st := m.SetAttr(ctx, inode, SetAttrAtimeNow|SetAttrMtimeNow, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	fakeCtx := NewContext(100, 2, []uint32{2, 1})
	if st := m.Access(fakeCtx, parent, 2, nil); st != syscall.EACCES {
		t.Fatalf("access d: %s", st)
	}
	if st := m.Access(fakeCtx, inode, 4, nil); st != 0 {
		t.Fatalf("access f: %s", st)
	}
	var entries []*Entry
	if st := m.Readdir(ctx, parent, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	} else if len(entries) != 3 {
		t.Fatalf("entries: %d", len(entries))
	} else if string(entries[0].Name) != "." || string(entries[1].Name) != ".." || string(entries[2].Name) != "f" {
		t.Fatalf("entries: %+v", entries)
	}
	if st := m.Rename(ctx, parent, "f", 1, "f2", RenameWhiteout, &inode, attr); st != syscall.ENOTSUP {
		t.Fatalf("rename d/f -> f2: %s", st)
	}
	if st := m.Rename(ctx, parent, "f", 1, "f2", 0, &inode, attr); st != 0 {
		t.Fatalf("rename d/f -> f2: %s", st)
	}
	defer func() {
		_ = m.Unlink(ctx, 1, "f2")
	}()
	if st := m.Rename(ctx, 1, "f2", 1, "f2", 0, &inode, attr); st != 0 {
		t.Fatalf("rename f2 -> f2: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", 1, "f", RenameExchange, &inode, attr); st != syscall.ENOENT {
		t.Fatalf("rename f2 -> f: %s", st)
	}
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	_ = m.Close(ctx, inode)
	defer m.Unlink(ctx, 1, "f")
	if st := m.Rename(ctx, 1, "f2", 1, "f", RenameNoReplace, &inode, attr); st != syscall.EEXIST {
		t.Fatalf("rename f2 -> f: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", 1, "f", 0, &inode, attr); st != 0 {
		t.Fatalf("rename f2 -> f: %s", st)
	}
	if st := m.Rename(ctx, 1, "f", 1, "d", RenameExchange, &inode, attr); st != 0 {
		t.Fatalf("rename f <-> d: %s", st)
	}
	if st := m.Rename(ctx, 1, "f", 1, "d", RenameExchange, &inode, attr); st != 0 {
		t.Fatalf("rename f <-> d: %s", st)
	}
	if st := m.Rename(ctx, 1, "d", 1, "f", 0, &inode, attr); st != syscall.ENOTDIR {
		t.Fatalf("rename d -> f: %s", st)
	}
	if st := m.GetAttr(ctx, 1, attr); st != 0 {
		t.Fatalf("getattr f: %s", st)
	}
	if attr.Nlink != 3 {
		t.Fatalf("nlink expect 3, but got %d", attr.Nlink)
	}
	// Test rename with parent change
	var parent2 Ino
	if st := m.Mkdir(ctx, 1, "d4", 0777, 0, 0, &parent2, attr); st != 0 {
		t.Fatalf("create dir d4: %s", st)
	}
	if st := m.Mkdir(ctx, parent2, "d5", 0777, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create dir d4/d5: %s", st)
	}
	if st := m.Rename(ctx, parent2, "d5", 1, "d5", RenameNoReplace, &inode, attr); st != 0 {
		t.Fatalf("rename d4/d5 <-> d5: %s", st)
	} else if attr.Parent != 1 {
		t.Fatalf("after rename d4/d5 <-> d5 parent %d expect 1", attr.Parent)
	}
	if st := m.Mknod(ctx, parent2, "f6", TypeFile, 0650, 022, 0, "", &inode, attr); st != 0 {
		t.Fatalf("create dir d4/f6: %s", st)
	}
	if st := m.Rename(ctx, 1, "d5", parent2, "f6", RenameExchange, &inode, attr); st != 0 {
		t.Fatalf("rename d5 <-> d4/d6: %s", st)
	} else if attr.Parent != parent2 {
		t.Fatalf("after exchange d5 <-> d4/f6 parent %d expect %d", attr.Parent, parent2)
	} else if attr.Typ != TypeDirectory {
		t.Fatalf("after exchange d5 <-> d4/f6 type %d expect %d", attr.Typ, TypeDirectory)
	}
	if st := m.Lookup(ctx, 1, "d5", &inode, attr, true); st != 0 || attr.Parent != 1 {
		t.Fatalf("lookup d5 after exchange: %s; parent %d expect 1", st, attr.Parent)
	} else if attr.Typ != TypeFile {
		t.Fatalf("after exchange d5 <-> d4/f6 type %d expect %d", attr.Typ, TypeFile)
	}
	if st := m.Rmdir(ctx, parent2, "f6"); st != 0 {
		t.Fatalf("rmdir d4/f6 : %s", st)
	}
	if st := m.Rmdir(ctx, 1, "d4"); st != 0 {
		t.Fatalf("rmdir d4 first : %s", st)
	}
	if st := m.Unlink(ctx, 1, "d5"); st != 0 {
		t.Fatalf("rmdir d6 : %s", st)
	}
	if st := m.Lookup(ctx, 1, "f", &inode, attr, true); st != 0 {
		t.Fatalf("lookup f: %s", st)
	}
	if st := m.Link(ctx, inode, 1, "f3", attr); st != 0 {
		t.Fatalf("link f3 -> f: %s", st)
	}
	defer m.Unlink(ctx, 1, "f3")
	if st := m.Link(ctx, inode, 1, "F3", attr); st != 0 { // CaseInsensi = false
		t.Fatalf("link F3 -> f: %s", st)
	}
	if st := m.Link(ctx, parent, 1, "d2", attr); st != syscall.EPERM {
		t.Fatalf("link d2 -> d: %s", st)
	}
	if st := m.Symlink(ctx, 1, "s", "/f", &inode, attr); st != 0 {
		t.Fatalf("symlink s -> /f: %s", st)
	}
	if attr.Mode&0777 != 0777 {
		t.Fatalf("mode of symlink should be 0777")
	}
	defer m.Unlink(ctx, 1, "s")
	var target1, target2 []byte
	if st := m.ReadLink(ctx, inode, &target1); st != 0 {
		t.Fatalf("readlink s: %s", st)
	}
	if st := m.ReadLink(ctx, inode, &target2); st != 0 { // cached
		t.Fatalf("readlink s: %s", st)
	}
	if !bytes.Equal(target1, target2) || !bytes.Equal(target1, []byte("/f")) {
		t.Fatalf("readlink got %s %s, expected %s", target1, target2, "/f")
	}
	if st := m.ReadLink(ctx, parent, &target1); st != syscall.EINVAL {
		t.Fatalf("readlink d: %s", st)
	}
	if st := m.Lookup(ctx, 1, "f", &inode, attr, true); st != 0 {
		t.Fatalf("lookup f: %s", st)
	}

	// data
	var sliceId uint64
	// try to open a file that does not exist
	if st := m.Open(ctx, 99999, syscall.O_RDWR, &Attr{}); st != syscall.ENOENT {
		t.Fatalf("open not exist inode got %d, expected %d", st, syscall.ENOENT)
	}
	if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 {
		t.Fatalf("open f: %s", st)
	}
	_ = m.Close(ctx, inode)
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("write chunk: %s", st)
	}
	var s = Slice{Id: sliceId, Size: 100, Len: 100}
	if st := m.Write(ctx, inode, 0, 100, s, time.Now()); st != 0 {
		t.Fatalf("write end: %s", st)
	}
	var slices []Slice
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read chunk: %s", st)
	}
	if len(slices) != 2 || slices[0].Id != 0 || slices[0].Size != 100 || slices[1].Id != sliceId || slices[1].Size != 100 {
		t.Fatalf("slices: %v", slices)
	}
	if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocKeepSize, 100, 50, nil); st != 0 {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocCollapesRange, 100, 50, nil); st != syscall.EINVAL {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocInsertRange, 100, 50, nil); st != syscall.EINVAL {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, inode, fallocCollapesRange, 100, 50, nil); st != syscall.ENOTSUP {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, inode, fallocPunchHole, 100, 50, nil); st != syscall.EINVAL {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, inode, fallocPunchHole|fallocKeepSize, 0, 0, nil); st != syscall.EINVAL {
		t.Fatalf("fallocate: %s", st)
	}
	if st := m.Fallocate(ctx, parent, fallocPunchHole|fallocKeepSize, 100, 50, nil); st != syscall.EPERM {
		t.Fatalf("fallocate dir: %s", st)
	}
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read chunk: %s", st)
	}
	if len(slices) != 3 || slices[1].Id != 0 || slices[1].Len != 50 || slices[2].Id != sliceId || slices[2].Len != 50 {
		t.Fatalf("slices: %v", slices)
	}

	// xattr
	if st := m.SetXattr(ctx, inode, "a", []byte("v"), XattrCreateOrReplace); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v2"), XattrCreateOrReplace); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	var value []byte
	if st := m.GetXattr(ctx, inode, "a", &value); st != 0 || string(value) != "v2" {
		t.Fatalf("getxattr: %s %v", st, value)
	}
	if st := m.ListXattr(ctx, inode, &value); st != 0 || string(value) != "a\000" {
		t.Fatalf("listxattr: %s %v", st, value)
	}
	if st := m.Unlink(ctx, 1, "F3"); st != 0 {
		t.Fatalf("unlink F3: %s", st)
	}
	if st := m.GetXattr(ctx, inode, "a", &value); st != 0 || string(value) != "v2" {
		t.Fatalf("getxattr: %s %v", st, value)
	}
	if st := m.RemoveXattr(ctx, inode, "a"); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v"), XattrReplace); st != ENOATTR {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v3"), XattrCreate); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v3"), XattrCreate); st != syscall.EEXIST {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v3"), XattrReplace); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v4"), XattrReplace); st != 0 {
		t.Fatalf("setxattr: %s", st)
	}
	if st := m.SetXattr(ctx, inode, "a", []byte("v5"), 5); st != syscall.EINVAL {
		t.Fatalf("setxattr: %s", st)
	}

	var totalspace, availspace, iused, iavail uint64
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<50 || iavail != 10<<20 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}
	format.Capacity = 1 << 20
	format.Inodes = 100
	if err = m.Init(format, false); err != nil {
		t.Fatalf("set quota failed: %s", err)
	}
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<20 || iavail != 97 {
		time.Sleep(time.Millisecond * 100)
		_ = m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail)
		if totalspace != 1<<20 || iavail != 97 {
			t.Fatalf("total space %d, iavail %d", totalspace, iavail)
		}
	}
	// test StatFS with subdir and quota
	var subIno Ino
	if st := m.Mkdir(ctx, 1, "subdir", 0755, 0, 0, &subIno, nil); st != 0 {
		t.Fatalf("mkdir subdir: %s", st)
	}
	if st := m.Chroot(ctx, "subdir"); st != 0 {
		t.Fatalf("chroot: %s", st)
	}
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<20 || iavail != 96 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/subdir", 0, 0, map[string]*Quota{
		"/subdir": {
			MaxSpace:  0,
			MaxInodes: 0,
		},
	}, false, false, false); err != nil {
		t.Fatalf("set quota: %s", err)
	}
	base.loadQuotas()
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<20-4*uint64(align4K(0)) || iavail != 96 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/subdir", 0, 0, map[string]*Quota{
		"/subdir": {
			MaxSpace:  1 << 10,
			MaxInodes: 0,
		},
	}, false, false, false); err != nil {
		t.Fatalf("set quota: %s", err)
	}
	base.loadQuotas()
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<10 || iavail != 96 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/subdir", 0, 0, map[string]*Quota{
		"/subdir": {
			MaxSpace:  0,
			MaxInodes: 10,
		},
	}, false, false, false); err != nil {
		t.Fatalf("set quota: %s", err)
	}
	base.loadQuotas()
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<20-4*uint64(align4K(0)) || iavail != 10 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/subdir", 0, 0, map[string]*Quota{
		"/subdir": {
			MaxSpace:  1 << 10,
			MaxInodes: 10,
		},
	}, false, false, false); err != nil {
		t.Fatalf("set quota: %s", err)
	}
	base.loadQuotas()
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<10 || iavail != 10 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	m.chroot(RootInode)
	if st := m.StatFS(ctx, RootInode, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<20 || iavail != 96 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}
	// statfs subdir directly
	if st := m.StatFS(ctx, subIno, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<10 || iavail != 10 {
		t.Fatalf("total space %d, iavail %d", totalspace, iavail)
	}

	base.loadQuotas()
	base.quotaMu.RLock()
	q := base.dirQuotas[uint64(subIno)]
	base.quotaMu.RUnlock()
	q.update(4<<10, 15) // used > max
	base.doFlushQuotas()
	if st := m.StatFS(ctx, subIno, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 4<<10 || availspace != 0 || iused != 15 || iavail != 0 {
		t.Fatalf("total space %d, availspace %d, iused %d, iavail %d", totalspace, availspace, iused, iavail)
	}
	q.update(-8<<10, -20) // used < 0
	base.doFlushQuotas()
	if st := m.StatFS(ctx, subIno, &totalspace, &availspace, &iused, &iavail); st != 0 {
		t.Fatalf("statfs: %s", st)
	}
	if totalspace != 1<<10 || availspace != 1<<10 || iused != 0 || iavail != 10 {
		t.Fatalf("total space %d, availspace %d, iused %d, iavail %d", totalspace, availspace, iused, iavail)
	}

	if st := m.Rmdir(ctx, 1, "subdir"); st != 0 {
		t.Fatalf("rmdir subdir: %s", st)
	}

	var summary Summary
	if st := m.GetSummary(ctx, parent, &summary, false, true); st != 0 {
		t.Fatalf("summary: %s", st)
	}
	expected := Summary{Length: 0, Size: 4096, Files: 0, Dirs: 1}
	if summary != expected {
		t.Fatalf("summary %+v not equal to expected: %+v", summary, expected)
	}
	summary = Summary{}
	if st := m.GetSummary(ctx, 1, &summary, true, true); st != 0 {
		t.Fatalf("summary: %s", st)
	}
	expected = Summary{Length: 400, Size: 20480, Files: 3, Dirs: 2}
	if summary != expected {
		t.Fatalf("summary %+v not equal to expected: %+v", summary, expected)
	}
	if st := m.GetSummary(ctx, inode, &summary, true, true); st != 0 {
		t.Fatalf("summary: %s", st)
	}
	expected = Summary{Length: 600, Size: 24576, Files: 4, Dirs: 2}
	if summary != expected {
		t.Fatalf("summary %+v not equal to expected: %+v", summary, expected)
	}
	if st := m.Unlink(ctx, 1, "f"); st != 0 {
		t.Fatalf("unlink f: %s", st)
	}
	if st := m.Unlink(ctx, 1, "f3"); st != 0 {
		t.Fatalf("unlink f3: %s", st)
	}
	time.Sleep(time.Millisecond * 100) // wait for delete
	if st := m.Read(ctx, inode, 0, &slices); st != syscall.ENOENT {
		t.Fatalf("read chunk: %s", st)
	}
	if st := m.Rmdir(ctx, 1, "d"); st != 0 {
		t.Fatalf("rmdir d: %s", st)
	}
}

func testStickyBit(t *testing.T, m Meta) {
	ctx := Background()
	var sticky, normal, inode Ino
	var attr = &Attr{}
	m.Mkdir(ctx, 1, "tmp", 01777, 0, 0, &sticky, attr)
	m.Mkdir(ctx, 1, "tmp2", 0777, 0, 0, &normal, attr)
	ctxA := NewContext(1, 1, []uint32{1})
	// file
	m.Create(ctxA, sticky, "f", 0777, 0, 0, &inode, attr)
	m.Create(ctxA, normal, "f", 0777, 0, 0, &inode, attr)
	ctxB := NewContext(1, 2, []uint32{2})
	if e := m.Unlink(ctxB, sticky, "f"); e != syscall.EACCES {
		t.Fatalf("unlink f: %s", e)
	}
	if e := m.Rename(ctxB, sticky, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename f: %s", e)
	}
	if e := m.Rename(ctxB, sticky, "f", normal, "f2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename f: %s", e)
	}
	m.Create(ctxB, sticky, "f2", 0777, 0, 0, &inode, attr)
	if e := m.Rename(ctxB, sticky, "f2", sticky, "f", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("overwrite f: %s", e)
	}
	if e := m.Rename(ctxA, sticky, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename f: %s", e)
	}
	if e := m.Rename(ctxA, normal, "f", sticky, "f2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename f: %s", e)
	}
	if e := m.Rename(ctxA, sticky, "f", sticky, "f3", 0, &inode, attr); e != 0 {
		t.Fatalf("rename f: %s", e)
	}
	if e := m.Unlink(ctxA, sticky, "f3"); e != 0 {
		t.Fatalf("unlink f3: %s", e)
	}
	// dir
	m.Mkdir(ctxA, sticky, "d", 0777, 0, 0, &inode, attr)
	m.Mkdir(ctxA, normal, "d", 0777, 0, 0, &inode, attr)
	if e := m.Rmdir(ctxB, sticky, "d"); e != syscall.EACCES {
		t.Fatalf("rmdir d: %s", e)
	}
	if e := m.Rename(ctxB, sticky, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename d: %s", e)
	}
	if e := m.Rename(ctxB, sticky, "d", normal, "d2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename d: %s", e)
	}
	m.Mkdir(ctxB, sticky, "d2", 0777, 0, 0, &inode, attr)
	if e := m.Rename(ctxB, sticky, "d2", sticky, "d", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("overwrite d: %s", e)
	}
	if e := m.Rename(ctxA, sticky, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename d: %s", e)
	}
	if e := m.Rename(ctxA, normal, "d", sticky, "d2", 0, &inode, attr); e != syscall.EACCES {
		t.Fatalf("rename d: %s", e)
	}
	if e := m.Rename(ctxA, sticky, "d", sticky, "d3", 0, &inode, attr); e != 0 {
		t.Fatalf("rename d: %s", e)
	}
	if e := m.Rmdir(ctxA, sticky, "d3"); e != 0 {
		t.Fatalf("rmdir d3: %s", e)
	}
}

func testListLocks(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	defer m.Unlink(ctx, 1, "f")
	if st := m.Create(ctx, 1, "f", 0644, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}

	// flock
	o1 := uint64(0xF000000000000001)
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock: %s", st)
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 1 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_UNLCK, false); st != 0 {
		t.Fatalf("flock unlock: %s", st)
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	for i := 2; i < 10; i++ {
		if st := m.Flock(ctx, inode, uint64(i), syscall.F_RDLCK, false); st != 0 {
			t.Fatalf("flock wlock: %s", st)
		}
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 8 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	for i := 2; i < 10; i++ {
		if st := m.Flock(ctx, inode, uint64(i), syscall.F_UNLCK, false); st != 0 {
			t.Fatalf("flock unlock: %s", st)
		}
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}

	// plock
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_WRLCK, 0, 0xFFFF, 1); st != 0 {
		t.Fatalf("plock rlock: %s", st)
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 1 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_UNLCK, 0, 0xFFFF, 1); st != 0 {
		t.Fatalf("plock unlock: %s", st)
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	for i := 2; i < 10; i++ {
		if st := m.Setlk(ctx, inode, uint64(i), false, syscall.F_RDLCK, 0, 0xFFFF, 1); st != 0 {
			t.Fatalf("plock rlock: %s", st)
		}
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 8 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
	for i := 2; i < 10; i++ {
		if st := m.Setlk(ctx, inode, uint64(i), false, syscall.F_UNLCK, 0, 0xFFFF, 1); st != 0 {
			t.Fatalf("plock unlock: %s", st)
		}
	}
	if plocks, flocks, err := m.ListLocks(ctx, inode); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
}

func testLocks(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	defer m.Unlink(ctx, 1, "f")
	if st := m.Create(ctx, 1, "f", 0644, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	// flock
	o1 := uint64(0xF000000000000001)
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_RDLCK, false); st != 0 {
		t.Fatalf("flock rlock: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_RDLCK, false); st != 0 {
		t.Fatalf("flock rlock: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_UNLCK, false); st != 0 {
		t.Fatalf("flock unlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_UNLCK, false); st != 0 {
		t.Fatalf("flock unlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_RDLCK, false); st != 0 {
		t.Fatalf("flock rlock: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_RDLCK, false); st != 0 {
		t.Fatalf("flock rlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != syscall.EAGAIN {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_UNLCK, false); st != 0 {
		t.Fatalf("flock unlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock again: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_WRLCK, false); st != syscall.EAGAIN {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Flock(ctx, inode, 2, syscall.F_RDLCK, false); st != syscall.EAGAIN {
		t.Fatalf("flock rlock: %s", st)
	}
	if st := m.Flock(ctx, inode, o1, syscall.F_UNLCK, false); st != 0 {
		t.Fatalf("flock unlock: %s", st)
	}
	if r, ok := m.(*redisMeta); ok {
		ms, err := r.rdb.SMembers(context.Background(), r.lockedKey(r.sid)).Result()
		if err != nil {
			t.Fatalf("Smember %s: %s", r.lockedKey(r.sid), err)
		}
		if len(ms) != 0 {
			t.Fatalf("locked inodes leaked: %d", len(ms))
		}
	}

	// POSIX locks
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_UNLCK, 0, 0xFFFF, 1); st != 0 {
		t.Fatalf("plock unlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_RDLCK, 0, 0xFFFF, 1); st != 0 {
		t.Fatalf("plock rlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_RDLCK, 0, 0xFFFF, 1); st != 0 {
		t.Fatalf("plock rlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_RDLCK, 0, 0x2FFFF, 1); st != 0 {
		t.Fatalf("plock rlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0, 0xFFFF, 1); st != syscall.EAGAIN {
		t.Fatalf("plock wlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0x10000, 0x20000, 1); st != 0 {
		t.Fatalf("plock wlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_UNLCK, 0, 0x20000, 1); st != 0 {
		t.Fatalf("plock unlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0, 0xFFFF, 10); st != 0 {
		t.Fatalf("plock wlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_WRLCK, 0x2000, 0xFFFF, 20); st != 0 {
		t.Fatalf("plock wlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, o1, false, syscall.F_WRLCK, 0, 0xFFFF, 1); st != syscall.EAGAIN {
		t.Fatalf("plock rlock: %s", st)
	}
	var ltype, pid uint32 = syscall.F_WRLCK, 1
	var start, end uint64 = 0x2000, 0xFFFF
	if st := m.Getlk(ctx, inode, o1, &ltype, &start, &end, &pid); st != 0 || ltype != syscall.F_WRLCK || pid != 20 || start != 0x2000 || end != 0xFFFF {
		t.Fatalf("plock get rlock: %s, %d %d %x %x", st, ltype, pid, start, end)
	}
	if st := m.Setlk(ctx, inode, 2, false, syscall.F_UNLCK, 0, 0x2FFFF, 1); st != 0 {
		t.Fatalf("plock unlock: %s", st)
	}
	ltype = syscall.F_WRLCK
	start, end = 0, 0xFFFFFF
	if st := m.Getlk(ctx, inode, o1, &ltype, &start, &end, &pid); st != 0 || ltype != syscall.F_UNLCK || pid != 0 || start != 0 || end != 0 {
		t.Fatalf("plock get rlock: %s, %d %d %x %x", st, ltype, pid, start, end)
	}

	// concurrent locks
	var g sync.WaitGroup
	var count int
	var err syscall.Errno
	for i := 0; i < 100; i++ {
		g.Add(1)
		go func(i int) {
			defer g.Done()
			if st := m.Setlk(ctx, inode, uint64(i), true, syscall.F_WRLCK, 0, 0xFFFF, uint32(i)); st != 0 {
				err = st
			}
			count++
			time.Sleep(time.Millisecond)
			count--
			if count > 0 {
				panic(fmt.Errorf("count should be zero but got %d", count))
			}
			if st := m.Setlk(ctx, inode, uint64(i), false, syscall.F_UNLCK, 0, 0xFFFF, uint32(i)); st != 0 {
				panic(fmt.Errorf("plock unlock: %s", st))
			}
		}(i)
	}
	g.Wait()
	if err != 0 {
		t.Fatalf("lock fail: %s", err)
	}

	if r, ok := m.(*redisMeta); ok {
		ms, err := r.rdb.SMembers(context.Background(), r.lockedKey(r.sid)).Result()
		if err != nil {
			t.Fatalf("Smember %s: %s", r.lockedKey(r.sid), err)
		}
		if len(ms) != 0 {
			t.Fatalf("locked inode leaked: %d", len(ms))
		}
	}
}

func testResolve(t *testing.T, m Meta) {
	var inode, parent Ino
	var attr, pattr Attr
	if st := m.Mkdir(NewContext(1, 65534, []uint32{65534}), 1, "d", 0770, 0, 0, &parent, &pattr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	if pattr.Gid != 65534 {
		pattr.Gid = 65534
		if st := m.SetAttr(NewContext(1, 65534, []uint32{65534}), parent, SetAttrGID, 0, &pattr); st != 0 {
			t.Fatalf("setattr gid: %s", st)
		}
	}

	if pattr.Uid != 65534 || pattr.Gid != 65534 {
		t.Fatalf("attr %+v", pattr)
	}
	if st := m.Create(NewContext(1, 65534, []uint32{65534}), parent, "f", 0644, 0, 0, &inode, &attr); st != 0 {
		t.Fatalf("create /d/f: %s", st)
	}

	defer func() {
		if st := m.Remove(NewContext(0, 65534, []uint32{65534}), parent, "f", false, RmrDefaultThreads, nil); st != 0 {
			t.Fatalf("remove /d/f by owner: %s", st)
		}
		if st := m.Rmdir(NewContext(0, 65534, []uint32{65534}), 1, "d"); st != 0 {
			t.Fatalf("rmdir /d by owner: %s", st)
		}
	}()

	if st := m.Resolve(NewContext(0, 65534, []uint32{65534}), 1, "/d/f", &inode, &attr); st != 0 {
		if st == syscall.ENOTSUP {
			return
		}
		t.Fatalf("resolve /d/f by owner: %s", st)
	}
	if st := m.Resolve(NewContext(0, 65533, []uint32{65534}), 1, "/d/f", &inode, &attr); st != 0 {
		t.Fatalf("resolve /d/f by group: %s", st)
	}
	if st := m.Resolve(NewContext(0, 65533, []uint32{65533, 65534}), 1, "/d/f", &inode, &attr); st != 0 {
		t.Fatalf("resolve /d/f by multi-group: %s", st)
	}
	if st := m.Resolve(NewContext(0, 65533, []uint32{65533}), 1, "/d/f", &inode, &attr); st != syscall.EACCES {
		t.Fatalf("resolve /d/f by non-group: %s", st)
	}
}

func testRemove(t *testing.T, m Meta) {
	ctx := Background()
	var inode, parent Ino
	var attr = &Attr{}
	if st := m.Create(ctx, 1, "f", 0644, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Remove(ctx, 1, "f", false, RmrDefaultThreads, nil); st != 0 {
		t.Fatalf("rmr f: %s", st)
	}
	if st := m.Mkdir(ctx, 1, "d", 0755, 0, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	if st := m.Mkdir(ctx, parent, "d2", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create d/d2: %s", st)
	}
	if st := m.Create(ctx, parent, "f", 0644, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create d/f: %s", st)
	}
	if ps := m.GetPaths(ctx, parent); len(ps) == 0 || ps[0] != "/d" {
		t.Fatalf("get path /d: %v", ps)
	}
	if ps := m.GetPaths(ctx, inode); len(ps) == 0 || ps[0] != "/d/f" {
		t.Fatalf("get path /d/f: %v", ps)
	}
	for i := 0; i < 4096; i++ {
		if st := m.Create(ctx, 1, "f"+strconv.Itoa(i), 0644, 0, 0, &inode, attr); st != 0 {
			t.Fatalf("create f%s: %s", strconv.Itoa(i), st)
		}
	}
	var entries []*Entry
	if st := m.Readdir(ctx, 1, 1, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	} else if len(entries) != 4099 {
		t.Fatalf("entries: %d", len(entries))
	}
	if st := m.Remove(ctx, 1, "d", false, RmrDefaultThreads, nil); st != 0 {
		t.Fatalf("rmr d: %s", st)
	}
}

func testCaseIncensi(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	_ = m.Create(ctx, 1, "foo", 0755, 0, 0, &inode, attr)
	if st := m.Create(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create Foo should be ok")
	}
	if st := m.Create(ctx, 1, "Foo", 0755, 0, syscall.O_EXCL, &inode, attr); st != syscall.EEXIST {
		t.Fatalf("create should fail with EEXIST")
	}
	if st := m.Lookup(ctx, 1, "Foo", &inode, attr, true); st != 0 {
		t.Fatalf("lookup Foo should be OK")
	}
	if st := m.Rename(ctx, 1, "Foo", 1, "bar", 0, &inode, attr); st != 0 {
		t.Fatalf("rename Foo to bar should be OK, but got %s", st)
	}
	if st := m.Create(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create Foo should be OK")
	}
	if st := m.Resolve(ctx, 1, "/Foo", &inode, attr); st != syscall.ENOTSUP {
		t.Fatalf("resolve with case insensitive should be ENOTSUP")
	}
	if st := m.Lookup(ctx, 1, "Bar", &inode, attr, true); st != 0 {
		t.Fatalf("lookup Bar should be OK")
	}
	if st := m.Link(ctx, inode, 1, "foo", attr); st != syscall.EEXIST {
		t.Fatalf("link should fail with EEXIST")
	}
	if st := m.Unlink(ctx, 1, "Bar"); st != 0 {
		t.Fatalf("unlink Bar should be OK")
	}
	if st := m.Unlink(ctx, 1, "foo"); st != 0 {
		t.Fatalf("unlink foo should be OK")
	}
	if st := m.Mkdir(ctx, 1, "Foo", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("mkdir Foo should be OK, but got %s", st)
	}
	if st := m.Rmdir(ctx, 1, "foo"); st != 0 {
		t.Fatalf("rmdir foo should be OK")
	}
}

func testCaseIncensiRename(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}

	_ = m.Create(ctx, 1, "aaa", 0755, 0, 0, &inode, attr)
	if st := m.Create(ctx, 1, "AAA", 0755, 0, syscall.O_EXCL, &inode, attr); st == 0 {
		t.Fatalf("create AAA should NOT be ok")
	}

	_ = m.Create(ctx, 1, "bbb", 0755, 0, 0, &inode, attr)

	/* NOW we have:
	/aaa
	/bbb
	*/

	if st := m.Rename(ctx, 1, "aaa", 1, "AAA", 0, &inode, attr); st != 0 {
		t.Fatalf("rename aaa to AAA should be OK, bug got : %s", st)
	}

	if st := m.Rename(ctx, 1, "aaa", 1, "AAA", 0, &inode, attr); st != 0 {
		t.Fatalf("rename aaa to AAA again should be OK, bug got : %s", st)
	}

	if st := m.Rename(ctx, 1, "aaa", 1, "BBB", RenameNoReplace, &inode, attr); st == 0 {
		t.Fatal("rename aaa to BBB (RenameNoReplace) should NOT be OK")
	}

	if st := m.Rename(ctx, 1, "aaa", 1, "BBB", 0, &inode, attr); st != 0 {
		t.Fatalf("rename aaa to BBB should be OK, but got: %s", st)
	}

	/* NOW we have:
	/BBB
	*/

	if st := m.Create(ctx, 1, "aaa", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create aaa should be ok, but got %s", st)
	}

	/*NOW we have:
	/BBB
	/aaa
	*/

	if st := m.Rename(ctx, 1, "aaa", 1, "Aaa", 0, &inode, attr); st != 0 {
		t.Fatalf("rename aaa to Aaa should be OK, but got %s", st)
	}

	var dirInode Ino

	if st := m.Mkdir(ctx, 1, "case_insensi_dir", 0755, 0, 0, &dirInode, attr); st != 0 {
		t.Fatalf("mkdir case_insensi_dir should be OK, but got %s", st)
	}

	if st := m.Create(ctx, dirInode, "AAA", 0755, 0, 0, &inode, attr); st != 0 {
		t.Fatalf("create case_insensi_dir/AAA should be ok, but got %s", st)
	}

	/*NOW we have:
	/BBB
	/Aaa
	/case_insensi_dir/AAA
	*/

	if st := m.Rename(ctx, 1, "aaa", dirInode, "aaa", RenameNoReplace, &inode, attr); st == 0 {
		t.Fatalf("rename aaa to case_insensi_dir/aaa (RenameNoReplace) should NOT be OK")
	}

	if st := m.Rename(ctx, 1, "aaa", dirInode, "aaa", 0, &inode, attr); st != 0 {
		t.Fatalf("rename Aaa to case_insensi_dir/aaa should be OK, but got %s", st)
	}
}

func testCaseIncensiHardlinkRename(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}

	_ = m.Create(ctx, 1, "ccc", 0755, 0, 0, &inode, attr)
	if st := m.Link(ctx, inode, 1, "CCC", attr); st == 0 {
		t.Fatalf("create hardlink CCC should NOT be ok")
	}

	if st := m.Link(ctx, inode, 1, "ddd", attr); st != 0 {
		t.Fatalf("create hardlink ddd should be ok, but got %s", st)
	}

	/* NOW we have:
	/ccc
	/ddd
	*/

	if st := m.Rename(ctx, 1, "ccc", 1, "CCC", 0, &inode, attr); st != 0 {
		t.Fatalf("rename ccc to CCC should be OK, bug got : %s", st)
	}

	if st := m.Rename(ctx, 1, "ccc", 1, "DDD", RenameNoReplace, &inode, attr); st != syscall.EEXIST {
		t.Fatal("rename ccc to DDD (RenameNoReplace) should fail with EEXIST")
	}

	if st := m.Rename(ctx, 1, "ccc", 1, "DDD", 0, &inode, attr); st != 0 {
		t.Fatalf("rename ccc to DDD shouldshould fail silently")
	}

	if st := m.Lookup(ctx, 1, "ccc", &inode, attr, false); st != 0 {
		t.Fatalf("Lookup ccc should be OK, but got %s", st)
	}

	if st := m.Lookup(ctx, 1, "ddd", &inode, attr, false); st != 0 {
		t.Fatalf("Lookup ddd should be OK, but got %s", st)
	}

	/* NOW we have:
	/ccc
	/ddd
	*/

	var dirInode Ino

	if st := m.Mkdir(ctx, 1, "case_insensi_hark_dir", 0755, 0, 0, &dirInode, attr); st != 0 {
		t.Fatalf("mkdir case_insensi_hark_dir should be OK, but got %s", st)
	}

	if st := m.Link(ctx, inode, dirInode, "DDD", attr); st != 0 {
		t.Fatalf("create case_insensi_dir/DDD should be ok, but got %s", st)
	}

	/*NOW we have:
	/ccc
	/ddd
	/case_insensi_dir/DDD
	*/

	if st := m.Rename(ctx, 1, "ccc", dirInode, "ddd", RenameNoReplace, &inode, attr); st != syscall.EEXIST {
		t.Fatalf("rename ccc to case_insensi_dir/ddd (RenameNoReplace) should fail with EEXIST")
	}

	if st := m.Rename(ctx, 1, "ccc", dirInode, "ddd", 0, &inode, attr); st != 0 {
		t.Fatalf("rename ccc to case_insensi_dir/ddd should fail silently")
	}

	if st := m.Lookup(ctx, 1, "ccc", &inode, attr, false); st != 0 {
		t.Fatalf("resolve ccc should be OK, but got %s", st)
	}

	if st := m.Rename(ctx, 1, "ddd", dirInode, "ddd", 0, &inode, attr); st != 0 {
		t.Fatalf("rename ddd to case_insensi_dir/ddd should fail silently")
	}

	if st := m.Lookup(ctx, 1, "ddd", &inode, attr, false); st != 0 {
		t.Fatalf("lookup ddd should be OK, but got %s", st)
	}
}

type compactor interface {
	compactChunk(inode Ino, indx uint32, once, force bool)
}

func testCompaction(t *testing.T, m Meta, trash bool) {
	if trash {
		format := testFormat()
		format.TrashDays = 1
		_ = m.Init(format, false)
		defer func() {
			if err := m.Init(testFormat(), false); err != nil {
				t.Fatalf("init: %v", err)
			}
		}()
	} else {
		_ = m.Init(testFormat(), false)
	}

	if err := m.NewSession(false); err != nil {
		t.Fatalf("new session: %v", err)
	}
	defer m.CloseSession()
	var l sync.Mutex
	deleted := make(map[uint64]int)
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		l.Lock()
		sliceId := args[0].(uint64)
		deleted[sliceId] = 1
		l.Unlock()
		return nil
	})
	m.OnMsg(CompactChunk, func(args ...interface{}) error {
		return nil
	})
	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	_ = m.Unlink(ctx, 1, "f")
	if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create file %s", st)
	}
	defer func() {
		_ = m.Unlink(ctx, 1, "f")
	}()

	// random write
	var sliceId uint64
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 1, uint32(0), Slice{Id: sliceId, Size: 64 << 20, Len: 64 << 20}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 1, uint32(30<<20), Slice{Id: sliceId, Size: 8, Len: 8}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 1, uint32(40<<20), Slice{Id: sliceId, Size: 8, Len: 8}, time.Now())
	var cs1 []Slice
	_ = m.Read(ctx, inode, 1, &cs1)
	if len(cs1) != 5 {
		t.Fatalf("expect 5 slices, but got %+v", cs1)
	}
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 1, false, true)
	}
	var cs []Slice
	_ = m.Read(ctx, inode, 1, &cs)
	if len(cs) != 1 {
		t.Fatalf("expect 1 slice, but got %+v", cs)
	}

	// append
	var size uint32 = 100000
	for i := 0; i < 200; i++ {
		var sliceId uint64
		m.NewSlice(ctx, &sliceId)
		if st := m.Write(ctx, inode, 0, uint32(i)*size, Slice{Id: sliceId, Size: size, Len: size}, time.Now()); st != 0 {
			t.Fatalf("write %d: %s", i, st)
		}
		time.Sleep(time.Millisecond)
	}
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 0, false, true)
	}
	var slices []Slice
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read 0: %s", st)
	}
	if len(slices) >= 10 {
		t.Fatalf("inode %d should be compacted, but have %d slices", inode, len(slices))
	}
	var total uint32
	for _, s := range slices {
		total += s.Len
	}
	if total != size*200 {
		t.Fatalf("size of slice should be %d, but got %d", size*200, total)
	}

	// TODO: check result if that's predictable
	p, bar := utils.MockProgress()
	if st := m.CompactAll(ctx, 8, bar); st != 0 {
		t.Fatalf("compactall: %s", st)
	}
	p.Done()
	sliceMap := make(map[Ino][]Slice)
	if st := m.ListSlices(ctx, sliceMap, false, false, nil); st != 0 {
		t.Fatalf("list all slices: %s", st)
	}

	if trash {
		l.Lock()
		deletes := len(deleted)
		l.Unlock()
		if deletes > 10 {
			t.Fatalf("deleted slices %d is greater than 10", deletes)
		}
		if len(sliceMap[1]) < 200 {
			t.Fatalf("list delayed slices %d is less than 200", len(sliceMap[1]))
		}
		m.(engine).doCleanupDelayedSlices(ctx, time.Now().Unix()+1)
	}
	m.getBase().stopDeleteSliceTasks()
	l.Lock()
	deletes := len(deleted)
	l.Unlock()
	if deletes < 200 {
		t.Fatalf("deleted slices %d is less than 200", deletes)
	}
	m.getBase().startDeleteSliceTasks()

	// truncate to 0
	if st := m.Truncate(ctx, inode, 0, 0, attr, false); st != 0 {
		t.Fatalf("truncate file: %s", st)
	}
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 0, false, true)
	}
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read 0: %s", st)
	}
	if len(slices) != 1 || slices[0].Len != 1 {
		t.Fatalf("inode %d should be compacted, but have %d slices, size %d", inode, len(slices), slices[0].Len)
	}

	if st := m.Truncate(ctx, inode, 0, 64<<10, attr, false); st != 0 {
		t.Fatalf("truncate file: %s", st)
	}
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 0, uint32(1<<20), Slice{Id: sliceId, Size: 2 << 20, Len: 2 << 20}, time.Now())
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 0, false, true)
	}
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read 0: %s", st)
	}
	if len(slices) != 2 || slices[0].Id != 0 || slices[1].Len != 2<<20 {
		t.Fatalf("inode %d should be compacted, but have %d slices, id %d size %d",
			inode, len(slices), slices[0].Id, slices[1].Len)
	}

	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 0, uint32(512<<10), Slice{Id: sliceId, Size: 2 << 20, Len: 64 << 10}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 0, uint32(0), Slice{Id: sliceId, Size: 1 << 20, Len: 64 << 10}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 0, uint32(128<<10), Slice{Id: sliceId, Size: 2 << 20, Len: 128 << 10}, time.Now())
	_ = m.Write(ctx, inode, 0, uint32(0), Slice{Id: 0, Size: 1 << 20, Len: 1 << 20}, time.Now())
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 0, false, true)
	}
	if st := m.Read(ctx, inode, 0, &slices); st != 0 {
		t.Fatalf("read 0: %s", st)
	}
	if len(slices) != 1 || slices[0].Len != 3<<20 {
		t.Fatalf("inode %d should be compacted, but have %d slices, size %d", inode, len(slices), slices[0].Len)
	}

	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 2, 0, Slice{Id: sliceId, Size: 2338508, Len: 2338508}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 2, 8829056, Slice{Id: sliceId, Size: 1074933, Len: 1074933}, time.Now())
	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 2, 7663608, Slice{Id: sliceId, Size: 41480, Len: 4148}, time.Now())
	_ = m.Fallocate(ctx, inode, fallocZeroRange, 2*ChunkSize+4515328, 3152428, nil)
	_ = m.Fallocate(ctx, inode, fallocZeroRange, 2*ChunkSize+4515328, 2607724, nil)
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 2, false, true)
	}
	if st := m.Read(ctx, inode, 2, &slices); st != 0 {
		t.Fatalf("read 1: %s", st)
	}
	// compact twice: 4515328+2607724-2338508 = 4784544; 8829056+1074933-2338508-4784544=2780937
	if len(slices) != 3 || slices[0].Len != 2338508 || slices[1].Len != 4784544 || slices[2].Len != 2780937 {
		t.Fatalf("inode %d should be compacted, but have %d slices, size %d,%d,%d",
			inode, len(slices), slices[0].Len, slices[1].Len, slices[2].Len)
	}

	m.NewSlice(ctx, &sliceId)
	_ = m.Write(ctx, inode, 3, 0, Slice{Id: sliceId, Size: 2338508, Len: 2338508}, time.Now())
	_ = m.CopyFileRange(ctx, inode, 3*ChunkSize, inode, 4*ChunkSize, 2338508, 0, nil, nil)
	_ = m.Fallocate(ctx, inode, fallocZeroRange, 4*ChunkSize, ChunkSize, nil)
	_ = m.CopyFileRange(ctx, inode, 3*ChunkSize, inode, 4*ChunkSize, 2338508, 0, nil, nil)
	if c, ok := m.(compactor); ok {
		c.compactChunk(inode, 4, false, true)
	}
	if st := m.Read(ctx, inode, 4, &slices); st != 0 {
		t.Fatalf("read inode %d chunk 4: %s", inode, st)
	}
	if len(slices) != 1 || slices[0].Len != 2338508 {
		t.Fatalf("inode %d should be compacted, but have %d slices, size %d", inode, len(slices), slices[0].Len)
	}
}

func testConcurrentWrite(t *testing.T, m Meta) {
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		return nil
	})
	m.OnMsg(CompactChunk, func(args ...interface{}) error {
		return nil
	})

	if err := m.NewSession(false); err != nil {
		t.Fatalf("new session: %v", err)
	}
	defer m.CloseSession()

	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	_ = m.Unlink(ctx, 1, "f")
	if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create file %s", st)
	}
	defer m.Unlink(ctx, 1, "f")

	var errno syscall.Errno
	var g sync.WaitGroup
	for i := 0; i <= 10; i++ {
		g.Add(1)
		go func(indx uint32) {
			defer g.Done()
			for j := 0; j < 100; j++ {
				var sliceId uint64
				m.NewSlice(ctx, &sliceId)
				var slice = Slice{Id: sliceId, Size: 100, Len: 100}
				st := m.Write(ctx, inode, indx, 0, slice, time.Now())
				if st != 0 {
					errno = st
					break
				}
			}
		}(uint32(i))
	}
	g.Wait()
	if errno != 0 {
		t.Fatal()
	}

	var g2 sync.WaitGroup
	for i := 0; i <= 10; i++ {
		g2.Add(1)
		go func() {
			defer g2.Done()
			for j := 0; j < 1000; j++ {
				var sliceId uint64
				m.NewSlice(ctx, &sliceId)
				var slice = Slice{Id: sliceId, Size: 100, Len: 100}
				st := m.Write(ctx, inode, 0, uint32(200*j), slice, time.Now())
				if st != 0 {
					errno = st
					break
				}
			}
		}()
	}
	g2.Wait()
	if errno != 0 {
		t.Fatal()
	}
}

func testTruncateAndDelete(t *testing.T, m Meta) {
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		return nil
	})
	// remove quota
	format, _ := m.Load(false)
	format.Capacity = 0
	_ = m.Init(format, false)

	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	m.Unlink(ctx, 1, "f")
	if st := m.Truncate(ctx, 1, 0, 4<<10, attr, false); st != syscall.EPERM {
		t.Fatalf("truncate dir %s", st)
	}
	if st := m.Create(ctx, 1, "f", 0650, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create file %s", st)
	}
	defer m.Unlink(ctx, 1, "f")
	var sliceId uint64
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("new chunk: %s", st)
	}
	if st := m.Write(ctx, inode, 0, 100, Slice{sliceId, 100, 0, 100}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	if st := m.Truncate(ctx, inode, 0, 200<<20, attr, false); st != 0 {
		t.Fatalf("truncate file %s", st)
	}
	if st := m.Truncate(ctx, inode, 0, (10<<40)+10, attr, false); st != 0 {
		t.Fatalf("truncate file %s", st)
	}
	if st := m.Truncate(ctx, inode, 0, (300<<20)+10, attr, false); st != 0 {
		t.Fatalf("truncate file %s", st)
	}
	var total int64
	slices := make(map[Ino][]Slice)
	m.ListSlices(ctx, slices, false, false, func() { total++ })
	var totalSlices int
	for _, ss := range slices {
		totalSlices += len(ss)
	}
	if totalSlices != 1 {
		t.Fatalf("number of slices: %d != 1, %+v", totalSlices, slices)
	}
	_ = m.Close(ctx, inode)
	if st := m.Unlink(ctx, 1, "f"); st != 0 {
		t.Fatalf("unlink file %s", st)
	}

	time.Sleep(time.Millisecond * 100)
	slices = make(map[Ino][]Slice)
	m.ListSlices(ctx, slices, false, false, nil)
	totalSlices = 0
	for _, ss := range slices {
		totalSlices += len(ss)
	}
	// the last chunk could be found and deleted
	if totalSlices > 1 {
		t.Fatalf("number of slices: %d > 1, %+v", totalSlices, slices)
	}
}

func testCopyFileRange(t *testing.T, m Meta) {
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		return nil
	})

	ctx := Background()
	var iin, iout Ino
	var attr = &Attr{}
	_ = m.Unlink(ctx, 1, "fin")
	_ = m.Unlink(ctx, 1, "fout")
	if st := m.Create(ctx, 1, "fin", 0650, 022, 0, &iin, attr); st != 0 {
		t.Fatalf("create file %s", st)
	}
	defer m.Unlink(ctx, 1, "fin")
	if st := m.Create(ctx, 1, "fout", 0650, 022, 0, &iout, attr); st != 0 {
		t.Fatalf("create file %s", st)
	}
	defer m.Unlink(ctx, 1, "fout")

	var sliceIds [4]uint64
	for i := 0; i < len(sliceIds); i++ {
		if st := m.NewSlice(Background(), &sliceIds[i]); st != 0 {
			t.Fatalf("new chunk: %s", st)
		}
	}

	if st := m.Write(ctx, iin, 0, 100, Slice{sliceIds[0], 200, 0, 100}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	if st := m.Write(ctx, iin, 1, 100<<10, Slice{sliceIds[1], 40 << 20, 0, 40 << 20}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	if st := m.Write(ctx, iin, 3, 0, Slice{sliceIds[2], 63 << 20, 10 << 20, 30 << 20}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	if st := m.Write(ctx, iout, 2, 10<<20, Slice{sliceIds[3], 50 << 20, 10 << 20, 30 << 20}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	var copied uint64
	if st := m.CopyFileRange(ctx, iin, 150, iout, 30<<20, 200<<20, 0, &copied, nil); st != 0 {
		t.Fatalf("copy file range: %s", st)
	}
	var expected uint64 = 200 << 20
	if copied != expected {
		t.Fatalf("expect copy %d bytes, but got %d", expected, copied)
	}
	var expectedSlices = [][]Slice{
		{{0, 30 << 20, 0, 30 << 20}, {sliceIds[0], 200, 50, 50}, {0, 0, 200, ChunkSize - 30<<20 - 50}},
		{{0, 0, 150 + (ChunkSize - 30<<20), 30<<20 - 150}, {0, 0, 0, 100 << 10}, {sliceIds[1], 40 << 20, 0, (34 << 20) + 150 - (100 << 10)}},
		{{sliceIds[1], 40 << 20, (34 << 20) + 150 - (100 << 10), 6<<20 - 150 + 100<<10}, {0, 0, 40<<20 + 100<<10, ChunkSize - 40<<20 - 100<<10}, {0, 0, 0, 150 + (ChunkSize - 30<<20)}},
		{{0, 0, 150 + (ChunkSize - 30<<20), 30<<20 - 150}, {sliceIds[2], 63 << 20, 10 << 20, (8 << 20) + 150}},
	}
	for i := uint32(0); i < 4; i++ {
		var slices []Slice
		if st := m.Read(ctx, iout, i, &slices); st != 0 {
			t.Fatalf("read chunk %d: %s", i, st)
		}
		if len(slices) != len(expectedSlices[i]) {
			t.Fatalf("expect chunk %d: %+v, but got %+v", i, expectedSlices[i], slices)
		}
		for j, s := range slices {
			if s != expectedSlices[i][j] {
				t.Fatalf("expect slice %d,%d: %+v, but got %+v", i, j, expectedSlices[i][j], s)
			}
		}
	}
}

func testCloseSession(t *testing.T, m Meta) {
	// reset session
	m.getBase().sid = 0
	if err := m.NewSession(true); err != nil {
		t.Fatalf("new session: %s", err)
	}

	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Flock(ctx, inode, 1, syscall.F_WRLCK, false); st != 0 {
		t.Fatalf("flock wlock: %s", st)
	}
	if st := m.Setlk(ctx, inode, 1, false, syscall.F_WRLCK, 0x10000, 0x20000, 1); st != 0 {
		t.Fatalf("plock wlock: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 {
		t.Fatalf("open f: %s", st)
	}
	if st := m.Unlink(ctx, 1, "f"); st != 0 {
		t.Fatalf("unlink f: %s", st)
	}
	time.Sleep(10 * time.Millisecond)
	sid := m.getBase().sid
	s, err := m.GetSession(sid, true)
	if err != nil {
		t.Fatalf("get session: %s", err)
	} else {
		if len(s.Flocks) != 1 || len(s.Plocks) != 1 || len(s.Sustained) != 1 {
			t.Fatalf("incorrect session: flock %d plock %d sustained %d", len(s.Flocks), len(s.Plocks), len(s.Sustained))
		}
	}
	if err = m.CloseSession(); err != nil {
		t.Fatalf("close session: %s", err)
	}
	if _, err = m.GetSession(sid, true); err == nil {
		t.Fatalf("get a deleted session: %s", err)
	}
	switch m := m.(type) {
	case *redisMeta:
		s, err = m.getSession(strconv.FormatUint(sid, 10), true)
	case *dbMeta:
		s, err = m.getSession(&session2{Sid: sid, Info: []byte("{}")}, true)
	case *kvMeta:
		s, err = m.getSession(sid, true)
	}
	if err != nil {
		t.Fatalf("get session: %s", err)
	}
	if s.SessionInfo.Version != "" || s.SessionInfo.HostName != "" || s.SessionInfo.IPAddrs != nil ||
		s.SessionInfo.MountPoint != "" || s.SessionInfo.ProcessID != 0 {
		t.Fatalf("incorrect session info %+v", s.SessionInfo)
	}
	if len(s.Flocks) != 0 || len(s.Plocks) != 0 || len(s.Sustained) != 0 {
		t.Fatalf("incorrect session: flock %d plock %d sustained %d", len(s.Flocks), len(s.Plocks), len(s.Sustained))
	}
}

func testTrash(t *testing.T, m Meta) {
	format := testFormat()
	format.TrashDays = 1
	if err := m.Init(format, false); err != nil {
		t.Fatalf("init: %v", err)
	}
	defer func() {
		if err := m.Init(testFormat(), false); err != nil {
			t.Fatalf("init: %v", err)
		}
	}()
	ctx := Background()
	var inode, parent Ino
	var attr = &Attr{}
	if st := m.Create(ctx, 1, "f1", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f1: %s", st)
	}
	if st := m.Create(ctx, 1, "f2", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f2: %s", st)
	}
	if st := m.Mkdir(ctx, 1, "d", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	if st := m.Create(ctx, parent, "f", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create d/f: %s", st)
	}
	if st := m.Rename(ctx, 1, "f1", 1, "d", 0, &inode, attr); st != syscall.EISDIR {
		t.Fatalf("rename f1 -> d: %s", st)
	}
	if st := m.Unlink(ctx, parent, "f"); st != 0 {
		t.Fatalf("unlink d/f: %s", st)
	}
	if st := m.GetAttr(ctx, inode, attr); st != 0 || attr.Parent != TrashInode+1 {
		t.Fatalf("getattr f(%d): %s, attr %+v", inode, st, attr)
	}
	if st := m.Truncate(ctx, inode, 0, 1<<30, attr, false); st != syscall.EPERM {
		t.Fatalf("should not truncate a file in trash")
	}
	if st := m.Open(ctx, inode, uint32(syscall.O_RDWR), attr); st != syscall.EPERM {
		t.Fatalf("should not fallocate a file in trash")
	}
	if st := m.SetAttr(ctx, inode, SetAttrMode, 1, &Attr{Mode: 0}); st != syscall.EPERM {
		t.Fatalf("should not change mode of a file in trash")
	}
	var parent2 Ino
	if st := m.Mkdir(ctx, 1, "d2", 0755, 022, 0, &parent2, attr); st != 0 {
		t.Fatalf("mkdir d2: %s", st)
	}
	if st := m.Rmdir(ctx, 1, "d2"); st != 0 {
		t.Fatalf("rmdir d2: %s", st)
	}
	if st := m.GetAttr(ctx, parent2, attr); st != 0 || attr.Parent != TrashInode+1 {
		t.Fatalf("getattr d2(%d): %s, attr %+v", parent2, st, attr)
	}
	var tino Ino
	if st := m.Mkdir(ctx, parent2, "d3", 0777, 022, 0, &tino, attr); st != syscall.ENOENT {
		t.Fatalf("mkdir inside trash should fail")
	}
	if st := m.Create(ctx, parent2, "d3", 0755, 022, 0, &tino, attr); st != syscall.ENOENT {
		t.Fatalf("create inside trash should fail")
	}
	if st := m.Link(ctx, inode, parent2, "ttlink", attr); st != syscall.ENOENT {
		t.Fatalf("link inside trash should fail")
	}
	if st := m.Rename(ctx, 1, "d", parent2, "ttlink", 0, &tino, attr); st != syscall.ENOENT {
		t.Fatalf("link inside trash should fail")
	}
	if st := m.Rmdir(ctx, 1, "d"); st != 0 {
		t.Fatalf("rmdir d: %s", st)
	}
	if st := m.Rename(ctx, 1, "f1", 1, "d", 0, &inode, attr); st != 0 {
		t.Fatalf("rename f1 -> d: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", TrashInode, "td", 0, &inode, attr); st != syscall.EPERM {
		t.Fatalf("rename f2 -> td: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", TrashInode+1, "td", 0, &inode, attr); st != syscall.EPERM {
		t.Fatalf("rename f2 -> td: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", 1, "d", 0, &inode, attr); st != 0 {
		t.Fatalf("rename f2 -> d: %s", st)
	}
	if st := m.Link(ctx, inode, 1, "l", attr); st != 0 || attr.Nlink != 2 {
		t.Fatalf("link d -> l1: %s", st)
	}
	if st := m.Unlink(ctx, 1, "l"); st != 0 {
		t.Fatalf("unlink l: %s", st)
	}
	// hardlink goes to the trash
	if st := m.GetAttr(ctx, inode, attr); st != 0 || attr.Nlink != 2 {
		t.Fatalf("getattr d(%d): %s, attr %+v", inode, st, attr)
	}
	if st := m.Link(ctx, inode, 1, "l", attr); st != 0 || attr.Nlink != 3 {
		t.Fatalf("link d -> l1: %s", st)
	}
	if st := m.Unlink(ctx, 1, "l"); st != 0 {
		t.Fatalf("unlink l: %s", st)
	}
	// hardlink is deleted directly
	if st := m.GetAttr(ctx, inode, attr); st != 0 || attr.Nlink != 2 {
		t.Fatalf("getattr d(%d): %s, attr %+v", inode, st, attr)
	}
	if st := m.Unlink(ctx, 1, "d"); st != 0 {
		t.Fatalf("unlink d: %s", st)
	}
	lname := strings.Repeat("f", MaxName)
	if st := m.Create(ctx, 1, lname, 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create %s: %s", lname, st)
	}
	if st := m.Unlink(ctx, 1, lname); st != 0 {
		t.Fatalf("unlink %s: %s", lname, st)
	}
	tname := fmt.Sprintf("1-%d-%s", inode, lname)[:MaxName]
	if st := m.Lookup(ctx, TrashInode+1, tname, &inode, attr, true); st != 0 || attr.Parent != TrashInode+1 {
		t.Fatalf("lookup subTrash/%s: %s, attr %+v", tname, st, attr)
	}
	var entries []*Entry
	if st := m.Readdir(ctx, 1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 2 {
		t.Fatalf("entries: %d", len(entries))
	}
	entries = entries[:0]
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 9 {
		t.Fatalf("entries: %d", len(entries))
	}
	// test Remove with skipTrash true/false
	if st := m.Mkdir(ctx, 1, "d10", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d10: %s", st)
	}
	if st := m.Create(ctx, parent, "f10", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create d10/f10: %s", st)
	}
	if st := m.Mkdir(ctx, parent, "d10", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d10/d10: %s", st)
	}
	if st := m.Remove(ctx, 1, "d10", false, RmrDefaultThreads, nil); st != 0 {
		t.Fatalf("rmr d10: %s", st)
	}
	entries = entries[:0]
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 12 {
		t.Fatalf("entries: %d", len(entries))
	}
	if st := m.Mkdir(ctx, 1, "d10", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d10: %s", st)
	}
	if st := m.Create(ctx, parent, "f10", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create d10/f10: %s", st)
	}
	if st := m.Mkdir(ctx, parent, "d10", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d10/d10: %s", st)
	}
	if st := m.Remove(ctx, 1, "d10", true, RmrDefaultThreads, nil); st != 0 {
		t.Fatalf("rmr d10: %s", st)
	}
	entries = entries[:0]
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 12 {
		t.Fatalf("entries: %d", len(entries))
	}

	// Selectively skip trash based on FS_SECRM_FL
	if st := m.Mkdir(ctx, 1, "secrmd", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir secrmd: %s", st)
	}
	if st := m.SetAttr(ctx, parent, SetAttrFlag, 0, &Attr{Flags: FlagSkipTrash}); st != 0 {
		t.Fatalf("setattr secrmd secrm: %s", st)
	}
	if st := m.Create(ctx, parent, "f1", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create secrmd/f1: %s", st)
	}
	if st := m.GetAttr(ctx, inode, attr); st != 0 || (attr.Flags&FlagSkipTrash) == 0 {
		t.Fatalf("getattr secrmd/f1(%d): %s, attr %+v", inode, st, attr)
	}
	if st := m.Unlink(ctx, parent, "f1"); st != 0 {
		t.Fatalf("unlink secrmd/f1: %s", st)
	}
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 12 {
		t.Fatalf("entries: %d", len(entries))
	}
	entries = entries[:0]
	if st := m.Mkdir(ctx, parent, "d1", 0755, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("mkdir secrmd/d1: %s", st)
	}
	if st := m.Rmdir(ctx, parent, "d1"); st != 0 {
		t.Fatalf("rmdir secrmd/d1: %s", st)
	}
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 12 {
		t.Fatalf("entries: %d", len(entries))
	}
	entries = entries[:0]
	if st := m.Create(ctx, parent, "f2", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create secrmd/f2: %s", st)
	}
	if st := m.Create(ctx, parent, "f3", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create secrmd/f3: %s", st)
	}
	if st := m.Rename(ctx, parent, "f2", parent, "f3", 0, &inode, attr); st != 0 {
		t.Fatalf("rename secrmd/f2 -> f3: %s", st)
	}
	if st := m.Readdir(ctx, TrashInode+1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	}
	if len(entries) != 12 {
		t.Fatalf("entries: %d", len(entries))
	}
	entries = entries[:0]
	if st := m.Unlink(ctx, parent, "f3"); st != 0 {
		t.Fatalf("unlink secrmd/f3: %s", st)
	}
	if st := m.Rmdir(ctx, 1, "secrmd"); st != 0 {
		t.Fatalf("rmdir secrmd: %s", st)
	}

	ctx2 := NewContext(1000, 1, []uint32{1})
	if st := m.Unlink(ctx2, TrashInode+1, "d"); st != syscall.EPERM {
		t.Fatalf("unlink d: %s", st)
	}
	if st := m.Rmdir(ctx2, TrashInode+1, "d"); st != syscall.EPERM {
		t.Fatalf("rmdir d: %s", st)
	}
	if st := m.Rename(ctx2, TrashInode+1, "d", 1, "f", 0, &inode, attr); st != syscall.EPERM {
		t.Fatalf("rename d -> f: %s", st)
	}
	m.getBase().doCleanupTrash(Background(), format.TrashDays, true, nil)
	if st := m.GetAttr(ctx2, TrashInode+1, attr); st != syscall.ENOENT {
		t.Fatalf("getattr: %s", st)
	}
}

func testParents(t *testing.T, m Meta) {
	ctx := Background()
	var inode, parent Ino
	var attr = &Attr{}
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if attr.Parent != 1 {
		t.Fatalf("expect parent 1, but got %d", attr.Parent)
	}
	checkParents := func(inode Ino, expect map[Ino]int) {
		if ps := m.GetParents(ctx, inode); ps == nil {
			t.Fatalf("get parents of inode %d returns nil", inode)
		} else if !reflect.DeepEqual(ps, expect) {
			t.Fatalf("expect parents %v, but got %v", expect, ps)
		}
	}
	checkParents(inode, map[Ino]int{1: 1})

	if st := m.Link(ctx, inode, 1, "l1", attr); st != 0 {
		t.Fatalf("link l1 -> f: %s", st)
	}
	if attr.Parent != 0 {
		t.Fatalf("expect parent 0, but got %d", attr.Parent)
	}
	checkParents(inode, map[Ino]int{1: 2})

	if st := m.Mkdir(ctx, 1, "d", 0755, 022, 0, &parent, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	if st := m.Link(ctx, inode, parent, "l2", attr); st != 0 {
		t.Fatalf("link l2 -> f: %s", st)
	}
	if st := m.Link(ctx, inode, parent, "l3", attr); st != 0 {
		t.Fatalf("link l3 -> f: %s", st)
	}
	checkParents(inode, map[Ino]int{1: 2, parent: 2})

	if st := m.Unlink(ctx, 1, "f"); st != 0 {
		t.Fatalf("unlink f: %s", st)
	}
	if st := m.Create(ctx, 1, "f2", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f2: %s", st)
	}
	if st := m.Rename(ctx, 1, "f2", 1, "l1", 0, &inode, attr); st != 0 {
		t.Fatalf("rename f2 -> l1: %s", st)
	}
	if st := m.Lookup(ctx, parent, "l2", &inode, attr, true); st != 0 {
		t.Fatalf("lookup d/l2: %s", st)
	}
	if attr.Parent != 0 {
		t.Fatalf("expect parent 0, but got %d", attr.Parent)
	}
	if st := m.Unlink(ctx, parent, "l2"); st != 0 {
		t.Fatalf("unlink d/l2: %s", st)
	}
	checkParents(inode, map[Ino]int{parent: 1})

	// clean up
	if st := m.Unlink(ctx, 1, "l1"); st != 0 {
		t.Fatalf("unlink l1: %s", st)
	}
	if st := m.Unlink(ctx, parent, "l3"); st != 0 {
		t.Fatalf("unlink d/l3: %s", st)
	}
	if st := m.Rmdir(ctx, 1, "d"); st != 0 {
		t.Fatalf("rmdir d: %s", st)
	}
}

func testOpenCache(t *testing.T, m Meta) {
	ctx := Background()
	var inode Ino
	var attr = &Attr{}
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	defer m.Unlink(ctx, 1, "f")
	if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != 0 {
		t.Fatalf("open f: %s", st)
	}
	defer m.Close(ctx, inode)

	var attr2 = &Attr{}
	if st := m.GetAttr(ctx, inode, attr2); st != 0 {
		t.Fatalf("getattr f: %s", st)
	}
	if *attr != *attr2 {
		t.Fatalf("attrs not the same: attr %+v; attr2 %+v", *attr, *attr2)
	}
	attr2.Uid = 1
	if st := m.SetAttr(ctx, inode, SetAttrUID, 0, attr2); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.GetAttr(ctx, inode, attr); st != 0 {
		t.Fatalf("getattr f: %s", st)
	}
	if attr.Uid != 1 {
		t.Fatalf("attr uid should be 1: %+v", *attr)
	}
}

func testReadOnly(t *testing.T, m Meta) {
	ctx := Background()
	if err := m.NewSession(true); err != nil {
		t.Fatalf("new session: %s", err)
	}
	defer m.CloseSession()

	var inode Ino
	var attr = &Attr{}
	if st := m.GetAttr(ctx, 1, attr); st != 0 {
		t.Fatalf("getattr 1: %s", st)
	}
	if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &inode, attr); st != syscall.EROFS {
		t.Fatalf("mkdir d: %s", st)
	}
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, attr); st != syscall.EROFS {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_RDWR, attr); st != syscall.EROFS {
		t.Fatalf("open f: %s", st)
	}

	if plocks, flocks, err := m.ListLocks(ctx, 1); err != nil || len(plocks) != 0 || len(flocks) != 0 {
		t.Fatalf("list locks: %v %v %v", plocks, flocks, err)
	}
}

func testConcurrentDir(t *testing.T, m Meta) {
	ctx := Background()
	var g sync.WaitGroup
	var err error
	format, err := m.Load(false)
	format.Capacity = 0
	format.Inodes = 0
	if err = m.Init(format, false); err != nil {
		t.Fatalf("set quota failed: %s", err)
	}
	for i := 0; i < 100; i++ {
		g.Add(1)
		go func(i int) {
			defer g.Done()
			var d1, d2 Ino
			var attr = new(Attr)
			if st := m.Mkdir(ctx, 1, "d1", 0640, 022, 0, &d1, attr); st != 0 && st != syscall.EEXIST {
				panic(fmt.Errorf("mkdir d1: %s", st))
			} else if st == syscall.EEXIST {
				st = m.Lookup(ctx, 1, "d1", &d1, attr, true)
				if st != 0 {
					panic(fmt.Errorf("lookup d1: %s", st))
				}
			}
			if st := m.Mkdir(ctx, 1, "d2", 0640, 022, 0, &d2, attr); st != 0 && st != syscall.EEXIST {
				panic(fmt.Errorf("mkdir d2: %s", st))
			} else if st == syscall.EEXIST {
				st = m.Lookup(ctx, 1, "d2", &d2, attr, true)
				if st != 0 {
					panic(fmt.Errorf("lookup d2: %s", st))
				}
			}
			name := fmt.Sprintf("file%d", i)
			var f Ino
			if st := m.Create(ctx, d1, name, 0664, 0, 0, &f, attr); st != 0 {
				panic(fmt.Errorf("create d1/%s: %s", name, st))
			}
			if st := m.Rename(ctx, d1, name, d2, name, 0, &f, attr); st != 0 {
				panic(fmt.Errorf("rename d1/%s -> d2/%s: %s", name, name, st))
			}
		}(i)
	}
	g.Wait()
	if err != nil {
		t.Fatalf("concurrent dir: %s", err)
	}
	for i := 0; i < 100; i++ {
		g.Add(1)
		go func(i int) {
			defer g.Done()
			var d2 Ino
			var attr = new(Attr)
			st := m.Lookup(ctx, 1, "d2", &d2, attr, true)
			if st != 0 {
				panic(fmt.Errorf("lookup d2: %s", st))
			}
			name := fmt.Sprintf("file%d", i)
			if st := m.Unlink(ctx, d2, name); st != 0 {
				panic(fmt.Errorf("unlink d2/%s: %s", name, st))
			}
			if st := m.Rmdir(ctx, 1, "d1"); st != 0 && st != syscall.ENOTEMPTY && st != syscall.ENOENT {
				panic(fmt.Errorf("rmdir d1: %s", st))
			}
			if st := m.Rmdir(ctx, 1, "d2"); st != 0 && st != syscall.ENOTEMPTY && st != syscall.ENOENT {
				panic(fmt.Errorf("rmdir d2: %s", st))
			}
		}(i)
	}
	g.Wait()
}

func testAttrFlags(t *testing.T, m Meta) {
	ctx := Background()
	var attr = &Attr{}
	var inode Ino
	if st := m.Create(ctx, 1, "f", 0644, 022, 0, &inode, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	attr.Flags = FlagAppend
	if st := m.SetAttr(ctx, inode, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_WRONLY, attr); st != syscall.EPERM {
		t.Fatalf("open f: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_WRONLY|syscall.O_APPEND, attr); st != 0 {
		t.Fatalf("open f: %s", st)
	}
	attr.Flags = FlagAppend | FlagImmutable
	if st := m.SetAttr(ctx, inode, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_WRONLY, attr); st != syscall.EPERM {
		t.Fatalf("open f: %s", st)
	}
	if st := m.Open(ctx, inode, syscall.O_WRONLY|syscall.O_APPEND, attr); st != syscall.EPERM {
		t.Fatalf("open f: %s", st)
	}

	var d Ino
	if st := m.Mkdir(ctx, 1, "d", 0640, 022, 0, &d, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	attr.Flags = FlagAppend
	if st := m.SetAttr(ctx, d, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Create(ctx, d, "f", 0644, 022, 0, &inode, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Unlink(ctx, d, "f"); st != syscall.EPERM {
		t.Fatalf("unlink f: %s", st)
	}
	attr.Flags = FlagAppend | FlagImmutable
	if st := m.SetAttr(ctx, d, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Create(ctx, d, "f2", 0644, 022, 0, &inode, nil); st != syscall.EPERM {
		t.Fatalf("create f2: %s", st)
	}

	var Immutable Ino
	if st := m.Mkdir(ctx, 1, "ImmutFile", 0640, 022, 0, &Immutable, attr); st != 0 {
		t.Fatalf("mkdir d: %s", st)
	}
	attr.Flags = FlagImmutable
	if st := m.SetAttr(ctx, Immutable, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Create(ctx, Immutable, "f2", 0644, 022, 0, &inode, nil); st != syscall.EPERM {
		t.Fatalf("create f2: %s", st)
	}

	var src1, dst1, mfile Ino
	attr.Flags = 0
	if st := m.Mkdir(ctx, 1, "src1", 0640, 022, 0, &src1, attr); st != 0 {
		t.Fatalf("mkdir src1: %s", st)
	}
	if st := m.Create(ctx, src1, "mfile", 0644, 022, 0, &mfile, nil); st != 0 {
		t.Fatalf("create mfile: %s", st)
	}
	if st := m.Mkdir(ctx, 1, "dst1", 0640, 022, 0, &dst1, attr); st != 0 {
		t.Fatalf("mkdir dst1: %s", st)
	}

	attr.Flags = FlagAppend
	if st := m.SetAttr(ctx, src1, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Rename(ctx, src1, "mfile", dst1, "mfile", 0, &mfile, attr); st != syscall.EPERM {
		t.Fatalf("rename d: %s", st)
	}

	attr.Flags = FlagImmutable
	if st := m.SetAttr(ctx, src1, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Rename(ctx, src1, "mfile", dst1, "mfile", 0, &mfile, attr); st != syscall.EPERM {
		t.Fatalf("rename d: %s", st)
	}

	if st := m.SetAttr(ctx, dst1, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Rename(ctx, src1, "mfile", dst1, "mfile", 0, &mfile, attr); st != syscall.EPERM {
		t.Fatalf("rename d: %s", st)
	}

	var delFile Ino
	if st := m.Create(ctx, 1, "delfile", 0644, 022, 0, &delFile, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	attr.Flags = FlagImmutable | FlagAppend
	if st := m.SetAttr(ctx, delFile, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr d: %s", st)
	}
	if st := m.Unlink(ctx, 1, "delfile"); st != syscall.EPERM {
		t.Fatalf("unlink f: %s", st)
	}

	var fallocFile Ino
	if st := m.Create(ctx, 1, "fallocfile", 0644, 022, 0, &fallocFile, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	attr.Flags = FlagAppend
	if st := m.SetAttr(ctx, fallocFile, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.Fallocate(ctx, fallocFile, fallocKeepSize, 0, 1024, nil); st != 0 {
		t.Fatalf("fallocate f: %s", st)
	}
	if st := m.Fallocate(ctx, fallocFile, fallocKeepSize|fallocZeroRange, 0, 1024, nil); st != syscall.EPERM {
		t.Fatalf("fallocate f: %s", st)
	}
	attr.Flags = FlagImmutable
	if st := m.SetAttr(ctx, fallocFile, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.Fallocate(ctx, fallocFile, fallocKeepSize, 0, 1024, nil); st != syscall.EPERM {
		t.Fatalf("fallocate f: %s", st)
	}

	var copysrcFile, copydstFile Ino
	if st := m.Create(ctx, 1, "copysrcfile", 0644, 022, 0, &copysrcFile, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Create(ctx, 1, "copydstfile", 0644, 022, 0, &copydstFile, nil); st != 0 {
		t.Fatalf("create f: %s", st)
	}
	if st := m.Fallocate(ctx, copysrcFile, 0, 0, 1024, nil); st != 0 {
		t.Fatalf("fallocate f: %s", st)
	}
	attr.Flags = FlagAppend
	if st := m.SetAttr(ctx, copydstFile, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.CopyFileRange(ctx, copysrcFile, 0, copydstFile, 0, 1024, 0, nil, nil); st != syscall.EPERM {
		t.Fatalf("copy_file_range f: %s", st)
	}
	attr.Flags = FlagImmutable
	if st := m.SetAttr(ctx, copydstFile, SetAttrFlag, 0, attr); st != 0 {
		t.Fatalf("setattr f: %s", st)
	}
	if st := m.CopyFileRange(ctx, copysrcFile, 0, copydstFile, 0, 1024, 0, nil, nil); st != syscall.EPERM {
		t.Fatalf("copy_file_range f: %s", st)
	}
}

func setAttr(t *testing.T, m Meta, inode Ino, attr *Attr) {
	var err error
	switch m := m.(type) {
	case *redisMeta:
		err = m.txn(Background(), func(tx *redis.Tx) error {
			return tx.Set(Background(), m.inodeKey(inode), m.marshal(attr), 0).Err()
		}, m.inodeKey(inode))
	case *dbMeta:
		err = m.txn(func(s *xorm.Session) error {
			_, err = s.ID(inode).AllCols().Update(&node{
				Inode:     inode,
				Type:      attr.Typ,
				Flags:     attr.Flags,
				Mode:      attr.Mode,
				Uid:       attr.Uid,
				Gid:       attr.Gid,
				Atime:     attr.Atime*1e6 + int64(attr.Atimensec)/1e3,
				Mtime:     attr.Mtime*1e6 + int64(attr.Mtimensec)/1e3,
				Ctime:     attr.Ctime*1e6 + int64(attr.Ctimensec)/1e3,
				Atimensec: int16(attr.Atimensec % 1e3),
				Mtimensec: int16(attr.Mtimensec % 1e3),
				Ctimensec: int16(attr.Ctimensec % 1e3),

				Nlink:  attr.Nlink,
				Length: attr.Length,
				Rdev:   attr.Rdev,
				Parent: attr.Parent,
			})
			return err
		})
	case *kvMeta:
		err = m.txn(Background(), func(tx *kvTxn) error {
			tx.set(m.inodeKey(inode), m.marshal(attr))
			return nil
		})
	}
	if err != nil {
		t.Fatalf("setAttr: %v", err)
	}
}

func testCheckAndRepair(t *testing.T, m Meta) {
	var checkInode, d1Inode, d2Inode, d3Inode, d4Inode Ino
	dirAttr := &Attr{Mode: 0644, Full: true, Typ: TypeDirectory, Nlink: 3}
	if st := m.Mkdir(Background(), RootInode, "check", 0640, 022, 0, &checkInode, dirAttr); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	if st := m.Mkdir(Background(), checkInode, "d1", 0640, 022, 0, &d1Inode, dirAttr); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	if st := m.Mkdir(Background(), d1Inode, "d2", 0640, 022, 0, &d2Inode, dirAttr); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	if st := m.Mkdir(Background(), d2Inode, "d3", 0640, 022, 0, &d3Inode, dirAttr); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	if st := m.Mkdir(Background(), d3Inode, "d4", 0640, 022, 0, &d4Inode, dirAttr); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}

	if st := m.GetAttr(Background(), checkInode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	dirAttr.Nlink = 0
	setAttr(t, m, checkInode, dirAttr)

	if st := m.GetAttr(Background(), d1Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	dirAttr.Nlink = 0
	setAttr(t, m, d1Inode, dirAttr)

	if st := m.GetAttr(Background(), d2Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	dirAttr.Nlink = 0
	setAttr(t, m, d2Inode, dirAttr)

	if st := m.GetAttr(Background(), d3Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	dirAttr.Nlink = 0
	setAttr(t, m, d3Inode, dirAttr)

	if st := m.GetAttr(Background(), d4Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	dirAttr.Full = false
	dirAttr.Nlink = 0
	setAttr(t, m, d4Inode, dirAttr)

	showProgress := func(n int) {}
	slices := make(map[Ino][]Slice)
	if err := m.Check(Background(), "/check", &CheckOpt{
		ShowProgress: showProgress,
		Slices:       slices,
	}); err == nil {
		t.Fatal("check should fail")
	}
	if st := m.GetAttr(Background(), checkInode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	if dirAttr.Nlink != 0 {
		t.Fatalf("checkInode nlink should is 0 now: %d", dirAttr.Nlink)
	}

	if err := m.Check(Background(), "/check", &CheckOpt{
		Repair:       true,
		ShowProgress: showProgress,
		Slices:       slices,
	}); err != nil {
		t.Fatalf("check: %s", err)
	}
	if st := m.GetAttr(Background(), checkInode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	if dirAttr.Nlink != 3 || dirAttr.Parent != RootInode {
		t.Fatalf("checkInode nlink should is 3 now: %d", dirAttr.Nlink)
	}

	if err := m.Check(Background(), "/check/d1/d2", &CheckOpt{
		Repair:       true,
		ShowProgress: showProgress,
		Slices:       slices,
	}); err != nil {
		t.Fatalf("check: %s", err)
	}
	if st := m.GetAttr(Background(), d2Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	if dirAttr.Nlink != 3 || dirAttr.Parent != d1Inode {
		t.Fatalf("d2Inode nlink should is 3 now: %d", dirAttr.Nlink)
	}
	if st := m.GetAttr(Background(), d1Inode, dirAttr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	if dirAttr.Nlink != 0 || dirAttr.Parent != checkInode {
		t.Fatalf("d1Inode nlink should is 0 now: %d", dirAttr.Nlink)
	}

	if m.Name() != "etcd" {
		if err := m.Check(Background(), "/", &CheckOpt{
			Repair:       true,
			Recursive:    true,
			ShowProgress: showProgress,
			Slices:       slices,
		}); err != nil {
			t.Fatalf("check: %s", err)
		}
		for _, ino := range []Ino{checkInode, d1Inode, d2Inode, d3Inode} {
			if st := m.GetAttr(Background(), ino, dirAttr); st != 0 {
				t.Fatalf("getattr: %s", st)
			}
			if !dirAttr.Full || dirAttr.Nlink != 3 {
				t.Fatalf("nlink should is 3 now: %d", dirAttr.Nlink)
			}
		}
		if st := m.GetAttr(Background(), d4Inode, dirAttr); st != 0 {
			t.Fatalf("getattr: %s", st)
		}
		if !dirAttr.Full || dirAttr.Nlink != 2 || dirAttr.Parent != d3Inode {
			t.Fatalf("d4Inode  attr: %+v", *dirAttr)
		}
	}
}

func testDirStat(t *testing.T, m Meta) {
	testDir := "testDirStat"
	var testInode Ino
	// test empty dir
	if st := m.Mkdir(Background(), RootInode, testDir, 0640, 022, 0, &testInode, nil); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	if err := m.NewSession(true); err != nil {
		t.Fatalf("new session: %s", err)
	}
	defer m.CloseSession()
	stat, st := m.GetDirStat(Background(), testInode)
	checkResult := func(length, space, inodes int64) {
		if st != 0 {
			t.Fatalf("get dir usage: %s", st)
		}
		expect := dirStat{length, space, inodes}
		if *stat != expect {
			t.Fatalf("test dir usage: expect %+v, but got %+v", expect, stat)
		}
	}
	checkResult(0, 0, 0)

	// test dir with file
	var fileInode Ino
	if st := m.Create(Background(), testInode, "file", 0640, 022, 0, &fileInode, nil); st != 0 {
		t.Fatalf("create: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(0, align4K(0), 1)

	// test dir with file and fallocate
	if st := m.Fallocate(Background(), fileInode, 0, 0, 4097, nil); st != 0 {
		t.Fatalf("fallocate: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(4097, align4K(4097), 1)

	// test dir with file and truncate
	if st := m.Truncate(Background(), fileInode, 0, 0, nil, false); st != 0 {
		t.Fatalf("truncate: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(0, align4K(0), 1)

	// test dir with file and write
	if st := m.Write(Background(), fileInode, 0, 0, Slice{Id: 1, Size: 1 << 20, Off: 0, Len: 4097}, time.Now()); st != 0 {
		t.Fatalf("write: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(4097, align4K(4097), 1)

	// test dir with file and link
	if st := m.Link(Background(), fileInode, testInode, "file2", nil); st != 0 {
		t.Fatalf("link: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(2*4097, 2*align4K(4097), 2)

	// test dir with subdir
	var subInode Ino
	if st := m.Mkdir(Background(), testInode, "sub", 0640, 022, 0, &subInode, nil); st != 0 {
		t.Fatalf("mkdir: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(2*4097, align4K(0)+2*align4K(4097), 3)

	// test rename
	if st := m.Rename(Background(), testInode, "file2", subInode, "file", 0, nil, nil); st != 0 {
		t.Fatalf("rename: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(4097, align4K(0)+align4K(4097), 2)
	stat, st = m.GetDirStat(Background(), subInode)
	checkResult(4097, align4K(4097), 1)

	// test unlink
	if st := m.Unlink(Background(), testInode, "file"); st != 0 {
		t.Fatalf("unlink: %s", st)
	}
	if st := m.Unlink(Background(), subInode, "file"); st != 0 {
		t.Fatalf("unlink: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(0, align4K(0), 1)
	stat, st = m.GetDirStat(Background(), subInode)
	checkResult(0, 0, 0)

	// test rmdir
	if st := m.Rmdir(Background(), testInode, "sub"); st != 0 {
		t.Fatalf("rmdir: %s", st)
	}
	time.Sleep(500 * time.Millisecond)
	stat, st = m.GetDirStat(Background(), testInode)
	checkResult(0, 0, 0)
}

func testBatchClone(t *testing.T, m Meta) {
	ctx := Background()

	// create source directory with mixed entry types
	var srcDir Ino
	if st := m.Mkdir(ctx, RootInode, "batchSrc", 0755, 022, 0, &srcDir, nil); st != 0 {
		t.Fatalf("mkdir batchSrc: %s", st)
	}

	// file with data
	var file1 Ino
	if st := m.Mknod(ctx, srcDir, "file1", TypeFile, 0644, 022, 0, "", &file1, nil); st != 0 {
		t.Fatalf("mknod file1: %s", st)
	}
	var sliceId1 uint64
	if st := m.NewSlice(ctx, &sliceId1); st != 0 {
		t.Fatalf("new slice: %s", st)
	}
	if st := m.Write(ctx, file1, 0, 0, Slice{sliceId1, 1024, 0, 1024}, time.Now()); st != 0 {
		t.Fatalf("write file1: %s", st)
	}
	if st := m.SetXattr(ctx, file1, "user.tag", []byte("hello"), XattrCreateOrReplace); st != 0 {
		t.Fatalf("setxattr file1: %s", st)
	}

	// empty file
	var file2 Ino
	if st := m.Mknod(ctx, srcDir, "file2", TypeFile, 0644, 022, 0, "", &file2, nil); st != 0 {
		t.Fatalf("mknod file2: %s", st)
	}

	// symlink
	var sym1 Ino
	if st := m.Symlink(ctx, srcDir, "sym1", "/tmp/target", &sym1, nil); st != 0 {
		t.Fatalf("symlink sym1: %s", st)
	}

	// create destination directory
	var dstDir Ino
	if st := m.Mkdir(ctx, RootInode, "batchDst", 0755, 022, 0, &dstDir, nil); st != 0 {
		t.Fatalf("mkdir batchDst: %s", st)
	}

	// read source entries
	var srcEntries []*Entry
	if st := m.Readdir(ctx, srcDir, 1, &srcEntries); st != 0 {
		t.Fatalf("readdir batchSrc: %s", st)
	}
	// filter out . and ..
	var nonDirEntries []*Entry
	for _, e := range srcEntries {
		name := string(e.Name)
		if name == "." || name == ".." {
			continue
		}
		nonDirEntries = append(nonDirEntries, e)
	}

	// --- test 1: successful batch clone ---
	var count uint64
	st := m.getBase().BatchClone(ctx, srcDir, dstDir, nonDirEntries, 0, 022, &count)
	if st == syscall.ENOTSUP {
		m.Remove(ctx, RootInode, "batchSrc", false, RmrDefaultThreads, nil)
		m.Remove(ctx, RootInode, "batchDst", false, RmrDefaultThreads, nil)
		return
	}

	if st != 0 {
		t.Fatalf("BatchClone: %s", st)
	}
	if count != uint64(len(nonDirEntries)) {
		t.Fatalf("BatchClone count: got %d, want %d", count, len(nonDirEntries))
	}

	// verify cloned entries exist
	var dstEntries []*Entry
	if st := m.Readdir(ctx, dstDir, 1, &dstEntries); st != 0 {
		t.Fatalf("readdir batchDst: %s", st)
	}
	dstMap := make(map[string]*Entry)
	for _, e := range dstEntries {
		name := string(e.Name)
		if name != "." && name != ".." {
			dstMap[name] = e
		}
	}
	if len(dstMap) != len(nonDirEntries) {
		t.Fatalf("cloned entry count: got %d, want %d", len(dstMap), len(nonDirEntries))
	}

	// verify file1 clone: data, xattr
	if e, ok := dstMap["file1"]; !ok {
		t.Fatalf("file1 not cloned")
	} else {
		if e.Attr.Typ != TypeFile {
			t.Fatalf("file1 type: got %d, want %d", e.Attr.Typ, TypeFile)
		}
		var slices []Slice
		if st := m.Read(ctx, e.Inode, 0, &slices); st != 0 {
			t.Fatalf("read cloned file1: %s", st)
		}
		if len(slices) == 0 {
			t.Fatal("cloned file1 has no slices")
		}
		var val []byte
		if st := m.GetXattr(ctx, e.Inode, "user.tag", &val); st != 0 {
			t.Fatalf("getxattr cloned file1: %s", st)
		}
		if string(val) != "hello" {
			t.Fatalf("xattr value: got %q, want %q", val, "hello")
		}
	}

	// verify sym1 clone: target
	if e, ok := dstMap["sym1"]; !ok {
		t.Fatalf("sym1 not cloned")
	} else {
		var target []byte
		if st := m.ReadLink(ctx, e.Inode, &target); st != 0 {
			t.Fatalf("readlink cloned sym1: %s", st)
		}
		if string(target) != "/tmp/target" {
			t.Fatalf("symlink target: got %q, want %q", target, "/tmp/target")
		}
	}

	// verify file2 clone: empty file
	if e, ok := dstMap["file2"]; !ok {
		t.Fatalf("file2 not cloned")
	} else {
		if e.Attr.Typ != TypeFile {
			t.Fatalf("file2 type: got %d, want %d", e.Attr.Typ, TypeFile)
		}
		if e.Attr.Length != 0 {
			t.Fatalf("file2 length: got %d, want 0", e.Attr.Length)
		}
	}

	// --- test 2: duplicate entry names (EEXIST) ---
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, dstDir, nonDirEntries, 0, 022, &count)
	if st != syscall.EEXIST {
		t.Fatalf("BatchClone duplicate: got %s, want EEXIST", st)
	}

	// --- test 3: dst parent doesn't exist ---
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, 999999, nonDirEntries, 0, 022, &count)
	if st != syscall.ENOENT {
		t.Fatalf("BatchClone non-existent dst: got %s, want ENOENT", st)
	}

	// --- test 4: dst parent is a file, not directory ---
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, file1, nonDirEntries, 0, 022, &count)
	if st != syscall.ENOTDIR {
		t.Fatalf("BatchClone file as dst: got %s, want ENOTDIR", st)
	}

	// --- test 5: dst parent is immutable ---
	var immDir Ino
	if st := m.Mkdir(ctx, RootInode, "batchImm", 0755, 022, 0, &immDir, nil); st != 0 {
		t.Fatalf("mkdir batchImm: %s", st)
	}
	if st := m.SetAttr(ctx, immDir, SetAttrFlag, 0, &Attr{Flags: FlagImmutable}); st != 0 {
		t.Fatalf("setattr immutable: %s", st)
	}
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, immDir, nonDirEntries, 0, 022, &count)
	if st != syscall.EPERM {
		t.Fatalf("BatchClone immutable dst: got %s, want EPERM", st)
	}
	// clean up immutable flag
	if st := m.SetAttr(ctx, immDir, SetAttrFlag, 0, &Attr{Flags: 0}); st != 0 {
		t.Fatalf("clear immutable: %s", st)
	}
	m.Remove(ctx, RootInode, "batchImm", false, RmrDefaultThreads, nil)

	// --- test 6: empty entries ---
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, dstDir, nil, 0, 022, &count)
	if st != 0 {
		t.Fatalf("BatchClone empty: %s", st)
	}
	if count != 0 {
		t.Fatalf("BatchClone empty count: got %d, want 0", count)
	}

	// --- test 7: preserve attr mode ---
	var dstDir2 Ino
	if st := m.Mkdir(ctx, RootInode, "batchDst2", 0755, 022, 0, &dstDir2, nil); st != 0 {
		t.Fatalf("mkdir batchDst2: %s", st)
	}
	count = 0
	st = m.getBase().BatchClone(ctx, srcDir, dstDir2, nonDirEntries, CLONE_MODE_PRESERVE_ATTR, 022, &count)
	if st != 0 {
		t.Fatalf("BatchClone preserve: %s", st)
	}
	// verify preserved attrs match source
	var dstEntries2 []*Entry
	if st := m.Readdir(ctx, dstDir2, 1, &dstEntries2); st != 0 {
		t.Fatalf("readdir batchDst2: %s", st)
	}
	srcMap := make(map[string]*Entry)
	for _, e := range nonDirEntries {
		srcMap[string(e.Name)] = e
	}
	for _, de := range dstEntries2 {
		name := string(de.Name)
		if name == "." || name == ".." {
			continue
		}
		se, ok := srcMap[name]
		if !ok {
			t.Fatalf("unexpected entry %q in batchDst2", name)
		}
		if de.Attr.Mode != se.Attr.Mode {
			t.Fatalf("preserve mode mismatch for %s: got %o, want %o", name, de.Attr.Mode, se.Attr.Mode)
		}
	}

	// cleanup
	m.Remove(ctx, RootInode, "batchSrc", false, RmrDefaultThreads, nil)
	m.Remove(ctx, RootInode, "batchDst", false, RmrDefaultThreads, nil)
	m.Remove(ctx, RootInode, "batchDst2", false, RmrDefaultThreads, nil)
}

func testClone(t *testing.T, m Meta) {
	// $ tree cloneDir
	// .
	// ├── dir
	// └── dir1
	//    ├── dir2
	//    │ ├── dir3
	//    │ │ └── file3
	//    │ ├── file2
	//    │ └── file2Hardlink
	//    ├── file1
	//    └── file1Symlink -> file1
	var cloneDir Ino
	if eno := m.Mkdir(Background(), RootInode, "cloneDir", 0777, 022, 0, &cloneDir, nil); eno != 0 {
		t.Fatalf("mkdir: %s", eno)
	}
	var dir1 Ino
	if eno := m.Mkdir(Background(), cloneDir, "dir1", 0777, 022, 0, &dir1, nil); eno != 0 {
		t.Fatalf("mkdir: %s", eno)
	}
	var dir Ino
	if eno := m.Mkdir(Background(), cloneDir, "dir", 0777, 022, 0, &dir, nil); eno != 0 {
		t.Fatalf("mkdir: %s", eno)
	}
	var dir2 Ino
	if eno := m.Mkdir(Background(), dir1, "dir2", 0777, 022, 0, &dir2, nil); eno != 0 {
		t.Fatalf("mkdir: %s", eno)
	}
	var dir3 Ino
	if eno := m.Mkdir(Background(), dir2, "dir3", 0777, 022, 0, &dir3, nil); eno != 0 {
		t.Fatalf("mkdir: %s", eno)
	}
	var file1 Ino
	if eno := m.Mknod(Background(), dir1, "file1", TypeFile, 0777, 022, 0, "", &file1, nil); eno != 0 {
		t.Fatalf("mknod: %s", eno)
	}
	var sliceId uint64
	if st := m.NewSlice(Background(), &sliceId); st != 0 {
		t.Fatalf("new chunk: %s", st)
	}
	if st := m.Write(Background(), file1, 0, 0, Slice{sliceId, 67108864, 0, 67108864}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}

	var file2 Ino
	if eno := m.Mknod(Background(), dir2, "file2", TypeFile, 0777, 022, 0, "", &file2, nil); eno != 0 {
		t.Fatalf("mknod: %s", eno)
	}
	var sliceId2 uint64
	if st := m.NewSlice(Background(), &sliceId2); st != 0 {
		t.Fatalf("new chunk: %s", st)
	}
	if st := m.Write(Background(), file2, 0, 0, Slice{sliceId2, 67108863, 0, 67108863}, time.Now()); st != 0 {
		t.Fatalf("write file %s", st)
	}
	var file3 Ino
	if eno := m.Mknod(Background(), dir3, "file3", TypeFile, 0777, 022, 0, "", &file3, nil); eno != 0 {
		t.Fatalf("mknod: %s", eno)
	}
	if eno := m.Fallocate(Background(), file3, 0, 0, 67108864, nil); eno != 0 {
		t.Fatalf("fallocate: %s", eno)
	}

	if eno := m.SetXattr(Background(), file1, "name", []byte("juicefs"), XattrCreateOrReplace); eno != 0 {
		t.Fatalf("setxattr: %s", eno)
	}
	if eno := m.SetXattr(Background(), file1, "name2", []byte("juicefs2"), XattrCreateOrReplace); eno != 0 {
		t.Fatalf("setxattr: %s", eno)
	}

	if eno := m.SetXattr(Background(), dir1, "name", []byte("juicefs"), XattrCreateOrReplace); eno != 0 {
		t.Fatalf("setxattr: %s", eno)
	}
	if eno := m.SetXattr(Background(), dir1, "name2", []byte("juicefs2"), XattrCreateOrReplace); eno != 0 {
		t.Fatalf("setxattr: %s", eno)
	}

	var file1Symlink Ino
	if eno := m.Symlink(Background(), dir1, "file1Symlink", "file1", &file1Symlink, nil); eno != 0 {
		t.Fatalf("symlink: %s", eno)
	}
	if eno := m.Link(Background(), file2, dir2, "file2Hardlink", nil); eno != 0 {
		t.Fatalf("hardlink: %s", eno)
	}

	var attr Attr
	attr.Mtime = 1
	m.SetAttr(Background(), cloneDir, SetAttrMtime, 0, &attr)
	var totalspace, availspace, iused, iavail, space, iused2 uint64
	m.StatFS(Background(), RootInode, &totalspace, &availspace, &iused, &iavail)
	space = totalspace - availspace
	iused2 = iused

	cloneDstName := "cloneDir1"
	var count, total uint64
	var cmode uint8
	cmode |= CLONE_MODE_PRESERVE_ATTR
	if eno := m.Clone(Background(), cloneDir, dir1, cloneDir, cloneDstName, cmode, 022, 4, &count, &total); eno != 0 {
		t.Fatalf("clone: %s", eno)
	}
	var entries1 []*Entry
	if eno := m.Readdir(Background(), cloneDir, 1, &entries1); eno != 0 {
		t.Fatalf("readdir: %s", eno)
	}

	if len(entries1) != 5 {
		t.Fatalf("clone dst dir not found or name not correct")
	}
	var idx int
	for i, ent := range entries1 {
		if string(ent.Name) == cloneDstName {
			idx = i
			break
		}
	}
	if idx == 0 {
		t.Fatalf("clone dst dir not found or name not correct")
	}
	cloneDstIno := entries1[idx].Inode
	cloneDstAttr := entries1[idx].Attr
	if cloneDstAttr.Mode != 0755 {
		t.Fatalf("mode should be 0755 %o", cloneDstAttr.Mode)
	}
	// check dst parent dir nlink
	var rootAttr Attr
	if eno := m.GetAttr(Background(), cloneDir, &rootAttr); eno != 0 {
		t.Fatalf("get rootAttr: %s", eno)
	}
	if rootAttr.Nlink != 5 {
		t.Fatalf("rootDir nlink not correct,nlink: %d", rootAttr.Nlink)
	}
	if rootAttr.Mtime == 1 {
		t.Fatalf("mtime of rootDir is not updated")
	}
	m.StatFS(Background(), cloneDir, &totalspace, &availspace, &iused, &iavail)
	if totalspace-availspace-space != 268451840 {
		time.Sleep(time.Second * 2)
		m.StatFS(Background(), cloneDir, &totalspace, &availspace, &iused, &iavail)
		if totalspace-availspace-space != 268451840 {
			t.Logf("warning: added space: %d", totalspace-availspace-space)
		}
	}
	if iused-iused2 != 8 {
		t.Fatalf("added inodes: %d", iused-iused2)
	}
	if eno := m.Clone(Background(), RootInode, dir1, cloneDir, "no_preserve", 0, 022, 4, &count, &total); eno != 0 {
		t.Fatalf("clone: %s", eno)
	}
	var d2 Ino
	var noPreserveAttr = new(Attr)
	m.Lookup(Background(), cloneDir, "no_preserve", &d2, noPreserveAttr, true)
	var cloneSrcAttr = new(Attr)
	m.GetAttr(Background(), dir1, cloneSrcAttr)
	if noPreserveAttr.Mtimensec == cloneSrcAttr.Mtimensec {
		t.Fatalf("clone: should not preserve mtime")
	}
	if eno := m.Remove(Background(), cloneDir, "no_preserve", false, RmrDefaultThreads, nil); eno != 0 {
		t.Fatalf("Rmdir: %s", eno)
	}
	// check attr
	var removedItem []interface{}
	checkEntryTree(t, m, dir1, cloneDstIno, func(srcEntry, dstEntry *Entry, dstIno Ino) {
		checkEntry(t, m, srcEntry, dstEntry, dstIno)

		switch m := m.(type) {
		case *redisMeta:
			removedItem = append(removedItem, m.inodeKey(dstEntry.Inode), m.entryKey(dstEntry.Inode), m.xattrKey(dstEntry.Inode), m.symKey(dstEntry.Inode))
		case *dbMeta:
			removedItem = append(removedItem, &node{Inode: dstEntry.Inode}, &edge{Inode: dstEntry.Inode, Parent: dstEntry.Attr.Parent}, &xattr{Inode: dstEntry.Inode}, &symlink{Inode: dstEntry.Inode})
		case *kvMeta:
			removedItem = append(removedItem, m.inodeKey(dstEntry.Inode), m.entryKey(dstEntry.Attr.Parent, string(dstEntry.Name)), m.symKey(dstEntry.Inode))
		}
	})
	// check slice ref after clone
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		t.Fatalf("should not delete slice")
		return nil
	})
	if eno := m.Remove(Background(), cloneDir, "dir1", false, RmrDefaultThreads, nil); eno != 0 {
		t.Fatalf("Rmdir: %s", eno)
	}

	var sli1del, sli2del bool
	m.OnMsg(DeleteSlice, func(args ...interface{}) error {
		if args[0].(uint64) == sliceId {
			sli1del = true
		}
		if args[0].(uint64) == sliceId2 {
			sli2del = true
		}
		return nil
	})
	// check remove tree
	var dNode1, dNode2, dNode3, dNode4 Ino = 101, 102, 103, 104
	switch m := m.(type) {
	case *redisMeta:
		// del edge first
		if err := m.rdb.HDel(Background(), m.entryKey(cloneDstAttr.Parent), cloneDstName).Err(); err != nil {
			t.Fatalf("del edge error: %v", err)
		}
		// check remove tree
		if eno := m.doCleanupDetachedNode(Background(), cloneDstIno); eno != 0 {
			t.Fatalf("remove tree error rootInode: %v", cloneDstIno)
		}
		removedKeysStr := make([]string, len(removedItem))
		for i, key := range removedItem {
			removedKeysStr[i] = key.(string)
		}
		removedKeysStr = append(removedKeysStr, m.detachedNodes())
		if exists := m.rdb.Exists(Background(), removedKeysStr...).Val(); exists != 0 {
			t.Fatalf("has keys not removed: %v", removedItem)
		}
		// check detached node
		m.rdb.ZAdd(Background(), m.detachedNodes(), redis.Z{Member: dNode1.String(), Score: float64(time.Now().Add(-1 * time.Minute).Unix())}).Err()
		m.rdb.ZAdd(Background(), m.detachedNodes(), redis.Z{Member: dNode2.String(), Score: float64(time.Now().Add(-5 * time.Minute).Unix())}).Err()
		m.rdb.ZAdd(Background(), m.detachedNodes(), redis.Z{Member: dNode3.String(), Score: float64(time.Now().Add(-48 * time.Hour).Unix())}).Err()
		m.rdb.ZAdd(Background(), m.detachedNodes(), redis.Z{Member: dNode4.String(), Score: float64(time.Now().Add(-48 * time.Hour).Unix())}).Err()
	case *dbMeta:
		if n, err := m.db.Delete(&edge{Parent: cloneDstAttr.Parent, Name: []byte(cloneDstName)}); err != nil || n != 1 {
			t.Fatalf("del edge error: %v", err)
		}
		// check remove tree
		if eno := m.doCleanupDetachedNode(Background(), cloneDstIno); eno != 0 {
			t.Fatalf("remove tree error rootInode: %v", cloneDstIno)
		}
		removedItem = append(removedItem, &detachedNode{Inode: cloneDstIno})
		time.Sleep(1 * time.Second)
		if exists, err := m.db.Exist(removedItem...); err != nil || exists {
			t.Fatalf("has keys not removed: %v", removedItem)
		}
		m.txn(func(s *xorm.Session) error {
			return mustInsert(s,
				&detachedNode{Inode: dNode1, Added: time.Now().Add(-1 * time.Minute).Unix()},
				&detachedNode{Inode: dNode2, Added: time.Now().Add(-5 * time.Minute).Unix()},
				&detachedNode{Inode: dNode3, Added: time.Now().Add(-48 * time.Hour).Unix()},
				&detachedNode{Inode: dNode4, Added: time.Now().Add(-48 * time.Hour).Unix()},
			)
		})
	case *kvMeta:
		// del edge first
		if err := m.deleteKeys(m.entryKey(cloneDstAttr.Parent, cloneDstName)); err != nil {
			t.Fatalf("del edge error: %v", err)
		}
		// check remove tree
		if eno := m.doCleanupDetachedNode(Background(), cloneDstIno); eno != 0 {
			t.Fatalf("remove tree error rootInode: %v", cloneDstIno)
		}
		removedItem = append(removedItem, m.detachedKey(cloneDstIno))
		m.txn(Background(), func(tx *kvTxn) error {
			for _, key := range removedItem {
				if buf := tx.get(key.([]byte)); buf != nil {
					t.Fatalf("has keys not removed: %v", removedItem)
				}
			}
			tx.set(m.detachedKey(dNode1), m.packInt64(time.Now().Add(-1*time.Minute).Unix()))
			tx.set(m.detachedKey(dNode2), m.packInt64(time.Now().Add(-5*time.Minute).Unix()))
			tx.set(m.detachedKey(dNode3), m.packInt64(time.Now().Add(-48*time.Hour).Unix()))
			tx.set(m.detachedKey(dNode4), m.packInt64(time.Now().Add(-48*time.Hour).Unix()))
			return nil
		})

	}
	time.Sleep(1 * time.Second)
	if !sli1del || !sli2del {
		t.Fatalf("slice should be deleted")
	}
	nodes := m.(engine).doFindDetachedNodes(time.Now())
	if len(nodes) != 4 {
		t.Fatalf("find detached nodes error: %v", nodes)
	}
	nodes = m.(engine).doFindDetachedNodes(time.Now().Add(-24 * time.Hour))
	if len(nodes) != 2 {
		t.Fatalf("find detached nodes error: %v", nodes)
	}
	if eno := m.Clone(Background(), RootInode, TrashInode, cloneDir, "xxx", 0, 022, 4, &count, &total); !errors.Is(eno, syscall.EPERM) {
		t.Fatalf("cloning trash files are not supported")
	}
	if eno := m.Clone(Background(), TrashInode+1, 1000, cloneDir, "xxx", 0, 022, 4, &count, &total); !errors.Is(eno, syscall.EPERM) {
		t.Fatalf("cloning files in the trash is not supported")
	}
}

func checkEntryTree(t *testing.T, m Meta, srcIno, dstIno Ino, walkFunc func(srcEntry, dstEntry *Entry, dstIno Ino)) {
	var entries1 []*Entry
	if eno := m.Readdir(Background(), srcIno, 1, &entries1); eno != 0 {
		t.Fatalf("Readdir: %s", eno)
	}

	var entries2 []*Entry
	if eno := m.Readdir(Background(), dstIno, 1, &entries2); eno != 0 {
		t.Fatalf("Readdir: %s", eno)
	}
	sort.Slice(entries1, func(i, j int) bool { return string(entries1[i].Name) < string(entries1[j].Name) })
	sort.Slice(entries2, func(i, j int) bool { return string(entries2[i].Name) < string(entries2[j].Name) })
	if len(entries1) != len(entries2) {
		t.Fatalf("number of children: %d != %d", len(entries1), len(entries2))
	}
	for idx, entry := range entries1 {
		if string(entry.Name) == "." || string(entry.Name) == ".." {
			continue
		}
		if entry.Attr.Typ == TypeDirectory {
			checkEntryTree(t, m, entry.Inode, entries2[idx].Inode, walkFunc)
		}
		walkFunc(entry, entries2[idx], dstIno)
	}
}

func checkEntry(t *testing.T, m Meta, srcEntry, dstEntry *Entry, dstParentIno Ino) {
	if !bytes.Equal(srcEntry.Name, dstEntry.Name) {
		t.Fatalf("unmatched name: %s, %s", srcEntry.Name, dstEntry.Name)
	}
	srcAttr := srcEntry.Attr
	dstAttr := dstEntry.Attr
	if dstAttr.Parent != dstParentIno {
		t.Fatalf("unmatched parent: %d, %d", dstAttr.Parent, dstParentIno)
	}
	if srcAttr.Typ == TypeFile && dstAttr.Nlink != 1 || srcAttr.Typ != TypeFile && srcAttr.Nlink != dstAttr.Nlink {
		t.Fatalf("nlink not correct: srcType:%d,srcNlink:%d,dstType:%d,dstNlink:%d", srcAttr.Typ, srcAttr.Nlink, dstAttr.Typ, dstAttr.Nlink)
	}

	srcAttr.Nlink = 0
	dstAttr.Nlink = 0
	srcAttr.Parent = 0
	dstAttr.Parent = 0
	srcAttr.Atime = 0
	srcAttr.Atimensec = 0
	dstAttr.Atime = 0
	dstAttr.Atimensec = 0
	if *srcAttr != *dstAttr {
		t.Fatalf("unmatched attr: %#v, %#v", *srcAttr, *dstAttr)
	}

	// check xattr
	var value1 []byte
	if eno := m.ListXattr(Background(), srcEntry.Inode, &value1); eno != 0 {
		t.Fatalf("list xattr: %s", eno)
	}
	keys := bytes.Split(value1, []byte{0})
	for _, key := range keys {
		if key == nil || len(key) == 0 {
			continue
		}
		var v1, v2 []byte
		if eno := m.GetXattr(Background(), srcEntry.Inode, string(key), &v1); eno != 0 {
			t.Fatalf("get xattr: %s", eno)
		}
		if eno := m.GetXattr(Background(), dstEntry.Inode, string(key), &v2); eno != 0 {
			t.Fatalf("get xattr: %s", eno)
		}
		if !bytes.Equal(v1, v2) {
			t.Fatalf("xattr not equal")
		}
	}
}

func testQuota(t *testing.T, m Meta) {
	if err := m.NewSession(true); err != nil {
		t.Fatalf("New session: %s", err)
	}
	defer m.CloseSession()
	ctx := Background()
	var inode, parent Ino
	var attr Attr
	if st := m.Mkdir(ctx, RootInode, "quota", 0755, 0, 0, &parent, &attr); st != 0 {
		t.Fatalf("Mkdir quota: %s", st)
	}
	p := "/quota"
	if err := m.HandleQuota(ctx, QuotaSet, p, 0, 0, map[string]*Quota{p: {MaxSpace: 2 << 30, MaxInodes: 6}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set %s: %s", p, err)
	}
	m.getBase().loadQuotas()
	if st := m.Mkdir(ctx, parent, "d1", 0755, 0, 0, &inode, &attr); st != 0 {
		t.Fatalf("Mkdir quota/d1: %s", st)
	}
	p = "/quota/d1"
	if err := m.HandleQuota(ctx, QuotaSet, p, 0, 0, map[string]*Quota{p: {MaxSpace: 1 << 30, MaxInodes: 5}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota %s: %s", p, err)
	}
	m.getBase().loadQuotas()
	if st := m.Create(ctx, inode, "f1", 0644, 0, 0, nil, &attr); st != 0 {
		t.Fatalf("Create quota/d1/f1: %s", st)
	}
	if st := m.Mkdir(ctx, parent, "d2", 0755, 0, 0, &parent, &attr); st != 0 {
		t.Fatalf("Mkdir quota/d2: %s", st)
	}
	if st := m.Mkdir(ctx, parent, "d22", 0755, 0, 0, &inode, &attr); st != 0 {
		t.Fatalf("Mkdir quota/d2/d22: %s", st)
	}
	p = "/quota/d2/d22"
	if err := m.HandleQuota(ctx, QuotaSet, p, 0, 0, map[string]*Quota{p: {MaxSpace: 1 << 30, MaxInodes: 5}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota %s: %s", p, err)
	}
	m.getBase().loadQuotas()
	// parent -> d2, inode -> d22
	if st := m.Create(ctx, parent, "f2", 0644, 0, 0, nil, &attr); st != 0 {
		t.Fatalf("Create quota/d2/f2: %s", st)
	}
	if st := m.Create(ctx, inode, "f22", 0644, 0, 0, nil, &attr); st != 0 {
		t.Fatalf("Create quota/d22/f22: %s", st)
	}
	time.Sleep(time.Second * 5)

	qs := make(map[string]*Quota)
	p = "/quota"
	if err := m.HandleQuota(ctx, QuotaGet, p, 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get %s: %s", p, err)
	} else if q := qs[p]; q.MaxSpace != 2<<30 || q.MaxInodes != 6 || q.UsedSpace != 6*4<<10 || q.UsedInodes != 6 {
		t.Fatalf("HandleQuota get %s: %+v", p, q)
	}
	delete(qs, p)
	p = "/quota/d1"
	if err := m.HandleQuota(ctx, QuotaGet, p, 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get %s: %s", p, err)
	} else if q := qs[p]; q.MaxSpace != 1<<30 || q.MaxInodes != 5 || q.UsedSpace != 4<<10 || q.UsedInodes != 1 {
		t.Fatalf("HandleQuota get %s: %+v", p, q)
	}
	delete(qs, p)
	p = "/quota/d2/d22"
	if err := m.HandleQuota(ctx, QuotaGet, p, 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get %s: %s", p, err)
	} else if q := qs[p]; q.MaxSpace != 1<<30 || q.MaxInodes != 5 || q.UsedSpace != 4<<10 || q.UsedInodes != 1 {
		t.Fatalf("HandleQuota get %s: %+v", p, q)
	}
	delete(qs, p)

	if err := m.HandleQuota(ctx, QuotaList, "", 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota list: %s", err)
	} else {
		if len(qs) != 3 {
			t.Fatalf("HandleQuota list bad result: %d", len(qs))
		}
	}

	getUsedInodes := func(path string) int64 {
		m.getBase().doFlushQuotas()
		qs := make(map[string]*Quota)
		if err := m.HandleQuota(ctx, QuotaGet, path, 0, 0, qs, false, false, false); err != nil {
			t.Fatalf("HandleQuota list: %s", err)
		}
		return qs[path].UsedInodes
	}

	// unlink opened file
	var nInode Ino
	if st := m.Lookup(ctx, parent, "f2", &nInode, &attr, false); st != 0 {
		t.Fatalf("Lookup quota/d2/f2: %s", st)
	}

	if st := m.Open(ctx, nInode, 0, &attr); st != 0 {
		t.Fatalf("Open quota/d2/f2: %s", st)
	}

	if st := m.Unlink(ctx, parent, "f2"); st != 0 {
		t.Fatalf("Unlink quota/d2/f2 err: %s", st)
	}

	if st := m.Close(ctx, nInode); st != 0 {
		t.Fatalf("Close quota/d2/f2: %s", st)
	}

	if used := getUsedInodes("/quota"); used != 5 {
		t.Fatalf("used inodes of /quota should be 5, but got %d", used)
	}

	// rename opened file
	if st := m.Lookup(ctx, inode, "f22", &nInode, &attr, false); st != 0 {
		t.Fatalf("Lookup quota/d2/d22/f22: %s", st)
	}

	if st := m.Open(ctx, nInode, 0, &attr); st != 0 {
		t.Fatalf("Open quota/d2/d22/f22: %s", st)
	}

	if st := m.Rename(ctx, inode, "f22", inode, "f23", 0, &nInode, nil); st != 0 {
		t.Fatalf("Rename quota/d2/d22/f22 to quota/d2/d22/f23 err: %s", st)
	}

	if st := m.Close(ctx, nInode); st != 0 {
		t.Fatalf("Close quota/d2/d22/f23: %s", st)
	}

	if used := getUsedInodes("/quota"); used != 5 {
		t.Fatalf("used inodes of /quota should be 5, but got %d", used)
	}

	if st := m.Create(ctx, parent, "f3", 0644, 0, 0, &nInode, &attr); st != 0 {
		t.Fatalf("Create quota/d2/f3: %s", st)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "/quota/d1", 0, 0, nil, false, false, false); err != nil {
		t.Fatalf("HandleQuota del /quota/d1: %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaDel, "/quota/d2", 0, 0, nil, false, false, false); err != nil {
		t.Fatalf("HandleQuota del /quota/d2: %s", err)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaList, "", 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota list: %s", err)
	} else {
		if len(qs) != 2 {
			t.Fatalf("HandleQuota list bad result: %d", len(qs))
		}
	}
	m.getBase().loadQuotas()
	if st := m.Create(ctx, parent, "f4", 0644, 0, 0, nil, &attr); st != syscall.EDQUOT {
		t.Fatalf("Create quota/d22/f4: %s", st)
	}
}

func testAtime(t *testing.T, m Meta) {
	ctx := Background()
	var inode, parent Ino
	var attr Attr
	if st := m.Mkdir(ctx, RootInode, "atime", 0755, 0, 0, &parent, &attr); st != 0 {
		t.Fatalf("Mkdir atime: %s", st)
	}

	// open, read, read atime < mtime, read recent, readdir, readlink, link
	testFn := func(name string) (ret [7]bool) {
		fname := "f-" + name
		if st := m.Create(ctx, parent, fname, 0644, 0, 0, &inode, &attr); st != 0 {
			t.Fatalf("Create atime/%s: %s", fname, st)
		}
		// atime < ctime
		attr.Atime, attr.Atimensec = 1234, 5678
		if st := m.SetAttr(ctx, inode, SetAttrAtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		if st := m.Open(ctx, inode, 0, &attr); st != 0 {
			t.Fatalf("Open atime/%s: %s", fname, st)
		}
		defer m.Close(ctx, inode)
		ret[0] = attr.Atime != 1234

		attr.Atime, attr.Atimensec = 1234, 5678
		if st := m.SetAttr(ctx, inode, SetAttrAtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		var slices []Slice
		if st := m.Read(ctx, inode, 0, &slices); st != 0 {
			t.Fatalf("Read atime/%s: %s", fname, st)
		}
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			t.Fatalf("Getattr after read atime/%s: %s", fname, st)
		}
		ret[1] = attr.Atime != 1234

		// atime < mtime
		now := time.Now()
		attr.Atime = now.Unix() - 2
		attr.Mtime = now.Unix()
		if st := m.SetAttr(ctx, inode, SetAttrAtime|SetAttrMtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		if st := m.Read(ctx, inode, 0, &slices); st != 0 {
			t.Fatalf("Read atime/%s: %s", fname, st)
		}
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			t.Fatalf("Getattr after read atime/%s: %s", fname, st)
		}
		ret[2] = attr.Atime >= now.Unix()

		// atime = ctime = mtime, atime = now
		if st := m.SetAttr(ctx, inode, SetAttrAtimeNow|SetAttrMtimeNow, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		time.Sleep(time.Second * 2)
		now = time.Now()
		if st := m.Read(ctx, inode, 0, &slices); st != 0 {
			t.Fatalf("Read atime/%s: %s", fname, st)
		}
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			t.Fatalf("Getattr after read atime/%s: %s", fname, st)
		}
		ret[3] = attr.Atime >= now.Unix()

		// readdir
		fname = "d-" + name
		if st := m.Mkdir(ctx, parent, fname, 0755, 0, 0, &inode, &attr); st != 0 {
			t.Fatalf("Mkdir atime/%s: %s", fname, st)
		}
		attr.Atime, attr.Atimensec = 1234, 5678
		if st := m.SetAttr(ctx, inode, SetAttrAtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		var entries []*Entry
		if st := m.Readdir(ctx, inode, 0, &entries); st != 0 {
			t.Fatalf("Readdir atime/%s: %s", fname, st)
		}
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			t.Fatalf("Getattr after readdir atime/%s: %s", fname, st)
		}
		ret[4] = attr.Atime != 1234

		// readlink
		fname = "s-" + name
		if st := m.Symlink(ctx, parent, fname, "f-"+name, &inode, &attr); st != 0 {
			t.Fatalf("Symlink atime/%s: %s", fname, st)
		}
		attr.Atime, attr.Atimensec = 1234, 5678
		if st := m.SetAttr(ctx, inode, SetAttrAtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		var target []byte
		if st := m.ReadLink(ctx, inode, &target); st != 0 {
			t.Fatalf("Readlink atime/%s: %s", fname, st)
		}
		if st := m.GetAttr(ctx, inode, &attr); st != 0 {
			t.Fatalf("Getattr after readlink atime/%s: %s", fname, st)
		}
		ret[5] = attr.Atime != 1234 && attr.Atimensec != 5678

		// test link ctime
		attr.Atime, attr.Atimensec = 1234, 5678
		if st := m.SetAttr(ctx, inode, SetAttrAtime, 0, &attr); st != 0 {
			t.Fatalf("Setattr atime/%s: %s", fname, st)
		}
		fname = "l-" + name
		if st := m.Link(ctx, inode, parent, fname, &attr); st != 0 {
			t.Fatalf("Link %s: %s", fname, st)
		}
		ret[6] = attr.Ctime != 1234 && attr.Ctimensec != 5678
		return
	}

	for name, exp := range map[string][7]bool{
		RelAtime:    {true, true, true, false, true, true, true},
		StrictAtime: {true, true, true, true, true, true, true},
		NoAtime:     {false, false, false, false, false, false, true},
	} {
		m.getBase().conf.AtimeMode = name
		if ret := testFn(name); ret != exp {
			t.Fatalf("Test %s: expected %v, got %v", name, exp, ret)
		}
	}
}

// TestQuotaEdgeCases
func TestQuotaEdgeCases(t *testing.T) {
	m := &baseMeta{}

	m.userQuotas = make(map[uint64]*Quota)
	m.groupQuotas = make(map[uint64]*Quota)
	m.quotaMu = sync.RWMutex{}

	m.fmt = &Format{
		UserGroupQuota: true,
	}

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(2001)
	operatorUid := uint32(1002)
	operatorGid := uint32(2002)

	t.Log("Testing inodes-only quota limit...")
	m.userQuotas[uint64(fileOwnerUid)] = &Quota{MaxSpace: 0, MaxInodes: 3}
	m.groupQuotas[uint64(fileOwnerGid)] = &Quota{MaxSpace: 0, MaxInodes: 5}

	operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}

	if err := m.checkQuota(operatorCtx, 10*1024*1024, 0, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass for large space usage (no space limit), got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 0, 4, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding inodes limit, got: %s", err)
	}

	t.Log("Testing space-only quota limit...")
	m.userQuotas[uint64(fileOwnerUid)] = &Quota{MaxSpace: 1024 * 1024, MaxInodes: 0}
	m.groupQuotas[uint64(fileOwnerGid)] = &Quota{MaxSpace: 2 * 1024 * 1024, MaxInodes: 0}

	if err := m.checkQuota(operatorCtx, 0, 100, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass for large inodes usage (no inodes limit), got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 2*1024*1024, 0, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding space limit, got: %s", err)
	}

	t.Log("Testing mixed quota limits...")
	m.userQuotas[uint64(fileOwnerUid)] = &Quota{MaxSpace: 0, MaxInodes: 2}
	m.groupQuotas[uint64(fileOwnerGid)] = &Quota{MaxSpace: 1024 * 1024, MaxInodes: 0}

	if err := m.checkQuota(operatorCtx, 512*1024, 3, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding user inodes limit, got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 2*1024*1024, 1, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding group space limit, got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 512*1024, 1, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass when within both limits, got: %s", err)
	}
}

// TestCheckQuotaFileOwner
func TestCheckQuotaFileOwner(t *testing.T) {
	m := &baseMeta{}

	m.userQuotas = make(map[uint64]*Quota)
	m.groupQuotas = make(map[uint64]*Quota)
	m.quotaMu = sync.RWMutex{}

	m.fmt = &Format{
		UserGroupQuota: true,
	}

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(2001)
	operatorUid := uint32(1002)
	operatorGid := uint32(2002)

	m.userQuotas[uint64(fileOwnerUid)] = &Quota{MaxSpace: 1 << 20, MaxInodes: 5}
	m.groupQuotas[uint64(fileOwnerGid)] = &Quota{MaxSpace: 2 << 20, MaxInodes: 10}

	operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}

	if err := m.checkQuota(operatorCtx, 1024, 1, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass for file owner's quota, got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 2<<20, 1, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding file owner's user quota, got: %s", err)
	}

	if err := m.checkQuota(operatorCtx, 1024, 15, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding file owner's group quota, got: %s", err)
	}

	m.userQuotas[uint64(fileOwnerUid)] = &Quota{MaxSpace: 0, MaxInodes: 0}
	if err := m.checkQuota(operatorCtx, 1, 1, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass when quota is zero (unlimited), got: %s", err)
	}

	delete(m.userQuotas, uint64(fileOwnerUid))
	delete(m.groupQuotas, uint64(fileOwnerGid))
	if err := m.checkQuota(operatorCtx, 1024, 1, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass when no quota limits, got: %s", err)
	}
}

func TestSymlinkCache(t *testing.T) {
	cache := newSymlinkCache(10000)

	job := make(chan Ino)
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for ino := range job {
				cache.Store(ino, []byte(fmt.Sprintf("file%d", ino)))
			}
		}()
	}

	for i := 0; i < 10000; i++ {
		job <- Ino(i)
	}
	close(job)
	wg.Wait()

	cache.doClean()
	require.Equal(t, int32(8000), cache.size.Load())
}

func TestTxBatchLock(t *testing.T) {
	var base baseMeta
	// 0 inode
	func() {
		defer base.txBatchLock()()
	}()
	// 1 inodes
	func() {
		defer base.txBatchLock(2)()
	}()
	// 2 inodes
	func() {
		defer base.txBatchLock(1, 2)()
	}()
	// no reentrant
	func() {
		defer base.txBatchLock(1, 1, nlocks+1)()
	}()
	// no deadlock - sequential
	func() {
		batch1 := []Ino{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
		batch2 := []Ino{1 + nlocks*9, 2 + nlocks*8, 3 + nlocks*7, 4 + nlocks*6, 5 + nlocks*5, 6 + nlocks*4, 7 + nlocks*3, 8 + nlocks*2, 9 + nlocks, 10}
		var wg sync.WaitGroup
		for i := 0; i < 100; i++ {
			wg.Add(2)
			go func() {
				defer wg.Done()
				defer base.txBatchLock(batch1...)()
			}()
			go func() {
				defer wg.Done()
				defer base.txBatchLock(batch2...)()
			}()
		}
		wg.Wait()
	}()
	// no deadlock - fuzz testing
	func() {
		var batch1, batch2 []Ino
		for i := 0; i < 100; i++ {
			batch1 = append(batch1, Ino(rand.Uint64()+1))
			batch2 = append(batch2, Ino(rand.Uint64()+1))
		}
		var wg sync.WaitGroup
		for i := 0; i < 100; i++ {
			wg.Add(2)
			go func() {
				defer wg.Done()
				defer base.txBatchLock(batch1...)()
			}()
			go func() {
				defer wg.Done()
				defer base.txBatchLock(batch2...)()
			}()
		}
		wg.Wait()
	}()
}

// testCheckQuotaFileOwnerSimple
func testCheckQuotaFileOwnerSimple(t *testing.T, m Meta) {
	ctx := Background()
	parent := RootInode

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(1001)
	operatorUid := uint32(1002)
	operatorGid := uint32(1002)

	format := m.getBase().getFormat()
	format.UserGroupQuota = true

	if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 4096, MaxInodes: 5}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota: %s", err)
	}
	m.getBase().loadQuotas()

	var fileInode Ino
	var attr Attr
	if st := m.Create(ctx, parent, "testfile", 0644, 0, 0, &fileInode, &attr); st != 0 {
		t.Fatalf("Create testfile: %s", st)
	}
	if st := m.SetAttr(ctx, fileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
		t.Fatalf("SetAttr UID and GID: %s", st)
	}

	var sliceId uint64
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("NewSlice: %s", st)
	}
	slice := Slice{Id: sliceId, Size: 4096, Len: 4096}
	operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}

	if st := m.Write(operatorCtx, fileInode, 0, 0, slice, time.Now()); st != 0 {
		t.Fatalf("First write should succeed: %s", st)
	}

	var sliceId2 uint64
	if st := m.NewSlice(ctx, &sliceId2); st != 0 {
		t.Fatalf("NewSlice for second write: %s", st)
	}
	slice2 := Slice{Id: sliceId2, Size: 4096, Len: 4096}
	if st := m.Write(operatorCtx, fileInode, 1, 0, slice2, time.Now()); st != syscall.EDQUOT {
		t.Fatalf("Second write should fail with EDQUOT, got: %s", st)
	}

	m.CloseSession()
}

// testQuotaEdgeCases
func testQuotaEdgeCases(t *testing.T, m Meta) {
	ctx := Background()

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(1001)
	operatorUid := uint32(1002)
	operatorGid := uint32(2002)

	format := m.getBase().getFormat()
	format.UserGroupQuota = true

	t.Log("Testing inodes-only quota limit...")
	if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 0, MaxInodes: 2}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set inodes-only quota: %s", err)
	}
	m.getBase().loadQuotas()

	operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}

	if err := m.getBase().checkQuota(operatorCtx, 10*1024*1024, 0, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass for large space usage (no space limit), got: %s", err)
	}

	if err := m.getBase().checkQuota(operatorCtx, 0, 3, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding inodes limit, got: %s", err)
	}

	t.Log("Testing space-only quota limit...")
	if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1024 * 1024, MaxInodes: 0}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set space-only quota: %s", err)
	}
	m.getBase().loadQuotas()

	if err := m.getBase().checkQuota(operatorCtx, 0, 100, fileOwnerUid, fileOwnerGid); err != 0 {
		t.Fatalf("checkQuota should pass for large inodes usage (no inodes limit), got: %s", err)
	}

	if err := m.getBase().checkQuota(operatorCtx, 2*1024*1024, 0, fileOwnerUid, fileOwnerGid); err != syscall.EDQUOT {
		t.Fatalf("checkQuota should fail with EDQUOT when exceeding space limit, got: %s", err)
	}
}

// testQuotaEdgeCasesComplex
func testQuotaEdgeCasesComplex(t *testing.T, m Meta) {
	ctx := Background()
	parent := RootInode

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(1001)
	operatorUid := uint32(1002)
	operatorGid := uint32(1002)

	format := m.getBase().getFormat()
	format.UserGroupQuota = true

	t.Log("Testing inodes-only quota limit...")
	if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 0, MaxInodes: 2}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set inodes-only quota: %s", err)
	}
	m.getBase().loadQuotas()

	var fileInode Ino
	var attr Attr
	if st := m.Create(ctx, parent, "testfile_inodes", 0644, 0, 0, &fileInode, &attr); st != 0 {
		t.Fatalf("Create testfile_inodes: %s", st)
	}
	if st := m.SetAttr(ctx, fileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
		t.Fatalf("SetAttr UID and GID: %s", st)
	}

	operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
	for i := 0; i < 5; i++ {
		var sliceId uint64
		if st := m.NewSlice(ctx, &sliceId); st != 0 {
			t.Fatalf("NewSlice %d: %s", i, st)
		}
		slice := Slice{Id: sliceId, Size: 1024 * 1024, Len: 1024 * 1024}
		if st := m.Write(operatorCtx, fileInode, uint32(i), uint32(i*1024*1024), slice, time.Now()); st != 0 {
			t.Fatalf("Write %d should succeed (no space limit), got: %s", i, st)
		}
	}

	var newFileInode Ino
	if st := m.Create(ctx, parent, "testfile_inodes2", 0644, 0, 0, &newFileInode, &attr); st != syscall.EDQUOT {
		t.Fatalf("Create should fail with EDQUOT (inodes limit exceeded), got: %s", st)
	}

	t.Log("Testing space-only quota limit...")
	if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1024 * 1024, MaxInodes: 0}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set space-only quota: %s", err)
	}
	m.getBase().loadQuotas()

	if st := m.Create(ctx, parent, "testfile_space", 0644, 0, 0, &fileInode, &attr); st != 0 {
		t.Fatalf("Create testfile_space: %s", st)
	}
	if st := m.SetAttr(ctx, fileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
		t.Fatalf("SetAttr UID and GID: %s", st)
	}

	for i := 0; i < 10; i++ {
		var newFileInode Ino
		if st := m.Create(ctx, parent, fmt.Sprintf("testfile_space_%d", i), 0644, 0, 0, &newFileInode, &attr); st != 0 {
			t.Fatalf("Create file %d should succeed (no inodes limit), got: %s", i, st)
		}
		if st := m.SetAttr(ctx, newFileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for file %d: %s", i, st)
		}
	}

	var sliceId uint64
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("NewSlice for space test: %s", st)
	}
	slice := Slice{Id: sliceId, Size: 2 * 1024 * 1024, Len: 2 * 1024 * 1024}
	if st := m.Write(operatorCtx, fileInode, 0, 0, slice, time.Now()); st != syscall.EDQUOT {
		t.Fatalf("Write should fail with EDQUOT (space limit exceeded), got: %s", st)
	}
}

func testCheckQuotaFileOwner(t *testing.T, m Meta) {
	if err := m.NewSession(true); err != nil {
		t.Fatalf("New session: %s", err)
	}
	defer m.CloseSession()
	ctx := Background()
	var parent Ino
	var attr Attr

	if st := m.Mkdir(ctx, RootInode, "checkquota", 0755, 0, 0, &parent, &attr); st != 0 {
		t.Fatalf("Mkdir checkquota: %s", st)
	}

	fileOwnerUid := uint32(1001)
	fileOwnerGid := uint32(2001)
	operatorUid := uint32(1002)
	operatorGid := uint32(2002)

	t.Run("FileOwnerQuotaCheck", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1 << 20, MaxInodes: 5}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set user quota for file owner uid %d: %s", fileOwnerUid, err)
		}
		if err := m.HandleQuota(ctx, QuotaSet, "", 0, fileOwnerGid, map[string]*Quota{fmt.Sprintf("gid:%d", fileOwnerGid): {MaxSpace: 2 << 20, MaxInodes: 10}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set group quota for file owner gid %d: %s", fileOwnerGid, err)
		}
		m.getBase().loadQuotas()

		var fileInode Ino
		if st := m.Create(ctx, parent, "ownerfile", 0644, 0, 0, &fileInode, &attr); st != 0 {
			t.Fatalf("Create ownerfile: %s", st)
		}
		if st := m.SetAttr(ctx, fileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for ownerfile: %s", st)
		}

		var checkAttr Attr
		if st := m.GetAttr(ctx, fileInode, &checkAttr); st != 0 {
			t.Fatalf("GetAttr for ownerfile: %s", st)
		}
		if checkAttr.Uid != fileOwnerUid || checkAttr.Gid != fileOwnerGid {
			t.Fatalf("File owner not set correctly: expected uid=%d gid=%d, got uid=%d gid=%d",
				fileOwnerUid, fileOwnerGid, checkAttr.Uid, checkAttr.Gid)
		}

		var sliceId uint64
		if st := m.NewSlice(ctx, &sliceId); st != 0 {
			t.Fatalf("NewSlice: %s", st)
		}
		testSlice := Slice{Id: sliceId, Size: 1024, Len: 1024}

		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		if st := m.Write(operatorCtx, fileInode, 0, 0, testSlice, time.Now()); st != 0 {
			t.Fatalf("Write to ownerfile by different user: %s", st)
		}

		qs := make(map[string]*Quota)
		if err := m.HandleQuota(ctx, QuotaGet, "", fileOwnerUid, 0, qs, false, false, false); err != nil {
			t.Fatalf("HandleQuota get user quota: %s", err)
		}
		if q := qs[fmt.Sprintf("uid:%d", fileOwnerUid)]; q.UsedSpace < 1024 {
			t.Fatalf("User quota used space should be >= 1024, got %d", q.UsedSpace)
		}

		qs = make(map[string]*Quota)
		if err := m.HandleQuota(ctx, QuotaGet, "", 0, fileOwnerGid, qs, false, false, false); err != nil {
			t.Fatalf("HandleQuota get group quota: %s", err)
		}
		if q := qs[fmt.Sprintf("gid:%d", fileOwnerGid)]; q.UsedSpace < 1024 {
			t.Fatalf("Group quota used space should be >= 1024, got %d", q.UsedSpace)
		}
	})

	t.Run("QuotaExceededByFileOwner", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1024, MaxInodes: 2}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set strict user quota: %s", err)
		}
		m.getBase().loadQuotas()

		var newFileInode Ino
		if st := m.Create(ctx, parent, "strictfile", 0644, 0, 0, &newFileInode, &attr); st != 0 {
			t.Fatalf("Create strictfile: %s", st)
		}
		if st := m.SetAttr(ctx, newFileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for strictfile: %s", st)
		}

		var smallSliceId uint64
		if st := m.NewSlice(ctx, &smallSliceId); st != 0 {
			t.Fatalf("NewSlice for small data: %s", st)
		}
		smallSlice := Slice{Id: smallSliceId, Size: 512, Len: 512}
		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		if st := m.Write(operatorCtx, newFileInode, 0, 0, smallSlice, time.Now()); st != 0 {
			t.Fatalf("Write small data: %s", st)
		}

		var largeSliceId uint64
		if st := m.NewSlice(ctx, &largeSliceId); st != 0 {
			t.Fatalf("NewSlice for large data: %s", st)
		}
		largeSlice := Slice{Id: largeSliceId, Size: 1024, Len: 1024}
		if st := m.Write(operatorCtx, newFileInode, 0, 512, largeSlice, time.Now()); st != syscall.EDQUOT {
			t.Fatalf("Write should fail with EDQUOT when exceeding file owner's quota, got: %s", st)
		}
	})

	t.Run("TruncateQuotaCheck", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1 << 20, MaxInodes: 10}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota reset user quota: %s", err)
		}
		m.getBase().loadQuotas()

		var truncFileInode Ino
		if st := m.Create(ctx, parent, "truncfile", 0644, 0, 0, &truncFileInode, &attr); st != 0 {
			t.Fatalf("Create truncfile: %s", st)
		}
		if st := m.SetAttr(ctx, truncFileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for truncfile: %s", st)
		}

		var initialSliceId uint64
		if st := m.NewSlice(ctx, &initialSliceId); st != 0 {
			t.Fatalf("NewSlice for initial data: %s", st)
		}
		initialSlice := Slice{Id: initialSliceId, Size: 512, Len: 512}
		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		if st := m.Write(operatorCtx, truncFileInode, 0, 0, initialSlice, time.Now()); st != 0 {
			t.Fatalf("Initial write to truncfile: %s", st)
		}

		fileOwnerCtx := &testContext{Context: context.Background(), uid: fileOwnerUid, gid: fileOwnerGid}
		if st := m.Truncate(fileOwnerCtx, truncFileInode, 0, 1024, &attr, false); st != 0 {
			t.Fatalf("Truncate truncfile by file owner: %s", st)
		}

		if attr.Length != 1024 {
			t.Fatalf("Truncate failed: expected length 1024, got %d", attr.Length)
		}
	})

	t.Run("MknodQuotaCheck", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1 << 20, MaxInodes: 10}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota reset user quota: %s", err)
		}
		m.getBase().loadQuotas()

		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		var deviceInode Ino
		if st := m.Mknod(operatorCtx, parent, "device", TypeFile, 0644, 0, 0, "", &deviceInode, &attr); st != 0 {
			t.Fatalf("Mknod device by operator: %s", st)
		}

		if attr.Uid != operatorUid || attr.Gid != operatorGid {
			t.Fatalf("Mknod file owner should be operator: expected uid=%d gid=%d, got uid=%d gid=%d",
				operatorUid, operatorGid, attr.Uid, attr.Gid)
		}

		m.Unlink(ctx, parent, "device")
	})

	t.Run("CloneQuotaCheck", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1 << 20, MaxInodes: 10}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota reset user quota: %s", err)
		}
		m.getBase().loadQuotas()

		var srcInode Ino
		if st := m.Create(ctx, parent, "srcfile", 0644, 0, 0, &srcInode, &attr); st != 0 {
			t.Fatalf("Create srcfile: %s", st)
		}
		if st := m.SetAttr(ctx, srcInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for srcfile: %s", st)
		}

		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		var count, total uint64
		if st := m.Clone(operatorCtx, parent, srcInode, parent, "clonefile", 0, 0, 4, &count, &total); st != 0 {
			t.Fatalf("Clone srcfile by operator: %s", st)
		}

		var cloneInode Ino
		var cloneAttr Attr
		if st := m.Lookup(ctx, parent, "clonefile", &cloneInode, &cloneAttr, false); st != 0 {
			t.Fatalf("Lookup clonefile: %s", st)
		}
		if cloneAttr.Uid != operatorUid || cloneAttr.Gid != operatorGid {
			t.Fatalf("Clone file owner should be operator: expected uid=%d gid=%d, got uid=%d gid=%d",
				operatorUid, operatorGid, cloneAttr.Uid, cloneAttr.Gid)
		}

		m.Unlink(ctx, parent, "srcfile")
		m.Unlink(ctx, parent, "clonefile")
	})

	t.Run("CrossUserOperations", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 1 << 20, MaxInodes: 10}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set file owner quota: %s", err)
		}
		if err := m.HandleQuota(ctx, QuotaSet, "", operatorUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", operatorUid): {MaxSpace: 512, MaxInodes: 2}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set operator quota: %s", err)
		}
		m.getBase().loadQuotas()

		var crossFileInode Ino
		if st := m.Create(ctx, parent, "crossfile", 0644, 0, 0, &crossFileInode, &attr); st != 0 {
			t.Fatalf("Create crossfile: %s", st)
		}
		if st := m.SetAttr(ctx, crossFileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for crossfile: %s", st)
		}

		var crossSliceId uint64
		if st := m.NewSlice(ctx, &crossSliceId); st != 0 {
			t.Fatalf("NewSlice for cross data: %s", st)
		}
		crossSlice := Slice{Id: crossSliceId, Size: 1024, Len: 1024}
		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		if st := m.Write(operatorCtx, crossFileInode, 0, 0, crossSlice, time.Now()); st != 0 {
			t.Fatalf("Write to crossfile by operator: %s", st)
		}

		qs := make(map[string]*Quota)
		if err := m.HandleQuota(ctx, QuotaGet, "", fileOwnerUid, 0, qs, false, false, false); err != nil {
			t.Fatalf("HandleQuota get file owner quota: %s", err)
		}
		if q := qs[fmt.Sprintf("uid:%d", fileOwnerUid)]; q.UsedSpace < 1024 {
			t.Fatalf("File owner quota should be used: expected >= 1024, got %d", q.UsedSpace)
		}

		qs = make(map[string]*Quota)
		if err := m.HandleQuota(ctx, QuotaGet, "", operatorUid, 0, qs, false, false, false); err != nil {
			t.Fatalf("HandleQuota get operator quota: %s", err)
		}
		if q := qs[fmt.Sprintf("uid:%d", operatorUid)]; q.UsedSpace > 0 {
			t.Fatalf("Operator quota should not be used for file owner's file: got %d", q.UsedSpace)
		}

		m.Unlink(ctx, parent, "crossfile")
	})

	t.Run("EdgeCases", func(t *testing.T) {
		if err := m.HandleQuota(ctx, QuotaSet, "", fileOwnerUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", fileOwnerUid): {MaxSpace: 0, MaxInodes: 0}}, false, false, false); err != nil {
			t.Fatalf("HandleQuota set zero quota: %s", err)
		}
		m.getBase().loadQuotas()

		var edgeFileInode Ino
		if st := m.Create(ctx, parent, "edgefile", 0644, 0, 0, &edgeFileInode, &attr); st != 0 {
			t.Fatalf("Create edgefile: %s", st)
		}
		if st := m.SetAttr(ctx, edgeFileInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: fileOwnerUid, Gid: fileOwnerGid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for edgefile: %s", st)
		}

		var edgeSliceId uint64
		if st := m.NewSlice(ctx, &edgeSliceId); st != 0 {
			t.Fatalf("NewSlice for edge data: %s", st)
		}
		edgeSlice := Slice{Id: edgeSliceId, Size: 1, Len: 1}
		operatorCtx := &testContext{Context: context.Background(), uid: operatorUid, gid: operatorGid}
		if st := m.Write(operatorCtx, edgeFileInode, 0, 0, edgeSlice, time.Now()); st != syscall.EDQUOT {
			t.Fatalf("Write should fail with EDQUOT when quota is zero, got: %s", st)
		}

		m.Unlink(ctx, parent, "edgefile")
	})

	m.Unlink(ctx, parent, "ownerfile")
	m.Unlink(ctx, parent, "strictfile")
	m.Unlink(ctx, parent, "truncfile")
}

// testContext
type testContext struct {
	context.Context
	uid uint32
	gid uint32
}

func (c *testContext) Uid() uint32                        { return c.uid }
func (c *testContext) Gid() uint32                        { return c.gid }
func (c *testContext) Gids() []uint32                     { return []uint32{c.gid} }
func (c *testContext) Pid() uint32                        { return 0 }
func (c *testContext) WithValue(k, v interface{}) Context { return c }
func (c *testContext) Cancel()                            {}
func (c *testContext) Canceled() bool                     { return false }
func (c *testContext) CheckPermission() bool              { return true }

func cleanupQuotaTest(ctx Context, m Meta, parent Ino, uid, gid uint32) {
	for i := 0; i < 3; i++ {
		filename := fmt.Sprintf("testfile%d", i)
		m.Unlink(ctx, parent, filename)
	}
	for i := 0; i < 2; i++ {
		filename := fmt.Sprintf("writefile%d", i)
		m.Unlink(ctx, parent, filename)
	}
	for i := 0; i < 4; i++ {
		filename := fmt.Sprintf("groupfile%d", i)
		m.Unlink(ctx, parent, filename)
	}

	m.Unlink(ctx, parent, "userfile")
	m.Unlink(ctx, parent, "groupfile")
	m.Unlink(ctx, parent, "hardlink")
	m.Rmdir(ctx, RootInode, "ugquota")

	m.HandleQuota(ctx, QuotaDel, "", uid, 0, nil, false, false, false)
	m.HandleQuota(ctx, QuotaDel, "", 0, gid, nil, false, false, false)
	m.HandleQuota(ctx, QuotaDel, "/path1", uid, 0, nil, false, false, false)
	m.HandleQuota(ctx, QuotaDel, "/path2", 0, gid, nil, false, false, false)
	for i := 0; i < 5; i++ {
		testUid := uint32(3000 + i)
		m.HandleQuota(ctx, QuotaDel, "", testUid, 0, nil, false, false, false)
	}
}

func testBasicQuotaOperations(t *testing.T, m Meta, ctx Context, uid, gid uint32) {
	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 1 << 30, MaxInodes: 10}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota for uid %d: %s", uid, err)
	}
	m.getBase().loadQuotas()

	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 2 << 30, MaxInodes: 20}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set group quota for gid %d: %s", gid, err)
	}
	m.getBase().loadQuotas()

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get user quota for uid %d: %s", uid, err)
	} else if q := qs[fmt.Sprintf("uid:%d", uid)]; q.MaxSpace != 1<<30 || q.MaxInodes != 10 {
		t.Fatalf("HandleQuota get user quota for uid %d: bad result %+v", uid, q)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", 0, gid, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get group quota for gid %d: %s", gid, err)
	} else if q := qs[fmt.Sprintf("gid:%d", gid)]; q.MaxSpace != 2<<30 || q.MaxInodes != 20 {
		t.Fatalf("HandleQuota get group quota for gid %d: bad result %+v", gid, q)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaList, "", 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota list: %s", err)
	} else {
		if len(qs) < 2 {
			t.Fatalf("HandleQuota list bad result: expected at least 2, got %d", len(qs))
		}
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", uid, 0, nil, false, false, false); err != nil {
		t.Fatalf("HandleQuota del user quota for uid %d: %s", uid, err)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", 0, gid, nil, false, false, false); err != nil {
		t.Fatalf("HandleQuota del group quota for gid %d: %s", gid, err)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaList, "", 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota list after deletion: %s", err)
	}

	m.getBase().loadQuotas()
}

func testQuotaFileOperations(t *testing.T, m Meta, ctx Context, parent Ino, uid, gid uint32) {
	var userInode Ino
	var attr Attr
	if st := m.Create(ctx, parent, "userfile", 0644, 0, 0, &userInode, &attr); st != 0 {
		t.Fatalf("Create ugquota/userfile: %s", st)
	}
	if st := m.SetAttr(ctx, userInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for userfile: %s", st)
	}

	var checkAttr Attr
	if st := m.GetAttr(ctx, userInode, &checkAttr); st != 0 {
		t.Fatalf("GetAttr for userfile: %s", st)
	}
	if checkAttr.Uid != uid {
		t.Fatalf("SetAttr UID failed: expected %d, got %d", uid, checkAttr.Uid)
	}
	if checkAttr.Gid != gid {
		t.Fatalf("SetAttr GID failed: expected %d, got %d", gid, checkAttr.Gid)
	}

	var groupInode Ino
	if st := m.Create(ctx, parent, "groupfile", 0644, 0, 0, &groupInode, &attr); st != 0 {
		t.Fatalf("Create ugquota/groupfile: %s", st)
	}
	if st := m.SetAttr(ctx, groupInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for groupfile: %s", st)
	}

	m.FlushSession()
	time.Sleep(time.Second * 2)

	if err := m.HandleQuota(ctx, QuotaDel, "", uid, 0, nil, false, false, false); err != nil {
		t.Logf("HandleQuota delete user quota (may not exist): %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 1 << 30, MaxInodes: 10}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota for uid %d: %s", uid, err)
	}

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get user quota after file creation: %s", err)
	} else if q := qs[fmt.Sprintf("uid:%d", uid)]; q.UsedInodes < 1 {
		t.Fatalf("HandleQuota get user quota: used inodes should be >= 1, got %d", q.UsedInodes)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", 0, gid, nil, false, false, false); err != nil {
		t.Logf("HandleQuota delete group quota (may not exist): %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 2 << 30, MaxInodes: 20}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set group quota for gid %d: %s", gid, err)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get user quota after file creation: %s", err)
	} else if q := qs[fmt.Sprintf("uid:%d", uid)]; q.UsedInodes < 1 {
		t.Fatalf("HandleQuota get user quota: used inodes should be >= 1, got %d", q.UsedInodes)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", 0, gid, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get group quota after file creation: %s", err)
	} else if q := qs[fmt.Sprintf("gid:%d", gid)]; q.UsedInodes < 1 {
		t.Fatalf("HandleQuota get group quota: used inodes should be >= 1, got %d", q.UsedInodes)
	}

	m.getBase().doFlushQuotas()
}

func testQuotaErrorCases(t *testing.T, m Meta, ctx Context, uid, gid uint32) {
	if err := m.HandleQuota(ctx, QuotaSet, "", 0, 0, map[string]*Quota{"": {MaxSpace: 1 << 30, MaxInodes: 10}}, false, false, false); err == nil {
		t.Fatalf("HandleQuota should fail for invalid quota type (no path, uid, or gid)")
	}

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, 99, "", uid, 0, qs, false, false, false); err == nil {
		t.Fatalf("HandleQuota should fail for invalid command")
	}

	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 0, MaxInodes: 10}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota with MaxSpace=0: %s", err)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 1 << 30, MaxInodes: 0}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set group quota with MaxInodes=0: %s", err)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 1 << 62, MaxInodes: 1 << 30}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota with large values: %s", err)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", 9999, 0, nil, false, false, false); err != nil {
		t.Fatalf("HandleQuota del non-existent user quota should not fail: %s", err)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", 9999, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get non-existent user quota should not fail: %s", err)
	}
}

func testQuotaConcurrentOperations(t *testing.T, m Meta, ctx Context) {
	var wg sync.WaitGroup

	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			testUid := uint32(3000 + id)
			err := m.HandleQuota(ctx, QuotaSet, "", testUid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", testUid): {MaxSpace: 1 << 20, MaxInodes: 5}}, false, false, false)
			if err != nil {
				t.Errorf("Concurrent HandleQuota set user quota for uid %d: %s", testUid, err)
			}
		}(i)
	}
	wg.Wait()

	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			testUid := uint32(3000 + id)
			qs := make(map[string]*Quota)
			err := m.HandleQuota(ctx, QuotaGet, "", testUid, 0, qs, false, false, false)
			if err != nil {
				t.Errorf("Concurrent HandleQuota get user quota for uid %d: %s", testUid, err)
			}
		}(i)
	}
	wg.Wait()
}

func testQuotaMixedTypes(t *testing.T, m Meta, ctx Context, uid, gid uint32) {
	var attr Attr

	var path1Inode Ino
	if st := m.Mkdir(ctx, RootInode, "path1", 0755, 0, 0, &path1Inode, &attr); st != 0 {
		t.Fatalf("Mkdir path1: %s", st)
	}

	var path2Inode Ino
	if st := m.Mkdir(ctx, RootInode, "path2", 0755, 0, 0, &path2Inode, &attr); st != 0 {
		t.Fatalf("Mkdir path2: %s", st)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/path1", uid, 0, map[string]*Quota{"/path1": {MaxSpace: 100 << 20, MaxInodes: 20}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set path quota for uid %d: %s", uid, err)
	}

	if err := m.HandleQuota(ctx, QuotaSet, "/path2", 0, gid, map[string]*Quota{"/path2": {MaxSpace: 200 << 20, MaxInodes: 30}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set path quota for gid %d: %s", gid, err)
	}

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaList, "", 0, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota list mixed quota types: %s", err)
	}
	if len(qs) < 4 {
		t.Fatalf("HandleQuota list mixed quota types: expected at least 4, got %d", len(qs))
	}
}

func testQuotaUsageStatistics(t *testing.T, m Meta, ctx Context, parent Ino, uid, gid uint32) {
	var attr Attr

	for i := 0; i < 3; i++ {
		filename := fmt.Sprintf("testfile%d", i)
		var testInode Ino
		if st := m.Create(ctx, parent, filename, 0644, 0, 0, &testInode, &attr); st != 0 {
			t.Fatalf("Create %s: %s", filename, st)
		}
		if st := m.SetAttr(ctx, testInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for %s: %s", filename, st)
		}
	}

	for i := 0; i < 4; i++ {
		filename := fmt.Sprintf("groupfile%d", i)
		var groupTestInode Ino
		if st := m.Create(ctx, parent, filename, 0644, 0, 0, &groupTestInode, &attr); st != 0 {
			t.Fatalf("Create %s: %s", filename, st)
		}
		if st := m.SetAttr(ctx, groupTestInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for %s: %s", filename, st)
		}
	}

	// Set parent directory attributes to be included in quotas
	if st := m.SetAttr(ctx, parent, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for parent directory: %s", st)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", uid, 0, nil, false, false, false); err != nil {
		t.Logf("HandleQuota delete user quota (may not exist): %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 1 << 30, MaxInodes: 10}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set user quota for uid %d: %s", uid, err)
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", 0, gid, nil, false, false, false); err != nil {
		t.Logf("HandleQuota delete group quota (may not exist): %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 2 << 30, MaxInodes: 20}}, false, false, false); err != nil {
		t.Fatalf("HandleQuota set group quota for gid %d: %s", gid, err)
	}

	time.Sleep(time.Second * 2)

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, 0, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get user quota for usage verification: %s", err)
	} else if q := qs[fmt.Sprintf("uid:%d", uid)]; q.UsedInodes < 4 {
		t.Fatalf("HandleQuota user quota usage: expected >= 4 inodes, got %d", q.UsedInodes)
	}

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", 0, gid, qs, false, false, false); err != nil {
		t.Fatalf("HandleQuota get group quota for usage verification: %s", err)
	} else if q := qs[fmt.Sprintf("gid:%d", gid)]; q.UsedInodes < 5 {
		t.Fatalf("HandleQuota group quota usage: expected >= 5 inodes, got %d", q.UsedInodes)
	}
}

func testUserGroupQuota(t *testing.T, m Meta) {
	if err := m.NewSession(true); err != nil {
		t.Fatalf("New session: %s", err)
	}
	defer m.CloseSession()
	ctx := Background()
	var parent Ino
	var attr Attr

	if st := m.Mkdir(ctx, RootInode, "ugquota", 0755, 0, 0, &parent, &attr); st != 0 {
		t.Fatalf("Mkdir ugquota: %s", st)
	}

	uid := uint32(1001)
	gid := uint32(2001)

	t.Run("BasicQuotaOperations", func(t *testing.T) {
		testBasicQuotaOperations(t, m, ctx, uid, gid)
	})

	t.Run("QuotaFileOperations", func(t *testing.T) {
		testQuotaFileOperations(t, m, ctx, parent, uid, gid)
	})

	t.Run("QuotaErrorCases", func(t *testing.T) {
		testQuotaErrorCases(t, m, ctx, uid, gid)
	})

	t.Run("QuotaConcurrentOperations", func(t *testing.T) {
		testQuotaConcurrentOperations(t, m, ctx)
	})

	t.Run("QuotaMixedTypes", func(t *testing.T) {
		testQuotaMixedTypes(t, m, ctx, uid, gid)
	})

	t.Run("QuotaUsageStatistics", func(t *testing.T) {
		testQuotaUsageStatistics(t, m, ctx, parent, uid, gid)
	})

	t.Run("CheckQuotaFileOwner", func(t *testing.T) {
		testCheckQuotaFileOwnerSimple(t, m)
	})

	t.Run("QuotaEdgeCases", func(t *testing.T) {
		testQuotaEdgeCases(t, m)
	})

	t.Run("HardlinkQuota", func(t *testing.T) {
		testHardlinkQuota(t, m, ctx, parent, uid, gid)
	})

	t.Run("BatchUnlinkWithUserGroupQuota", func(t *testing.T) {
		testBatchUnlinkWithUserGroupQuota(t, m, ctx, parent, uid, gid)
	})

	cleanupQuotaTest(ctx, m, parent, uid, gid)

}

func testHardlinkQuota(t *testing.T, m Meta, ctx Context, parent Ino, uid, gid uint32) {
	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 100 << 20, MaxInodes: 100}}, false, false, false); err != nil {
		t.Fatalf("Set user quota: %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 100 << 20, MaxInodes: 100}}, false, false, false); err != nil {
		t.Fatalf("Set group quota: %s", err)
	}

	var parentPath string
	if parent == RootInode {
		parentPath = "/"
	} else {
		parentPath = "/ugquota"
	}

	if err := m.HandleQuota(ctx, QuotaSet, parentPath, 0, 0, map[string]*Quota{parentPath: {MaxSpace: 200 << 20, MaxInodes: 200}}, false, false, false); err != nil {
		t.Fatalf("Set directory quota for %s: %s", parentPath, err)
	}

	m.getBase().loadQuotas()

	var originalFile Ino
	var attr Attr
	fileSize := uint64(8192) // 8KB 文件
	if st := m.Create(ctx, parent, "test_original_file", 0644, 0, 0, &originalFile, &attr); st != 0 {
		t.Fatalf("Create original file: %s", st)
	}
	if st := m.SetAttr(ctx, originalFile, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for original file: %s", st)
	}

	var sliceId uint64
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("NewSlice: %s", st)
	}
	slice := Slice{Id: sliceId, Size: uint32(fileSize), Len: uint32(fileSize)}
	if st := m.Write(ctx, originalFile, 0, 0, slice, time.Now()); st != 0 {
		t.Fatalf("Write data to original file: %s", st)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(100 * time.Millisecond)

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after file creation: %s", err)
	}
	ugQuotaAfterFile := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterFile == nil {
		t.Fatalf("User group quota not found after file creation")
	}

	dirQs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, parentPath, 0, 0, dirQs, false, false, false); err != nil {
		t.Fatalf("Get directory quota after file creation: %s", err)
	}
	dirQuotaAfterFile := dirQs[parentPath]
	if dirQuotaAfterFile == nil {
		t.Fatalf("Directory quota not found after file creation")
	}

	if st := m.Link(ctx, originalFile, parent, "test_hardlink_file", &attr); st != 0 {
		t.Fatalf("Create hardlink: %s", st)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(100 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after hardlink creation: %s", err)
	}
	ugQuotaAfterHardlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterHardlink == nil {
		t.Fatalf("User group quota not found after hardlink creation")
	}

	dirQs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, parentPath, 0, 0, dirQs, false, false, false); err != nil {
		t.Fatalf("Get directory quota after hardlink creation: %s", err)
	}
	dirQuotaAfterHardlink := dirQs[parentPath]
	if dirQuotaAfterHardlink == nil {
		t.Fatalf("Directory quota not found after hardlink creation")
	}
	// After the new strategy, creating a hardlink does not increase user/group quota
	// because hardlink only creates a new directory entry, not a new file
	expectedSpaceIncrease := int64(0)
	expectedInodeIncrease := int64(0)

	actualSpaceIncrease := ugQuotaAfterHardlink.UsedSpace - ugQuotaAfterFile.UsedSpace
	actualInodeIncrease := ugQuotaAfterHardlink.UsedInodes - ugQuotaAfterFile.UsedInodes

	if actualSpaceIncrease != expectedSpaceIncrease {
		t.Fatalf("UG quota space increase mismatch: expected %d, got %d", expectedSpaceIncrease, actualSpaceIncrease)
	}
	if actualInodeIncrease != expectedInodeIncrease {
		t.Fatalf("UG quota inode increase mismatch: expected %d, got %d", expectedInodeIncrease, actualInodeIncrease)
	}

	dirExpectedSpaceIncrease := int64(8192)
	dirExpectedInodeIncrease := int64(1)

	dirActualSpaceIncrease := dirQuotaAfterHardlink.UsedSpace - dirQuotaAfterFile.UsedSpace
	dirActualInodeIncrease := dirQuotaAfterHardlink.UsedInodes - dirQuotaAfterFile.UsedInodes

	if dirActualSpaceIncrease != dirExpectedSpaceIncrease {
		t.Fatalf("Directory quota space increase mismatch: expected %d, got %d", dirExpectedSpaceIncrease, dirActualSpaceIncrease)
	}
	if dirActualInodeIncrease != dirExpectedInodeIncrease {
		t.Fatalf("Directory quota inode increase mismatch: expected %d, got %d", dirExpectedInodeIncrease, dirActualInodeIncrease)
	}

	if st := m.Unlink(ctx, parent, "test_hardlink_file", true); st != 0 {
		t.Fatalf("Unlink hardlink: %s", st)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(100 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after hardlink deletion: %s", err)
	}
	ugQuotaAfterUnlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterUnlink == nil {
		t.Fatalf("User group quota not found after hardlink deletion")
	}

	dirQs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, parentPath, 0, 0, dirQs, false, false, false); err != nil {
		t.Fatalf("Get directory quota after hardlink deletion: %s", err)
	}
	dirQuotaAfterUnlink := dirQs[parentPath]
	if dirQuotaAfterUnlink == nil {
		t.Fatalf("Directory quota not found after hardlink deletion")
	}

	// After the new strategy, deleting a hardlink does not decrease user/group quota
	// because hardlink only deletes a directory entry, not the actual file
	expectedSpaceDecrease := int64(0)
	expectedInodeDecrease := int64(0)

	actualSpaceDecrease := ugQuotaAfterHardlink.UsedSpace - ugQuotaAfterUnlink.UsedSpace
	actualInodeDecrease := ugQuotaAfterHardlink.UsedInodes - ugQuotaAfterUnlink.UsedInodes

	if actualSpaceDecrease != expectedSpaceDecrease {
		t.Fatalf("UG quota space decrease mismatch: expected %d, got %d", expectedSpaceDecrease, actualSpaceDecrease)
	}
	if actualInodeDecrease != expectedInodeDecrease {
		t.Fatalf("UG quota inode decrease mismatch: expected %d, got %d", expectedInodeDecrease, actualInodeDecrease)
	}

	dirExpectedSpaceDecrease := int64(8192)
	dirExpectedInodeDecrease := int64(1)

	dirActualSpaceDecrease := dirQuotaAfterHardlink.UsedSpace - dirQuotaAfterUnlink.UsedSpace
	dirActualInodeDecrease := dirQuotaAfterHardlink.UsedInodes - dirQuotaAfterUnlink.UsedInodes

	if dirActualSpaceDecrease != dirExpectedSpaceDecrease {
		t.Fatalf("Directory quota space decrease mismatch: expected %d, got %d", dirExpectedSpaceDecrease, dirActualSpaceDecrease)
	}
	if dirActualInodeDecrease != dirExpectedInodeDecrease {
		t.Fatalf("Directory quota inode decrease mismatch: expected %d, got %d", dirExpectedInodeDecrease, dirActualInodeDecrease)
	}

	m.Unlink(ctx, parent, "test_original_file")
	m.HandleQuota(ctx, QuotaDel, "", uid, gid, nil, false, false, false)
	m.HandleQuota(ctx, QuotaDel, parentPath, 0, 0, nil, false, false, false)
}

func testBatchUnlinkWithUserGroupQuota(t *testing.T, m Meta, ctx Context, parent Ino, uid, gid uint32) {
	if err := m.HandleQuota(ctx, QuotaSet, "", uid, 0, map[string]*Quota{fmt.Sprintf("uid:%d", uid): {MaxSpace: 100 << 20, MaxInodes: 100}}, false, false, false); err != nil {
		t.Fatalf("Set user quota: %s", err)
	}
	if err := m.HandleQuota(ctx, QuotaSet, "", 0, gid, map[string]*Quota{fmt.Sprintf("gid:%d", gid): {MaxSpace: 100 << 20, MaxInodes: 100}}, false, false, false); err != nil {
		t.Fatalf("Set group quota: %s", err)
	}
	m.getBase().loadQuotas()

	var fileInodes []Ino
	var fileAttrs []Attr
	fileNames := []string{"batch_file1", "batch_file2", "batch_file3"}
	fileSize := uint64(4096) // 4KB per file

	for _, fileName := range fileNames {
		var inode Ino
		var attr Attr
		if st := m.Create(ctx, parent, fileName, 0644, 0, 0, &inode, &attr); st != 0 {
			t.Fatalf("Create %s: %s", fileName, st)
		}
		if st := m.SetAttr(ctx, inode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for %s: %s", fileName, st)
		}
		var sliceId uint64
		if st := m.NewSlice(ctx, &sliceId); st != 0 {
			t.Fatalf("NewSlice for %s: %s", fileName, st)
		}
		slice := Slice{Id: sliceId, Size: uint32(fileSize), Len: uint32(fileSize)}
		if st := m.Write(ctx, inode, 0, 0, slice, time.Now()); st != 0 {
			t.Fatalf("Write data to %s: %s", fileName, st)
		}
		fileInodes = append(fileInodes, inode)
		fileAttrs = append(fileAttrs, attr)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs := make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota before batch unlink: %s", err)
	}
	ugQuotaBefore := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaBefore == nil {
		t.Fatalf("User group quota not found before batch unlink")
	}

	var entries []*Entry
	for i, fileName := range fileNames {
		var attr Attr
		if st := m.GetAttr(ctx, fileInodes[i], &attr); st != 0 {
			t.Fatalf("GetAttr for %s: %s", fileName, st)
		}
		entries = append(entries, &Entry{
			Inode: fileInodes[i],
			Name:  []byte(fileName),
			Attr:  &attr,
		})
	}

	var count uint64
	if st := m.getBase().BatchUnlink(ctx, parent, entries, &count, false); st != 0 {
		t.Fatalf("BatchUnlink failed: %s", st)
	}

	if count != uint64(len(fileNames)) {
		t.Fatalf("BatchUnlink count mismatch: expected %d, got %d", len(fileNames), count)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after batch unlink: %s", err)
	}
	ugQuotaAfter := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfter == nil {
		t.Fatalf("User group quota not found after batch unlink")
	}

	// After the new strategy, files moved to trash do not decrease user/group quota
	// Only files permanently deleted from trash decrease quota
	expectedInodeDecrease := int64(0)
	actualInodeDecrease := ugQuotaBefore.UsedInodes - ugQuotaAfter.UsedInodes

	if actualInodeDecrease != expectedInodeDecrease {
		t.Fatalf("User group quota inode decrease mismatch: expected %d, got %d", expectedInodeDecrease, actualInodeDecrease)
	}

	expectedSpaceDecrease := int64(0)
	actualSpaceDecrease := ugQuotaBefore.UsedSpace - ugQuotaAfter.UsedSpace

	if actualSpaceDecrease != expectedSpaceDecrease {
		t.Fatalf("User group quota space decrease mismatch: expected %d, got %d", expectedSpaceDecrease, actualSpaceDecrease)
	}

	var originalInode Ino
	var originalAttr Attr
	hardlinkFileSize := uint64(8192) // 8KB
	hardlinkFileName := "hardlink_original"
	if st := m.Create(ctx, parent, hardlinkFileName, 0644, 0, 0, &originalInode, &originalAttr); st != 0 {
		t.Fatalf("Create original file for hardlink test: %s", st)
	}
	if st := m.SetAttr(ctx, originalInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for original file: %s", st)
	}
	var sliceId uint64
	if st := m.NewSlice(ctx, &sliceId); st != 0 {
		t.Fatalf("NewSlice for original file: %s", st)
	}
	slice := Slice{Id: sliceId, Size: uint32(hardlinkFileSize), Len: uint32(hardlinkFileSize)}
	if st := m.Write(ctx, originalInode, 0, 0, slice, time.Now()); st != 0 {
		t.Fatalf("Write data to original file: %s", st)
	}

	hardlinkFileName2 := "hardlink_link"
	if st := m.Link(ctx, originalInode, parent, hardlinkFileName2, &originalAttr); st != 0 {
		t.Fatalf("Create hardlink: %s", st)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota before hardlink unlink: %s", err)
	}
	ugQuotaBeforeHardlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaBeforeHardlink == nil {
		t.Fatalf("User group quota not found before hardlink unlink")
	}

	var hardlinkAttr Attr
	var hardlinkInode Ino
	if st := m.Lookup(ctx, parent, hardlinkFileName2, &hardlinkInode, &hardlinkAttr, false); st != 0 {
		t.Fatalf("Lookup hardlink file: %s", st)
	}
	if hardlinkInode != originalInode {
		t.Fatalf("Hardlink inode mismatch: expected %d, got %d", originalInode, hardlinkInode)
	}
	if hardlinkAttr.Nlink < 2 {
		t.Fatalf("Expected Nlink >= 2 for hardlink, got %d", hardlinkAttr.Nlink)
	}

	var hardlinkEntry Attr
	if st := m.GetAttr(ctx, hardlinkInode, &hardlinkEntry); st != 0 {
		t.Fatalf("GetAttr for hardlink: %s", st)
	}
	hardlinkEntries := []*Entry{
		{
			Inode: hardlinkInode,
			Name:  []byte(hardlinkFileName2),
			Attr:  &hardlinkEntry,
		},
	}

	count = 0
	if st := m.getBase().BatchUnlink(ctx, parent, hardlinkEntries, &count, false); st != 0 {
		t.Fatalf("BatchUnlink hardlink failed: %s", st)
	}

	if count != 1 {
		t.Fatalf("BatchUnlink hardlink count mismatch: expected 1, got %d", count)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after hardlink unlink: %s", err)
	}
	ugQuotaAfterHardlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterHardlink == nil {
		t.Fatalf("User group quota not found after hardlink unlink")
	}

	// After the new strategy, hardlinks moved to trash do not decrease user/group quota
	expectedHardlinkInodeDecrease := int64(0)
	expectedHardlinkSpaceDecrease := int64(0)

	actualHardlinkInodeDecrease := ugQuotaBeforeHardlink.UsedInodes - ugQuotaAfterHardlink.UsedInodes
	actualHardlinkSpaceDecrease := ugQuotaBeforeHardlink.UsedSpace - ugQuotaAfterHardlink.UsedSpace

	if actualHardlinkInodeDecrease != expectedHardlinkInodeDecrease {
		t.Fatalf("Hardlink unlink: user group quota inode decrease mismatch: expected %d, got %d", expectedHardlinkInodeDecrease, actualHardlinkInodeDecrease)
	}
	if actualHardlinkSpaceDecrease != expectedHardlinkSpaceDecrease {
		t.Fatalf("Hardlink unlink: user group quota space decrease mismatch: expected %d, got %d (should be 0 for hardlink deletion)", expectedHardlinkSpaceDecrease, actualHardlinkSpaceDecrease)
	}

	var checkAttr Attr
	if st := m.GetAttr(ctx, originalInode, &checkAttr); st != 0 {
		t.Fatalf("Original file should still exist after hardlink deletion: %s", st)
	}
	if checkAttr.Nlink != hardlinkAttr.Nlink-1 {
		t.Fatalf("Original file Nlink should decrease by 1: expected %d, got %d", hardlinkAttr.Nlink-1, checkAttr.Nlink)
	}

	m.Unlink(ctx, parent, hardlinkFileName)

	// Test: Batch unlink multiple hardlinks pointing to the same inode in one call
	var multiHardlinkOriginal Ino
	var multiHardlinkOriginalAttr Attr
	multiHardlinkFileSize := uint64(12288) // 12KB
	multiHardlinkOriginalName := "multi_hardlink_original"
	if st := m.Create(ctx, parent, multiHardlinkOriginalName, 0644, 0, 0, &multiHardlinkOriginal, &multiHardlinkOriginalAttr); st != 0 {
		t.Fatalf("Create original file for multi-hardlink test: %s", st)
	}
	if st := m.SetAttr(ctx, multiHardlinkOriginal, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
		t.Fatalf("SetAttr UID and GID for multi-hardlink original file: %s", st)
	}
	var multiHardlinkSliceId uint64
	if st := m.NewSlice(ctx, &multiHardlinkSliceId); st != 0 {
		t.Fatalf("NewSlice for multi-hardlink original file: %s", st)
	}
	multiHardlinkSlice := Slice{Id: multiHardlinkSliceId, Size: uint32(multiHardlinkFileSize), Len: uint32(multiHardlinkFileSize)}
	if st := m.Write(ctx, multiHardlinkOriginal, 0, 0, multiHardlinkSlice, time.Now()); st != 0 {
		t.Fatalf("Write data to multi-hardlink original file: %s", st)
	}

	hardlinkNames := []string{"multi_hardlink1", "multi_hardlink2", "multi_hardlink3"}
	for _, linkName := range hardlinkNames {
		if st := m.Link(ctx, multiHardlinkOriginal, parent, linkName, &multiHardlinkOriginalAttr); st != 0 {
			t.Fatalf("Create hardlink %s: %s", linkName, st)
		}
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota before multi-hardlink batch unlink: %s", err)
	}
	ugQuotaBeforeMultiHardlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaBeforeMultiHardlink == nil {
		t.Fatalf("User group quota not found before multi-hardlink batch unlink")
	}

	var initialAttr Attr
	if st := m.GetAttr(ctx, multiHardlinkOriginal, &initialAttr); st != 0 {
		t.Fatalf("GetAttr for multi-hardlink original file: %s", st)
	}
	initialNlink := initialAttr.Nlink
	expectedFinalNlink := initialNlink - uint32(len(hardlinkNames))
	if initialNlink < uint32(len(hardlinkNames)+1) {
		t.Fatalf("Expected Nlink >= %d, got %d", len(hardlinkNames)+1, initialNlink)
	}

	var multiHardlinkEntries []*Entry
	for _, linkName := range hardlinkNames {
		var linkAttr Attr
		if st := m.GetAttr(ctx, multiHardlinkOriginal, &linkAttr); st != 0 {
			t.Fatalf("GetAttr for hardlink %s: %s", linkName, st)
		}
		multiHardlinkEntries = append(multiHardlinkEntries, &Entry{
			Inode: multiHardlinkOriginal,
			Name:  []byte(linkName),
			Attr:  &linkAttr,
		})
	}

	count = 0
	if st := m.getBase().BatchUnlink(ctx, parent, multiHardlinkEntries, &count, false); st != 0 {
		t.Fatalf("BatchUnlink multiple hardlinks failed: %s", st)
	}

	if count != uint64(len(hardlinkNames)) {
		t.Fatalf("BatchUnlink multiple hardlinks count mismatch: expected %d, got %d", len(hardlinkNames), count)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after multi-hardlink batch unlink: %s", err)
	}
	ugQuotaAfterMultiHardlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterMultiHardlink == nil {
		t.Fatalf("User group quota not found after multi-hardlink batch unlink")
	}

	// After the new strategy, hardlinks moved to trash do not decrease user/group quota
	expectedMultiHardlinkInodeDecrease := int64(0)
	expectedMultiHardlinkSpaceDecrease := int64(0)

	actualMultiHardlinkInodeDecrease := ugQuotaBeforeMultiHardlink.UsedInodes - ugQuotaAfterMultiHardlink.UsedInodes
	actualMultiHardlinkSpaceDecrease := ugQuotaBeforeMultiHardlink.UsedSpace - ugQuotaAfterMultiHardlink.UsedSpace

	if actualMultiHardlinkInodeDecrease != expectedMultiHardlinkInodeDecrease {
		t.Fatalf("Multi-hardlink batch unlink: user group quota inode decrease mismatch: expected %d, got %d", expectedMultiHardlinkInodeDecrease, actualMultiHardlinkInodeDecrease)
	}
	if actualMultiHardlinkSpaceDecrease != expectedMultiHardlinkSpaceDecrease {
		t.Fatalf("Multi-hardlink batch unlink: user group quota space decrease mismatch: expected %d, got %d (should be 0 for hardlink deletion)", expectedMultiHardlinkSpaceDecrease, actualMultiHardlinkSpaceDecrease)
	}

	var finalAttr Attr
	if st := m.GetAttr(ctx, multiHardlinkOriginal, &finalAttr); st != 0 {
		t.Fatalf("Original file should still exist after multi-hardlink deletion: %s", st)
	}
	if finalAttr.Nlink != expectedFinalNlink {
		t.Fatalf("Original file Nlink mismatch: expected %d, got %d (initial was %d, deleted %d links)", expectedFinalNlink, finalAttr.Nlink, initialNlink, len(hardlinkNames))
	}

	for _, linkName := range hardlinkNames {
		var lookupInode Ino
		var lookupAttr Attr
		if st := m.Lookup(ctx, parent, linkName, &lookupInode, &lookupAttr, false); st == 0 {
			t.Fatalf("Hardlink %s should have been deleted, but still exists", linkName)
		}
	}

	var originalLookupInode Ino
	var originalLookupAttr Attr
	if st := m.Lookup(ctx, parent, multiHardlinkOriginalName, &originalLookupInode, &originalLookupAttr, false); st != 0 {
		t.Fatalf("Original file %s should still exist: %s", multiHardlinkOriginalName, st)
	}
	if originalLookupInode != multiHardlinkOriginal {
		t.Fatalf("Original file inode mismatch: expected %d, got %d", multiHardlinkOriginal, originalLookupInode)
	}

	m.Unlink(ctx, parent, multiHardlinkOriginalName)

	// Test: Batch unlink symlinks
	symlinkNames := []string{"symlink1", "symlink2", "symlink3"}
	var symlinkInodes []Ino
	var symlinkAttrs []Attr
	for _, symlinkName := range symlinkNames {
		var symlinkInode Ino
		var symlinkAttr Attr
		target := "/target/" + symlinkName
		if st := m.Symlink(ctx, parent, symlinkName, target, &symlinkInode, &symlinkAttr); st != 0 {
			t.Fatalf("Create symlink %s: %s", symlinkName, st)
		}
		if st := m.SetAttr(ctx, symlinkInode, SetAttrUID|SetAttrGID, 0, &Attr{Uid: uid, Gid: gid}); st != 0 {
			t.Fatalf("SetAttr UID and GID for symlink %s: %s", symlinkName, st)
		}
		symlinkInodes = append(symlinkInodes, symlinkInode)
		symlinkAttrs = append(symlinkAttrs, symlinkAttr)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota before symlink batch unlink: %s", err)
	}
	ugQuotaBeforeSymlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaBeforeSymlink == nil {
		t.Fatalf("User group quota not found before symlink batch unlink")
	}

	var symlinkEntries []*Entry
	for i, symlinkName := range symlinkNames {
		var symlinkAttr Attr
		if st := m.GetAttr(ctx, symlinkInodes[i], &symlinkAttr); st != 0 {
			t.Fatalf("GetAttr for symlink %s: %s", symlinkName, st)
		}
		symlinkEntries = append(symlinkEntries, &Entry{
			Inode: symlinkInodes[i],
			Name:  []byte(symlinkName),
			Attr:  &symlinkAttr,
		})
	}

	count = 0
	if st := m.getBase().BatchUnlink(ctx, parent, symlinkEntries, &count, false); st != 0 {
		t.Fatalf("BatchUnlink symlinks failed: %s", st)
	}

	if count != uint64(len(symlinkNames)) {
		t.Fatalf("BatchUnlink symlinks count mismatch: expected %d, got %d", len(symlinkNames), count)
	}

	m.getBase().doFlushQuotas()
	time.Sleep(200 * time.Millisecond)

	qs = make(map[string]*Quota)
	if err := m.HandleQuota(ctx, QuotaGet, "", uid, gid, qs, false, false, false); err != nil {
		t.Fatalf("Get user group quota after symlink batch unlink: %s", err)
	}
	ugQuotaAfterSymlink := qs[fmt.Sprintf("uid:%d", uid)]
	if ugQuotaAfterSymlink == nil {
		t.Fatalf("User group quota not found after symlink batch unlink")
	}

	expectedSymlinkInodeDecrease := int64(3)
	expectedSymlinkSpaceDecrease := 3 * align4K(0)

	actualSymlinkInodeDecrease := ugQuotaBeforeSymlink.UsedInodes - ugQuotaAfterSymlink.UsedInodes
	actualSymlinkSpaceDecrease := ugQuotaBeforeSymlink.UsedSpace - ugQuotaAfterSymlink.UsedSpace

	if actualSymlinkInodeDecrease != expectedSymlinkInodeDecrease {
		t.Fatalf("Symlink batch unlink: user group quota inode decrease mismatch: expected %d, got %d", expectedSymlinkInodeDecrease, actualSymlinkInodeDecrease)
	}
	if actualSymlinkSpaceDecrease != expectedSymlinkSpaceDecrease {
		t.Fatalf("Symlink batch unlink: user group quota space decrease mismatch: expected %d, got %d (should be %d for symlink deletion)", expectedSymlinkSpaceDecrease, actualSymlinkSpaceDecrease, expectedSymlinkSpaceDecrease)
	}

	for _, symlinkName := range symlinkNames {
		var lookupInode Ino
		var lookupAttr Attr
		if st := m.Lookup(ctx, parent, symlinkName, &lookupInode, &lookupAttr, false); st == 0 {
			t.Fatalf("Symlink %s should have been deleted, but still exists", symlinkName)
		}
	}

	if err := m.HandleQuota(ctx, QuotaDel, "", uid, gid, nil, false, false, false); err != nil {
		t.Fatalf("Delete user group quota: %s", err)
	}
}


================================================
FILE: pkg/meta/benchmarks_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"fmt"
	"syscall"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/sirupsen/logrus"
)

const (
	redisAddr = "redis://127.0.0.1/1"
	sqlAddr   = "sqlite3://juicefs.db"
	// sqlAddr = "mysql://root:@/juicefs" // MySQL
	// sqlAddr = "mysql://root:@tcp(127.0.0.1:4000)/juicefs" // TiDB
	tkvAddr = "badger://test_db"
	// tkvAddr = "tikv://127.0.0.1:2379/juicefs"
)

func init() {
	utils.SetLogLevel(logrus.InfoLevel)
	// utils.SetOutFile("bench-test.log")
}

func encodeSlices(size int) []string {
	w := utils.NewBuffer(24)
	w.Put32(0)
	w.Put64(1014)
	w.Put32(122)
	w.Put32(0)
	w.Put32(122)
	v := string(w.Bytes())
	vals := make([]string, size)
	for i := range vals {
		vals[i] = v
	}
	return vals
}

func encodeSlicesAsBuf(nSlices uint32) []byte {
	w := utils.NewBuffer(nSlices * sliceBytes)
	for i := uint32(0); i < nSlices; i++ {
		w.Put32(0)
		w.Put64(1014)
		w.Put32(122)
		w.Put32(0)
		w.Put32(122)
	}
	return w.Bytes()
}

func BenchmarkReadSlices(b *testing.B) {
	cases := []struct {
		desc string
		size int
	}{
		{"small", 4},
		{"mid", 64},
		{"large", 1024},
	}
	for _, c := range cases {
		b.Run(c.desc, func(b *testing.B) {
			vals := encodeSlices(c.size)
			b.ResetTimer()
			var slices []*slice
			for i := 0; i < b.N; i++ {
				slices = readSlices(vals)
			}
			if len(slices) != len(vals) {
				b.Fail()
			}
		})
	}
}

func BenchmarkReadSliceBuf(b *testing.B) {
	cases := []struct {
		desc string
		size uint32
	}{
		{"small", 4},
		{"mid", 64},
		{"large", 1024},
	}
	for _, c := range cases {
		b.Run(c.desc, func(b *testing.B) {
			buf := encodeSlicesAsBuf(c.size)
			b.ResetTimer()
			var slices []*slice
			for i := 0; i < b.N; i++ {
				slices = readSliceBuf(buf)
			}
			if len(slices) != int(c.size) {
				b.Fail()
			}
		})
	}
}

func prepareParent(m Meta, name string, inode *Ino) error {
	ctx := Background()
	if err := m.Remove(ctx, 1, name, true, RmrDefaultThreads, nil); err != 0 && err != syscall.ENOENT {
		return fmt.Errorf("remove: %s", err)
	}
	if err := m.Mkdir(ctx, 1, name, 0755, 0, 0, inode, nil); err != 0 {
		return fmt.Errorf("mkdir: %s", err)
	}
	return nil
}

func benchMkdir(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchMkdir", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Mkdir(ctx, parent, fmt.Sprintf("d%d", i), 0755, 0, 0, &inode, nil); err != 0 {
			b.Fatalf("mkdir: %s", err)
		}
	}
}

func benchMvdir(b *testing.B, m Meta) { // rename dir
	var parent, inode Ino
	if err := prepareParent(m, "benchMvdir", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Mkdir(ctx, parent, "d0", 0755, 0, 0, &inode, nil); err != 0 {
		b.Fatalf("mkdir: %s", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Rename(ctx, parent, fmt.Sprintf("d%d", i), parent, fmt.Sprintf("d%d", i+1), 0, nil, nil); err != 0 {
			b.Fatalf("rename dir: %s", err)
		}
	}
}

func benchRmdir(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchRmdir", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		if err := m.Mkdir(ctx, parent, "dir", 0755, 0, 0, &inode, nil); err != 0 {
			b.Fatalf("mkdir: %s", err)
		}
		b.StartTimer()
		if err := m.Rmdir(ctx, parent, "dir"); err != 0 {
			b.Fatalf("rmdir: %s", err)
		}
	}
}

func benchResolve(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchResolve", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var child Ino = parent
	for i := 0; i < 5; i++ {
		if err := m.Mkdir(ctx, child, "d", 0755, 0, 0, &child, nil); err != 0 {
			b.Fatalf("mkdir: %s", err)
		}
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Resolve(ctx, parent, "d/d/d/d/d", nil, nil); err != 0 {
			if err == syscall.ENOTSUP {
				b.SkipNow()
				return
			}
			b.Fatalf("resolve: %s", err)
		}
	}
}

func benchReaddir(b *testing.B, m Meta, n int) {
	var parent Ino
	if err := prepareParent(m, "benchReaddir", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	for j := 0; j < n; j++ {
		if err := m.Create(ctx, parent, fmt.Sprintf("f%d", j), 0644, 022, 0, nil, nil); err != 0 {
			b.Fatalf("create: %s", err)
		}
	}
	var entries []*Entry
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		entries = entries[:0]
		if err := m.Readdir(ctx, parent, 1, &entries); err != 0 {
			b.Fatalf("readdir: %s", err)
		}
		if len(entries) != n+2 {
			b.Fatalf("files: %d != %d", len(entries), n+2)
		}
	}
}

func benchMknod(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchMknod", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Mknod(ctx, parent, fmt.Sprintf("f%d", i), TypeFile, 0644, 022, 0, "", nil, nil); err != 0 {
			b.Fatalf("mknod: %s", err)
		}
	}
}

func benchCreate(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchCreate", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Create(ctx, parent, fmt.Sprintf("f%d", i), 0644, 022, 0, nil, nil); err != 0 {
			b.Fatalf("create: %s", err)
		}
	}
}

func benchRename(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchRename", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "f0", 0644, 022, 0, nil, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Rename(ctx, parent, fmt.Sprintf("f%d", i), parent, fmt.Sprintf("f%d", i+1), 0, nil, nil); err != 0 {
			b.Fatalf("rename file: %s", err)
		}
	}
}

func benchUnlink(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchUnlink", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		if err := m.Create(ctx, parent, "file", 0644, 022, 0, nil, nil); err != 0 {
			b.Fatalf("create: %s", err)
		}
		b.StartTimer()
		if err := m.Unlink(ctx, parent, "file"); err != 0 {
			b.Fatalf("unlink: %s", err)
		}
	}
}

func benchLookup(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchLookup", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, nil, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	var inode Ino
	var attr Attr
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Lookup(ctx, parent, "file", &inode, &attr, false); err != 0 {
			b.Fatalf("lookup: %s", err)
		}
	}
}

func benchGetAttr(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchGetAttr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	var attr Attr
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.GetAttr(ctx, inode, &attr); err != 0 {
			b.Fatalf("getattr: %s", err)
		}
	}
}

func benchSetAttr(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchSetAttr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	var attr = Attr{Mode: 0644}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		attr.Mode ^= 1
		if err := m.SetAttr(ctx, inode, SetAttrMode, 0, &attr); err != 0 {
			b.Fatalf("setattr: %s", err)
		}
	}
}

func benchAccess(b *testing.B, m Meta) { // contains a Getattr
	var parent, inode Ino
	if err := prepareParent(m, "benchAccess", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	myCtx := NewContext(100, 1, []uint32{1})
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Access(myCtx, inode, 4, nil); err != 0 && err != syscall.EACCES {
			b.Fatalf("access: %s", err)
		}
	}
}

func benchSetXattr(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchSetXattr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	b.ResetTimer()
	value := []byte("value0")
	for i := 0; i < b.N; i++ {
		value[5] = byte(i%10 + 48)
		if err := m.SetXattr(ctx, inode, "key", value, 0); err != 0 {
			b.Fatalf("setxattr: %s", err)
		}
	}
}

func benchGetXattr(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchGetXattr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	if err := m.SetXattr(ctx, inode, "key", []byte("value"), 0); err != 0 {
		b.Fatalf("setxattr: %s", err)
	}
	var buf []byte
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.GetXattr(ctx, inode, "key", &buf); err != 0 {
			b.Fatalf("getxattr: %s", err)
		}
	}
}

func benchRemoveXattr(b *testing.B, m Meta) {
	var parent, inode Ino
	if err := prepareParent(m, "benchRemoveXattr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		if err := m.SetXattr(ctx, inode, "key", []byte("value"), 0); err != 0 {
			b.Fatalf("setxattr: %s", err)
		}
		b.StartTimer()
		if err := m.RemoveXattr(ctx, inode, "key"); err != 0 {
			b.Fatalf("removexattr: %s", err)
		}
	}
}

func benchListXattr(b *testing.B, m Meta, n int) {
	var parent, inode Ino
	if err := prepareParent(m, "benchListXattr", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	if err := m.Create(ctx, parent, "fxattr", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	for j := 0; j < n; j++ {
		if err := m.SetXattr(ctx, inode, fmt.Sprintf("key%d", j), []byte("value"), 0); err != 0 {
			b.Fatalf("setxattr: %s", err)
		}
	}
	var buf []byte
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.ListXattr(ctx, inode, &buf); err != 0 {
			b.Fatalf("removexattr: %s", err)
		}
	}
}

func benchLink(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchLink", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var inode Ino
	if err := m.Create(ctx, parent, "source", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Link(ctx, inode, parent, fmt.Sprintf("l%d", i), nil); err != 0 {
			b.Fatalf("link: %s", err)
		}
	}
}

func benchSymlink(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchSymlink", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var inode Ino
	if err := m.Create(ctx, parent, "source", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Symlink(ctx, parent, fmt.Sprintf("s%d", i), "/benchSymlink/source", nil, nil); err != 0 {
			b.Fatalf("symlink: %s", err)
		}
	}
}

/*
func benchReadlink(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchReadlink", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var inode Ino
	if err := m.Create(ctx, parent, "source", 0644, 022, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	if err := m.Symlink(ctx, parent, "slink", "/benchReadlink/source", &inode, nil); err != 0 {
		b.Fatalf("symlink: %s", err)
	}
	var buf []byte
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.ReadLink(ctx, inode, &buf); err != 0 {
			b.Fatalf("readlink: %s", err)
		}
	}
}
*/

func benchNewChunk(b *testing.B, m Meta) {
	ctx := Background()
	var sliceId uint64
	for i := 0; i < b.N; i++ {
		if err := m.NewSlice(ctx, &sliceId); err != 0 {
			b.Fatalf("newchunk: %s", err)
		}
	}
}

func benchWrite(b *testing.B, m Meta) {
	var parent Ino
	if err := prepareParent(m, "benchWrite", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var inode Ino
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	var (
		sliceId uint64
		offset  uint32
		step    uint32 = 1024
	)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.NewSlice(ctx, &sliceId); err != 0 {
			b.Fatalf("newchunk: %s", err)
		}
		if err := m.Write(ctx, inode, 0, offset, Slice{Id: sliceId, Size: step, Len: step}, time.Now()); err != 0 {
			b.Fatalf("write: %s", err)
		}
		offset += step
		if offset+step > ChunkSize {
			offset = 0
		}
	}
}

func benchRead(b *testing.B, m Meta, n int) {
	var parent Ino
	if err := prepareParent(m, "benchRead", &parent); err != nil {
		b.Fatal(err)
	}
	ctx := Background()
	var inode Ino
	if err := m.Create(ctx, parent, "file", 0644, 022, 0, &inode, nil); err != 0 {
		b.Fatalf("create: %s", err)
	}
	var sliceId uint64
	var step uint32 = 1024
	for j := 0; j < n; j++ {
		if err := m.NewSlice(ctx, &sliceId); err != 0 {
			b.Fatalf("newchunk: %s", err)
		}
		if err := m.Write(ctx, inode, 0, uint32(j)*step, Slice{Id: sliceId, Size: step, Len: step}, time.Now()); err != 0 {
			b.Fatalf("write: %s", err)
		}
	}
	var slices []Slice
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := m.Read(ctx, inode, 0, &slices); err != 0 {
			b.Fatalf("read: %s", err)
		}
	}
}

func benchmarkDir(b *testing.B, m Meta) { // mkdir, rename dir, rmdir, readdir
	b.Run("mkdir", func(b *testing.B) { benchMkdir(b, m) })
	b.Run("mvdir", func(b *testing.B) { benchMvdir(b, m) })
	b.Run("rmdir", func(b *testing.B) { benchRmdir(b, m) })
	b.Run("resolve", func(b *testing.B) { benchResolve(b, m) })
	b.Run("readdir_10", func(b *testing.B) { benchReaddir(b, m, 10) })
	b.Run("readdir_1k", func(b *testing.B) { benchReaddir(b, m, 1000) })
	// b.Run("readdir_100k", func(b *testing.B) { benchReaddir(b, m, 100000) })
}

func benchmarkFile(b *testing.B, m Meta) {
	b.Run("mknod", func(b *testing.B) { benchMknod(b, m) })
	b.Run("create", func(b *testing.B) { benchCreate(b, m) })
	b.Run("rename", func(b *testing.B) { benchRename(b, m) })
	b.Run("unlink", func(b *testing.B) { benchUnlink(b, m) })
	b.Run("lookup", func(b *testing.B) { benchLookup(b, m) })
	b.Run("getattr", func(b *testing.B) { benchGetAttr(b, m) })
	b.Run("setattr", func(b *testing.B) { benchSetAttr(b, m) })
	b.Run("access", func(b *testing.B) { benchAccess(b, m) })
}

func benchmarkXattr(b *testing.B, m Meta) {
	b.Run("setxattr", func(b *testing.B) { benchSetXattr(b, m) })
	b.Run("getxattr", func(b *testing.B) { benchGetXattr(b, m) })
	b.Run("removexattr", func(b *testing.B) { benchRemoveXattr(b, m) })
	b.Run("listxattr_1", func(b *testing.B) { benchListXattr(b, m, 1) })
	b.Run("listxattr_10", func(b *testing.B) { benchListXattr(b, m, 10) })
}

func benchmarkLink(b *testing.B, m Meta) {
	b.Run("link", func(b *testing.B) { benchLink(b, m) })
	b.Run("symlink", func(b *testing.B) { benchSymlink(b, m) })
	// maybe meaningless since symlink would be cached
	// b.Run("readlink", func(b *testing.B) { benchReadlink(b, m) })
}

func benchmarkData(b *testing.B, m Meta) {
	m.OnMsg(DeleteSlice, func(args ...interface{}) error { return nil })
	m.OnMsg(CompactChunk, func(args ...interface{}) error { return nil })
	b.Run("newchunk", func(b *testing.B) { benchNewChunk(b, m) })
	b.Run("write", func(b *testing.B) { benchWrite(b, m) })
	b.Run("read_1", func(b *testing.B) { benchRead(b, m, 1) })
	b.Run("read_10", func(b *testing.B) { benchRead(b, m, 10) })
}

func benchmarkAll(b *testing.B, m Meta) {
	_ = m.Init(&Format{Name: "benchmarkAll", DirStats: true}, true)
	_ = m.NewSession(false)
	benchmarkDir(b, m)
	benchmarkFile(b, m)
	benchmarkXattr(b, m)
	benchmarkLink(b, m)
	benchmarkData(b, m)
}

func BenchmarkRedis(b *testing.B) {
	m := NewClient(redisAddr, nil)
	benchmarkAll(b, m)
}

func BenchmarkSQL(b *testing.B) {
	m := NewClient(sqlAddr, nil)
	benchmarkAll(b, m)
}

func BenchmarkTKV(b *testing.B) {
	m := NewClient(tkvAddr, nil)
	benchmarkAll(b, m)
}


================================================
FILE: pkg/meta/config.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"crypto/aes"
	"crypto/cipher"
	"crypto/md5"
	"crypto/rand"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"time"

	"github.com/emmansun/gmsm/sm3"
	"github.com/emmansun/gmsm/sm4"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/pkg/errors"
)

// Config for clients.
type Config struct {
	Retries            int
	MaxDeletes         int
	SkipDirNlink       int
	CaseInsensi        bool
	ReadOnly           bool
	NoBGJob            bool // disable background jobs like clean-up, backup, etc.
	OpenCache          time.Duration
	OpenCacheLimit     uint64 // max number of files to cache (soft limit)
	Heartbeat          time.Duration
	MountPoint         string
	Subdir             string
	AtimeMode          string
	DirStatFlushPeriod time.Duration
	SkipDirMtime       time.Duration
	Sid                uint64
	SortDir            bool
	FastStatfs         bool
	NetworkInterfaces  []string // list of network interfaces to use for IP discovery (empty means all)
}

func DefaultConf() *Config {
	return &Config{Retries: 10, MaxDeletes: 2, Heartbeat: 12 * time.Second, AtimeMode: NoAtime, DirStatFlushPeriod: 1 * time.Second}
}

func (c *Config) SelfCheck() {
	if c.MaxDeletes == 0 {
		logger.Warnf("Deleting object will be disabled since max-deletes is 0")
	}
	if c.Heartbeat != 0 && c.Heartbeat < time.Second {
		logger.Warnf("heartbeat should not be less than 1 second")
		c.Heartbeat = time.Second
	}
	if c.Heartbeat > time.Minute*10 {
		logger.Warnf("heartbeat should not be greater than 10 minutes")
		c.Heartbeat = time.Minute * 10
	}
}

type Format struct {
	Name             string
	UUID             string
	Storage          string
	StorageClass     string `json:",omitempty"`
	Bucket           string
	AccessKey        string `json:",omitempty"`
	SecretKey        string `json:",omitempty"`
	SessionToken     string `json:",omitempty"`
	BlockSize        int
	Compression      string `json:",omitempty"`
	Shards           int    `json:",omitempty"`
	HashPrefix       bool   `json:",omitempty"`
	Capacity         uint64 `json:",omitempty"`
	Inodes           uint64 `json:",omitempty"`
	EncryptKey       string `json:",omitempty"`
	EncryptAlgo      string `json:",omitempty"`
	KeyEncrypted     bool   `json:",omitempty"`
	UploadLimit      int64  `json:",omitempty"` // Mbps
	DownloadLimit    int64  `json:",omitempty"` // Mbps
	TrashDays        int
	MetaVersion      int    `json:",omitempty"`
	MinClientVersion string `json:",omitempty"`
	MaxClientVersion string `json:",omitempty"`
	DirStats         bool   `json:",omitempty"`
	UserGroupQuota   bool   `json:",omitempty"`
	EnableACL        bool
	RangerRestUrl    string `json:",omitempty"`
	RangerService    string `json:",omitempty"`

	//kerberos
	KerbConf string `json:",omitempty"`
}

func (f *Format) update(old *Format, force bool) error {
	if force {
		logger.Warnf("Existing volume will be overwrited: %s", old)
	} else {
		var args []interface{}
		switch {
		case f.Name != old.Name:
			args = []interface{}{"name", old.Name, f.Name}
		case f.BlockSize != old.BlockSize:
			args = []interface{}{"block size", old.BlockSize, f.BlockSize}
		case f.Compression != old.Compression:
			args = []interface{}{"compression", old.Compression, f.Compression}
		case f.Shards != old.Shards:
			args = []interface{}{"shards", old.Shards, f.Shards}
		case f.HashPrefix != old.HashPrefix:
			args = []interface{}{"hash prefix", old.HashPrefix, f.HashPrefix}
		case f.MetaVersion != old.MetaVersion:
			args = []interface{}{"meta version", old.MetaVersion, f.MetaVersion}
		}
		if args == nil {
			if f.UUID != old.UUID {
				if err := f.Decrypt(); err != nil {
					return fmt.Errorf("decrypt format: %s", err)
				}
				f.UUID = old.UUID // UUID cannot be changed alone
				if err := f.Encrypt(); err != nil {
					return fmt.Errorf("encrypt format: %s", err)
				}
			}
		} else {
			return fmt.Errorf("cannot update volume %s from %v to %v", args...)
		}
	}
	return nil
}

func (f *Format) RemoveSecret() {
	if f.SecretKey != "" {
		f.SecretKey = "removed"
	}
	if f.SessionToken != "" {
		f.SessionToken = "removed"
	}
	if f.EncryptKey != "" {
		f.EncryptKey = "removed"
	}
}

func (f *Format) String() string {
	t := *f
	t.RemoveSecret()
	s, _ := json.MarshalIndent(t, "", "  ")
	return string(s)
}

func (f *Format) CheckVersion() error {
	if f.MetaVersion > MaxVersion {
		return fmt.Errorf("incompatible metadata version: %d; please upgrade the client", f.MetaVersion)
	}

	ver := version.GetVersion()
	return f.CheckCliVersion(&ver)
}

func (f *Format) CheckCliVersion(ver *version.Semver) error {
	if ver == nil {
		return errors.New("version is nil")
	}

	if f.MinClientVersion != "" {
		minClientVer := version.Parse(f.MinClientVersion)
		r, err := version.CompareVersions(ver, minClientVer)
		if err == nil && r < 0 {
			err = fmt.Errorf("allowed minimum version: %s; please upgrade the client", f.MinClientVersion)
		}
		if err != nil {
			return err
		}
	}
	if f.MaxClientVersion != "" {
		maxClientVer := version.Parse(f.MaxClientVersion)
		r, err := version.CompareVersions(ver, maxClientVer)
		if err == nil && r > 0 {
			err = fmt.Errorf("allowed maximum version: %s; please use an older client", f.MaxClientVersion)
		}
		if err != nil {
			return err
		}
	}
	return nil
}

func newCipher(algo string, key string) (cipher.AEAD, error) {
	switch algo {
	case object.SM4GCM:
		block, err := sm4.NewCipher(sm3.Kdf([]byte(key), 16))
		if err != nil {
			return nil, fmt.Errorf("new sm4 cipher: %s", err)
		}
		aead, err := cipher.NewGCM(block)
		if err != nil {
			return nil, fmt.Errorf("new sm4 GCM: %s", err)
		}
		return aead, nil
	default:
		hashKey := md5.Sum([]byte(key))
		block, err := aes.NewCipher(hashKey[:])
		if err != nil {
			return nil, fmt.Errorf("new cipher: %s", err)
		}
		aead, err := cipher.NewGCM(block)
		if err != nil {
			return nil, fmt.Errorf("new GCM: %s", err)
		}
		return aead, nil
	}
}

func (f *Format) Encrypt() error {
	if f.KeyEncrypted || f.SecretKey == "" && f.EncryptKey == "" && f.SessionToken == "" {
		return nil
	}
	ci, err := newCipher(f.EncryptAlgo, f.UUID)
	if err != nil {
		return err
	}
	encrypt := func(k *string) {
		if *k == "" {
			return
		}
		nonce := make([]byte, ci.NonceSize())
		if _, err = io.ReadFull(rand.Reader, nonce); err != nil {
			logger.Fatalf("generate nonce for secret key: %s", err)
		}
		ciphertext := ci.Seal(nil, nonce, []byte(*k), nil)
		buf := make([]byte, ci.NonceSize()+len(ciphertext))
		copy(buf, nonce)
		copy(buf[ci.NonceSize():], ciphertext)
		*k = base64.StdEncoding.EncodeToString(buf)
	}

	encrypt(&f.SecretKey)
	encrypt(&f.SessionToken)
	encrypt(&f.EncryptKey)
	f.KeyEncrypted = true
	return nil
}

func (f *Format) Decrypt() error {
	if !f.KeyEncrypted {
		return nil
	}

	ci, err := newCipher(f.EncryptAlgo, f.UUID)
	if err != nil {
		return err
	}
	decrypt := func(k *string) {
		if *k == "" {
			return
		}
		if *k == "removed" {
			err = fmt.Errorf("secret was removed; please correct it with `config` command")
			return
		}
		buf, e := base64.StdEncoding.DecodeString(*k)
		if e != nil {
			err = fmt.Errorf("decode key: %s", e)
			return
		}
		plaintext, e := ci.Open(nil, buf[:ci.NonceSize()], buf[ci.NonceSize():], nil)
		if e != nil {
			err = fmt.Errorf("open cipher: %s", e)
			return
		}
		*k = string(plaintext)
	}

	decrypt(&f.EncryptKey)
	decrypt(&f.SecretKey)
	decrypt(&f.SessionToken)
	f.KeyEncrypted = false
	return err
}


================================================
FILE: pkg/meta/config_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"strings"
	"testing"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/stretchr/testify/assert"
)

func TestRemoveSecret(t *testing.T) {
	format := Format{Name: "test", SecretKey: "testSecret", EncryptKey: "testEncrypt", SessionToken: "token"}
	if err := format.Encrypt(); err != nil {
		t.Fatal(err)
	}

	format.RemoveSecret()
	if format.SecretKey != "removed" || format.EncryptKey != "removed" || format.SessionToken != "removed" {
		t.Fatalf("invalid format: %+v", format)
	}

	if err := format.Decrypt(); err != nil && !strings.Contains(err.Error(), "secret was removed") {
		t.Fatal(err)
	}
}

func TestEncrypt(t *testing.T) {
	cases := []struct {
		algo string
	}{
		{object.AES256GCM_RSA},
		{object.CHACHA20_RSA},
		{object.SM4GCM},
	}
	format := Format{Name: "test", SecretKey: "testSecret", SessionToken: "token", EncryptKey: "testEncrypt"}
	for _, c := range cases {
		format.EncryptAlgo = c.algo
		t.Run(c.algo, func(t *testing.T) {
			if err := format.Encrypt(); err != nil {
				t.Fatalf("Format encrypt: %s", err)
			}
			if format.SecretKey == "testSecret" || format.SessionToken == "token" || format.EncryptKey == "testEncrypt" {
				t.Fatalf("invalid format: %+v", format)
			}
			if err := format.Decrypt(); err != nil {
				t.Fatalf("Format decrypt: %s", err)
			}
			if format.SecretKey != "testSecret" || format.SessionToken != "token" || format.EncryptKey != "testEncrypt" {
				t.Fatalf("invalid format: %+v", format)
			}
		})
	}
}

func TestFormat_Update_KeyConflict(t *testing.T) {
	oldFormat := Format{Name: "test", UUID: "UUID-A"}

	newFormat := Format{Name: "test", UUID: "UUID-B", SecretKey: "secret"}
	if err := newFormat.Encrypt(); err != nil {
		t.Fatal(err)
	}
	assert.True(t, newFormat.KeyEncrypted)

	if err := newFormat.update(&oldFormat, false); err != nil {
		t.Fatal(err)
	}

	assert.Equal(t, "UUID-A", newFormat.UUID)
	assert.True(t, newFormat.KeyEncrypted)

	if err := newFormat.Decrypt(); err != nil {
		t.Fatalf("failed to decrypt with new UUID (which is old UUID A): %s", err)
	}

	assert.Equal(t, "secret", newFormat.SecretKey)
}


================================================
FILE: pkg/meta/context.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"time"
)

type CtxKey string

type Context interface {
	context.Context
	Gid() uint32
	Gids() []uint32
	Uid() uint32
	Pid() uint32
	WithValue(k, v interface{}) Context // should remain const semantics, so user can chain it
	Cancel()
	Canceled() bool
	CheckPermission() bool
}

func Background() Context {
	return WrapContext(context.Background())
}

type wrapContext struct {
	context.Context
	cancel func()
	pid    uint32
	uid    uint32
	gids   []uint32
}

func (c *wrapContext) Uid() uint32 {
	return c.uid
}

func (c *wrapContext) Gid() uint32 {
	return c.gids[0]
}

func (c *wrapContext) Gids() []uint32 {
	return c.gids
}

func (c *wrapContext) Pid() uint32 {
	return c.pid
}

func (c *wrapContext) Cancel() {
	if c.cancel != nil {
		c.cancel()
	}
}

func (c *wrapContext) Canceled() bool {
	return c.Err() != nil
}

func (c *wrapContext) WithValue(k, v interface{}) Context {
	wc := *c // gids is a const, so it's safe to shallow copy
	wc.Context = context.WithValue(c.Context, k, v)
	return &wc
}

func (c *wrapContext) CheckPermission() bool {
	return true
}

func NewContext(pid, uid uint32, gids []uint32) Context {
	return WrapWithCancel(context.Background(), pid, uid, gids)
}

func WrapContext(ctx context.Context) Context {
	return WrapWithCancel(ctx, 0, 0, []uint32{0})
}

func WrapWithCancel(ctx context.Context, pid, uid uint32, gids []uint32) Context {
	c, cancel := context.WithCancel(ctx)
	return &wrapContext{c, cancel, pid, uid, gids}
}

func WrapWithTimeout(ctx Context, timeout time.Duration) Context {
	c, cancel := context.WithTimeout(ctx, timeout)
	return &wrapContext{c, cancel, ctx.Pid(), ctx.Uid(), ctx.Gids()}
}

func WrapWithoutCancel(ctx context.Context, pid, uid uint32, gids []uint32) Context {
	return &wrapContext{ctx, nil, pid, uid, gids}
}

func containsGid(ctx Context, gid uint32) bool {
	for _, g := range ctx.Gids() {
		if g == gid {
			return true
		}
	}
	return false
}


================================================
FILE: pkg/meta/dump.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strings"
	"sync"
	"unicode/utf8"

	"github.com/goccy/go-json"
	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
)

const (
	jsonIndent    = "  "
	jsonWriteSize = 64 << 10
)

type DumpedCounters struct {
	UsedSpace         int64 `json:"usedSpace"`
	UsedInodes        int64 `json:"usedInodes"`
	NextInode         int64 `json:"nextInodes"`
	NextChunk         int64 `json:"nextChunk"`
	NextSession       int64 `json:"nextSession"`
	NextTrash         int64 `json:"nextTrash"`
	NextCleanupSlices int64 `json:"nextCleanupSlices,omitempty"` // deprecated, always 0
}

type DumpedDelFile struct {
	Inode  Ino    `json:"inode"`
	Length uint64 `json:"length"`
	Expire int64  `json:"expire"`
}

type DumpedSustained struct {
	Sid    uint64 `json:"sid"`
	Inodes []Ino  `json:"inodes"`
}

type DumpedAttr struct {
	Inode     Ino    `json:"inode"`
	Flags     uint8  `json:"flags,omitempty"`
	Type      string `json:"type"`
	Mode      uint16 `json:"mode"`
	Uid       uint32 `json:"uid"`
	Gid       uint32 `json:"gid"`
	Atime     int64  `json:"atime"`
	Mtime     int64  `json:"mtime"`
	Ctime     int64  `json:"ctime"`
	Atimensec uint32 `json:"atimensec,omitempty"`
	Mtimensec uint32 `json:"mtimensec,omitempty"`
	Ctimensec uint32 `json:"ctimensec,omitempty"`
	Nlink     uint32 `json:"nlink"`
	Length    uint64 `json:"length"`
	Rdev      uint32 `json:"rdev,omitempty"`
	full      bool
}

type DumpedSlice struct {
	Chunkid uint64 `json:"chunkid,omitempty"`
	Id      uint64 `json:"id"`
	Pos     uint32 `json:"pos,omitempty"`
	Size    uint32 `json:"size"`
	Off     uint32 `json:"off,omitempty"`
	Len     uint32 `json:"len"`
}

type DumpedChunk struct {
	Index  uint32         `json:"index"`
	Slices []*DumpedSlice `json:"slices"`
}

type DumpedXattr struct {
	Name  string `json:"name"`
	Value string `json:"value"`
}

type DumpedQuota struct {
	MaxSpace   int64 `json:"maxSpace"`
	MaxInodes  int64 `json:"maxInodes"`
	UsedSpace  int64 `json:"-"`
	UsedInodes int64 `json:"-"`
}

type DumpedACLEntry struct {
	Id   uint32 `json:"id"`
	Perm uint16 `json:"perm"`
}

type DumpedACL struct {
	Owner  uint16           `json:"owner"`
	Group  uint16           `json:"group"`
	Other  uint16           `json:"other"`
	Mask   uint16           `json:"mask"`
	Users  []DumpedACLEntry `json:"users"`
	Groups []DumpedACLEntry `json:"groups"`
}

type DumpedEntry struct {
	Name       string                  `json:"-"`
	Parents    []Ino                   `json:"-"`
	Attr       *DumpedAttr             `json:"attr,omitempty"`
	Symlink    string                  `json:"symlink,omitempty"`
	Xattrs     []*DumpedXattr          `json:"xattrs,omitempty"`
	Chunks     []*DumpedChunk          `json:"chunks,omitempty"`
	Entries    map[string]*DumpedEntry `json:"entries,omitempty"`
	AccessACL  *DumpedACL              `json:"posix_acl_access,omitempty"`
	DefaultACL *DumpedACL              `json:"posix_acl_default,omitempty"`
}

type wrapEntryPool struct {
	sync.Pool
}

func (p *wrapEntryPool) Get() *DumpedEntry {
	return p.Pool.Get().(*DumpedEntry)
}

func (p *wrapEntryPool) Put(de *DumpedEntry) {
	if de == nil {
		return
	}

	de.Name = ""
	de.Xattrs = nil
	de.Chunks = nil
	de.Symlink = ""
	de.AccessACL = nil
	de.DefaultACL = nil
	de.Entries = nil
	p.Pool.Put(de)
}

var entryPool = wrapEntryPool{
	Pool: sync.Pool{
		New: func() interface{} {
			return &DumpedEntry{
				Attr: &DumpedAttr{},
			}
		},
	},
}

var CHARS = []byte("0123456789ABCDEF")

func escape(original string) string {
	// similar to url.Escape but backward compatible if no '%' in it
	var escValue = make([]byte, 0, len(original))
	for i, r := range original {
		if r == utf8.RuneError || r < 32 || r == '%' || r == '"' || r == '\\' {
			if escValue == nil {
				escValue = make([]byte, i, len(original)*2)
				for j := 0; j < i; j++ {
					escValue[j] = original[j]
				}
			}
			c := byte(r)
			if r == utf8.RuneError {
				c = original[i]
			}
			escValue = append(escValue, '%')
			escValue = append(escValue, CHARS[(c>>4)&0xF])
			escValue = append(escValue, CHARS[c&0xF])
		} else if escValue != nil {
			n := utf8.RuneLen(r)
			escValue = append(escValue, original[i:i+n]...)
		}
	}
	if escValue == nil {
		return original
	}
	return string(escValue)
}

func parseHex(c byte) (byte, error) {
	if c >= '0' && c <= '9' {
		return c - '0', nil
	} else if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A'), nil
	} else {
		return 0, fmt.Errorf("hex expected: %c", c)
	}
}

func unescape(s string) []byte {
	if !strings.ContainsRune(s, '%') {
		return []byte(s)
	}

	p := []byte(s)
	n := 0
	for i := 0; i < len(p); i++ {
		c := p[i]
		if c == '%' && i+2 < len(p) {
			h, e1 := parseHex(p[i+1])
			l, e2 := parseHex(p[i+2])
			if e1 == nil && e2 == nil {
				c = h*16 + l
				i += 2
			}
		}
		p[n] = c
		n++
	}
	return p[:n]
}

func (de *DumpedEntry) writeJSON(bw *bufio.Writer, depth int) error {
	prefix := strings.Repeat(jsonIndent, depth)
	fieldPrefix := prefix + jsonIndent
	write := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	write(fmt.Sprintf("\n%s\"%s\": {", prefix, escape(de.Name)))
	data, err := json.Marshal(de.Attr)
	if err != nil {
		panic(err)
	}
	write(fmt.Sprintf("\n%s\"attr\": %s", fieldPrefix, data))
	if len(de.Symlink) > 0 {
		write(fmt.Sprintf(",\n%s\"symlink\": \"%s\"", fieldPrefix, escape(de.Symlink)))
	}
	if len(de.Xattrs) > 0 {
		for _, dumpedXattr := range de.Xattrs {
			dumpedXattr.Value = escape(dumpedXattr.Value)
		}
		if data, err = json.Marshal(de.Xattrs); err != nil {
			panic(err)
		}
		write(fmt.Sprintf(",\n%s\"xattrs\": %s", fieldPrefix, data))
	}
	if de.AccessACL != nil {
		if data, err = json.Marshal(de.AccessACL); err != nil {
			return err
		}
		write(fmt.Sprintf(",\n%s\"posix_acl_access\": %s", fieldPrefix, data))
	}
	if de.DefaultACL != nil {
		if data, err = json.Marshal(de.DefaultACL); err != nil {
			return err
		}
		write(fmt.Sprintf(",\n%s\"posix_acl_default\": %s", fieldPrefix, data))
	}
	if len(de.Chunks) == 1 {
		if data, err = json.Marshal(de.Chunks); err != nil {
			panic(err)
		}
		write(fmt.Sprintf(",\n%s\"chunks\": %s", fieldPrefix, data))
	} else if len(de.Chunks) > 1 {
		chunkPrefix := fieldPrefix + jsonIndent
		write(fmt.Sprintf(",\n%s\"chunks\": [", fieldPrefix))
		for i, c := range de.Chunks {
			if data, err = json.Marshal(c); err != nil {
				panic(err)
			}
			write(fmt.Sprintf("\n%s%s", chunkPrefix, data))
			if i != len(de.Chunks)-1 {
				write(",")
			}
		}
		write(fmt.Sprintf("\n%s]", fieldPrefix))
	}
	write(fmt.Sprintf("\n%s}", prefix))
	return nil
}

func (de *DumpedEntry) writeJsonWithOutEntry(bw *bufio.Writer, depth int) error {
	prefix := strings.Repeat(jsonIndent, depth)
	fieldPrefix := prefix + jsonIndent
	write := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	write(fmt.Sprintf("\n%s\"%s\": {", prefix, escape(de.Name)))
	data, err := json.Marshal(de.Attr)
	if err != nil {
		panic(err)
	}
	write(fmt.Sprintf("\n%s\"attr\": %s", fieldPrefix, data))
	if len(de.Xattrs) > 0 {
		for _, dumpedXattr := range de.Xattrs {
			dumpedXattr.Value = escape(dumpedXattr.Value)
		}
		if data, err = json.Marshal(de.Xattrs); err != nil {
			panic(err)
		}
		write(fmt.Sprintf(",\n%s\"xattrs\": %s", fieldPrefix, data))
	}
	if de.AccessACL != nil {
		if data, err = json.Marshal(de.AccessACL); err != nil {
			return err
		}
		write(fmt.Sprintf(",\n%s\"posix_acl_access\": %s", fieldPrefix, data))
	}
	if de.DefaultACL != nil {
		if data, err = json.Marshal(de.DefaultACL); err != nil {
			return err
		}
		write(fmt.Sprintf(",\n%s\"posix_acl_default\": %s", fieldPrefix, data))
	}
	write(fmt.Sprintf(",\n%s\"entries\": {", fieldPrefix))
	return nil
}

type DumpedMeta struct {
	Setting   Format
	Counters  *DumpedCounters
	Sustained []*DumpedSustained
	DelFiles  []*DumpedDelFile
	Quotas    map[Ino]*DumpedQuota `json:",omitempty"`
	FSTree    *DumpedEntry         `json:",omitempty"`
	Trash     *DumpedEntry         `json:",omitempty"`
}

func (dm *DumpedMeta) validate() error {
	if dm.Counters == nil {
		return errors.New("invalid dumped meta: missing 'Counters'")
	}
	return nil
}

func (dm *DumpedMeta) writeJsonWithOutTree(w io.Writer) (*bufio.Writer, error) {
	if dm.FSTree != nil || dm.Trash != nil {
		return nil, fmt.Errorf("invalid dumped meta")
	}
	data, err := json.MarshalIndent(dm, "", jsonIndent)
	if err != nil {
		return nil, err
	}
	bw := bufio.NewWriterSize(w, jsonWriteSize)
	if _, err = bw.Write(append(data[:len(data)-2], ',')); err != nil { // delete \n}
		return nil, err
	}
	return bw, nil
}

func (m *baseMeta) loadDumpedQuotas(ctx Context, quotas map[Ino]*DumpedQuota) {
	// update quota
	for inode, q := range quotas {
		if _, err := m.en.doSetQuota(ctx, DirQuotaType, uint64(inode), &Quota{q.MaxSpace, q.MaxInodes, q.UsedSpace, q.UsedInodes, 0, 0}); err != nil {
			logger.Warnf("reset quota of %d: %s", inode, err)
			continue
		}
	}
}

func dumpAttr(a *Attr, d *DumpedAttr) {
	if a.Typ > 0 {
		d.Type = typeToString(a.Typ)
	}
	d.Flags = a.Flags
	d.Mode = a.Mode
	d.Uid = a.Uid
	d.Gid = a.Gid
	d.Atime = a.Atime
	d.Mtime = a.Mtime
	d.Ctime = a.Ctime
	d.Atimensec = a.Atimensec
	d.Mtimensec = a.Mtimensec
	d.Ctimensec = a.Ctimensec
	d.Nlink = a.Nlink
	d.Rdev = a.Rdev
	if a.Typ == TypeFile {
		d.Length = a.Length
	} else {
		d.Length = 0
	}
	d.full = a.Full
}

func loadAttr(d *DumpedAttr) *Attr {
	return &Attr{
		Flags:     d.Flags,
		Typ:       typeFromString(d.Type),
		Mode:      d.Mode,
		Uid:       d.Uid,
		Gid:       d.Gid,
		Atime:     d.Atime,
		Mtime:     d.Mtime,
		Ctime:     d.Ctime,
		Atimensec: d.Atimensec,
		Mtimensec: d.Mtimensec,
		Ctimensec: d.Ctimensec,
		Nlink:     d.Nlink,
		Rdev:      d.Rdev,
		Full:      true,
	} // Length and Parent not set
}

type chunkKey struct {
	id   uint64
	size uint32
}

func loadEntries(r io.Reader, load func(*DumpedEntry), addChunk func(*chunkKey)) (dm *DumpedMeta,
	counters *DumpedCounters, parents map[Ino][]Ino, refs map[chunkKey]int64, err error) {
	logger.Infoln("Loading from file ...")
	dec := json.NewDecoder(r)
	if _, err = dec.Token(); err != nil {
		return
	}

	progress := utils.NewProgress(false)
	bar := progress.AddCountBar("Loaded entries", 1) // with root
	dm = &DumpedMeta{}
	counters = &DumpedCounters{ // rebuild counters
		NextInode: 2,
		NextChunk: 1,
	}
	parents = make(map[Ino][]Ino)
	refs = make(map[chunkKey]int64)
	var name json.Token
	for dec.More() {
		name, err = dec.Token()
		if err != nil {
			err = fmt.Errorf("parse name: %s", err)
			return
		}
		switch name {
		case "Setting":
			if err = dec.Decode(&dm.Setting); err == nil {
				_, err = json.MarshalIndent(dm.Setting, "", "")
			}
		case "Counters":
			if err = dec.Decode(&dm.Counters); err == nil {
				bar.SetTotal(dm.Counters.UsedInodes) // TODO
			}
		case "Sustained":
			err = dec.Decode(&dm.Sustained)
		case "DelFiles":
			err = dec.Decode(&dm.DelFiles)
		case "Quotas":
			err = dec.Decode(&dm.Quotas)
		case "FSTree":
			_, err = decodeEntry(dec, 0, counters, parents, dm.Quotas, refs, bar, load, addChunk)
		case "Trash":
			_, err = decodeEntry(dec, 1, counters, parents, nil, refs, bar, load, addChunk)
		}
		if err != nil {
			err = fmt.Errorf("load %v: %s", name, err)
			return
		}
	}
	_, _ = dec.Token() // }
	progress.Done()

	if err = dm.validate(); err != nil {
		return
	}

	logger.Infof("Dumped counters: %+v", *dm.Counters)
	logger.Infof("Loaded counters: %+v", *counters)
	return
}

func decodeEntry(dec *json.Decoder, parent Ino, cs *DumpedCounters, parents map[Ino][]Ino, quotas map[Ino]*DumpedQuota,
	refs map[chunkKey]int64, bar *utils.Bar, load func(*DumpedEntry), addChunk func(*chunkKey)) (*DumpedEntry, error) {
	if _, err := dec.Token(); err != nil {
		return nil, err
	}
	var e = DumpedEntry{}
	for dec.More() {
		name, err := dec.Token()
		if err != nil {
			return nil, err
		}
		switch name {
		case "attr":
			err = dec.Decode(&e.Attr)
			if err == nil {
				if parent == 0 {
					parent = 1
					e.Attr.Inode = 1 // fix loading from subdir
				}
				inode := e.Attr.Inode
				if typeFromString(e.Attr.Type) == TypeDirectory {
					e.Attr.Nlink = 2
				} else {
					e.Attr.Nlink = 1
				}
				e.Parents = append(parents[inode], parent)
				parents[inode] = e.Parents
				if len(e.Parents) == 1 {
					if inode > 1 && inode != TrashInode {
						cs.UsedSpace += align4K(e.Attr.Length)
						cs.UsedInodes += 1
					}
					if inode < TrashInode {
						if cs.NextInode <= int64(inode) {
							cs.NextInode = int64(inode) + 1
						}
					} else {
						if cs.NextTrash < int64(inode-TrashInode) {
							cs.NextTrash = int64(inode - TrashInode)
						}
					}
				}
			}
		case "chunks":
			err = dec.Decode(&e.Chunks)
			if err == nil && len(e.Parents) == 1 {
				for _, c := range e.Chunks {
					for _, s := range c.Slices {
						if s.Chunkid != 0 && s.Id == 0 {
							s.Id = s.Chunkid
							s.Chunkid = 0
						}
						ck := chunkKey{s.Id, s.Size}
						refs[ck]++
						if addChunk != nil && refs[ck] == 1 {
							addChunk(&ck)
						}
						if cs.NextChunk <= int64(s.Id) {
							cs.NextChunk = int64(s.Id) + 1
						}
					}
				}
			}
		case "entries":
			e.Entries = make(map[string]*DumpedEntry)
			_, err = dec.Token()
			var usedSpace, usedInodes int64
			if err == nil {
				for dec.More() {
					var n json.Token
					n, err = dec.Token()
					if err != nil {
						break
					}
					var child *DumpedEntry
					child, err = decodeEntry(dec, e.Attr.Inode, cs, parents, quotas, refs, bar, load, addChunk)
					if err != nil {
						break
					}
					if e.Attr.Inode < TrashInode && typeFromString(child.Attr.Type) == TypeDirectory {
						e.Attr.Nlink++
					}
					e.Entries[n.(string)] = &DumpedEntry{
						Attr: &DumpedAttr{
							Inode:  child.Attr.Inode,
							Type:   child.Attr.Type,
							Length: child.Attr.Length,
						},
					}
					usedSpace += align4K(child.Attr.Length)
					usedInodes++
				}
				if err == nil {
					i := e.Attr.Inode
					for {
						if q := quotas[i]; q != nil {
							q.UsedSpace += usedSpace
							q.UsedInodes += usedInodes
						}
						if i <= 1 || len(parents[i]) == 0 {
							break
						}
						i = parents[i][0]
					}

					var t json.Token
					t, err = dec.Token()
					if err == nil && t != json.Delim('}') {
						err = fmt.Errorf("unexpected %v", t)
					}
				}
			}
		case "symlink":
			err = dec.Decode(&e.Symlink)
		case "xattrs":
			err = dec.Decode(&e.Xattrs)
		case "posix_acl_access":
			err = dec.Decode(&e.AccessACL)
		case "posix_acl_default":
			err = dec.Decode(&e.DefaultACL)
		}
		if err != nil {
			return nil, fmt.Errorf("decode %v: %s", name, err)
		}
	}
	if len(e.Parents) == 1 {
		load(&e)
		bar.Increment()
	}
	if _, err := dec.Token(); err != nil {
		return nil, err
	}
	return &e, nil
}

func dumpACL(rule *aclAPI.Rule) *DumpedACL {
	if rule == nil {
		return nil
	}
	return &DumpedACL{
		Owner:  rule.Owner,
		Group:  rule.Group,
		Other:  rule.Other,
		Mask:   rule.Mask,
		Users:  dumpACLEntries(rule.NamedUsers),
		Groups: dumpACLEntries(rule.NamedGroups),
	}
}

func dumpACLEntries(entries aclAPI.Entries) []DumpedACLEntry {
	if len(entries) == 0 {
		return nil
	}
	dumpedEnts := make([]DumpedACLEntry, len(entries))
	for i, ent := range entries {
		dumpedEnts[i].Id = ent.Id
		dumpedEnts[i].Perm = ent.Perm
	}
	return dumpedEnts
}

func loadACL(dumped *DumpedACL) *aclAPI.Rule {
	if dumped == nil {
		return nil
	}
	return &aclAPI.Rule{
		Owner:       dumped.Owner,
		Group:       dumped.Group,
		Mask:        dumped.Mask,
		Other:       dumped.Other,
		NamedUsers:  loadACLEntries(dumped.Users),
		NamedGroups: loadACLEntries(dumped.Groups),
	}
}

func loadACLEntries(dumpedEnts []DumpedACLEntry) aclAPI.Entries {
	if len(dumpedEnts) == 0 {
		return nil
	}
	ents := make(aclAPI.Entries, len(dumpedEnts))
	for i, d := range dumpedEnts {
		ents[i].Id = d.Id
		ents[i].Perm = d.Perm
	}
	return ents
}


================================================
FILE: pkg/meta/info.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"fmt"
	"strconv"
	"strings"
)

type redisVersion struct {
	ver          string
	major, minor int
}

var oldestSupportedVer = redisVersion{"4.0.x", 4, 0}

func parseRedisVersion(v string) (ver redisVersion, err error) {
	parts := strings.Split(v, ".")
	if len(parts) < 2 {
		err = fmt.Errorf("invalid redisVersion: %v", v)
		return
	}
	ver.ver = v
	ver.major, err = strconv.Atoi(parts[0])
	if err != nil {
		return
	}
	ver.minor, err = strconv.Atoi(parts[1])
	return
}

func (ver redisVersion) olderThan(v2 redisVersion) bool {
	if ver.major < v2.major {
		return true
	}
	if ver.major > v2.major {
		return false
	}
	return ver.minor < v2.minor
}

func (ver redisVersion) String() string {
	return ver.ver
}

type redisInfo struct {
	aofEnabled      bool
	maxMemoryPolicy string
	redisVersion    string
	storageProvider string // redis is "", keyDB is "none" or "flash"
}

func checkRedisInfo(rawInfo string) (info redisInfo, err error) {
	lines := strings.Split(strings.TrimSpace(rawInfo), "\n")
	for _, l := range lines {
		l = strings.TrimSpace(l)
		if l == "" || strings.HasPrefix(l, "#") {
			continue
		}
		kvPair := strings.SplitN(l, ":", 2)
		if len(kvPair) < 2 {
			continue
		}
		key, val := kvPair[0], kvPair[1]
		switch key {
		case "aof_enabled":
			info.aofEnabled = val == "1"
			if val == "0" {
				logger.Warnf("AOF is not enabled, you may lose data if Redis is not shutdown properly.")
			}
		case "maxmemory_policy":
			info.maxMemoryPolicy = val
		case "redis_version":
			info.redisVersion = val
			ver, err := parseRedisVersion(val)
			if err != nil {
				logger.Warnf("Failed to parse Redis server version %q: %s", ver, err)
			} else {
				if ver.olderThan(oldestSupportedVer) {
					logger.Fatalf("Redis version should not be older than %s", oldestSupportedVer)
				}
			}
		case "storage_provider":
			// if storage_provider is none reset it to ""
			if val == "flash" {
				info.storageProvider = val
			}
		}
	}
	return
}


================================================
FILE: pkg/meta/info_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import "testing"

func TestOlderThan(t *testing.T) {
	v := redisVersion{"2.2.10", 2, 2}
	if !v.olderThan(redisVersion{"6.2", 6, 2}) {
		t.Fatal("Expect true, got false.")
	}
	if !v.olderThan(redisVersion{"2.3", 2, 3}) {
		t.Fatal("Expect true, got false.")
	}
	if v.olderThan(redisVersion{"2.2", 2, 2}) {
		t.Fatal("Expect false, got true.")
	}
	if v.olderThan(redisVersion{"2.1", 2, 1}) {
		t.Fatal("Expect false, got true.")
	}
	if v.olderThan(v) {
		t.Fatal("Expect false, got true.")
	}
	if v.olderThan(redisVersion{}) {
		t.Fatal("Expect false, got true.")
	}
}

func TestParseRedisVersion(t *testing.T) {
	t.Run("Should return error for invalid redisVersion", func(t *testing.T) {
		invalidVers := []string{"", "2.sadf.1", "3", "t.3.4"}
		for _, v := range invalidVers {
			_, err := parseRedisVersion(v)
			if err == nil {
				t.Fail()
			}
		}
	})
	t.Run("Should parse redisVersion", func(t *testing.T) {
		ver, err := parseRedisVersion("6.2.19")
		if err != nil {
			t.Fatalf("Failed to parse a valid redisVersion: %s", err)
		}
		if !(ver.major == 6 && ver.minor == 2) {
			t.Fatalf("Expect %s, got %s", "6.2", ver)
		}
		if ver.String() != "6.2.19" {
			t.Fatalf("Expect %s, got %s", "6.2.19", ver)
		}
	})
}

func TestParseRedisInfo(t *testing.T) {
	t.Run("Should parse the fields we are interested in", func(t *testing.T) {
		input := `# Server
	redis_version:6.1.240
	redis_git_sha1:00000000
	redis_git_dirty:0
	redis_build_id:a26db646ea64a07c
	redis_mode:standalone
	os:Linux 5.4.0-1017-aws x86_64
	arch_bits:64
	multiplexing_api:epoll
	atomicvar_api:c11-builtin
	gcc_version:9.3.0
	process_id:2755423
	process_supervised:no
	run_id:d04b36ea49704b152d8ce82bf563d26bcd52e741
	tcp_port:6379
	server_time_usec:1610404734862725
	uptime_in_seconds:2430194
	uptime_in_days:28
	hz:10
	configured_hz:10
	lru_clock:16569214
	executable:/usr/local/bin/redis-server
	config_file:/etc/redis/redis.conf
	io_threads_active:0

		# Clients
	connected_clients:2
	cluster_connections:0
	maxclients:10000
	client_recent_max_input_buffer:24
	client_recent_max_output_buffer:0
	blocked_clients:0
	tracking_clients:0
	clients_in_timeout_table:0

		# Memory
	used_memory:200001664
	used_memory_human:190.74M
	used_memory_rss:210456576
	used_memory_rss_human:200.71M
	used_memory_peak:200060312
	used_memory_peak_human:190.79M
	used_memory_peak_perc:99.97%
		used_memory_overhead:54246680
	used_memory_startup:803648
	used_memory_dataset:145754984
	used_memory_dataset_perc:73.17%
		allocator_allocated:199994624
	allocator_active:200847360
	allocator_resident:209551360
	total_system_memory:16596942848
	total_system_memory_human:15.46G
	used_memory_lua:37888
	used_memory_lua_human:37.00K
	used_memory_scripts:0
	used_memory_scripts_human:0B
	number_of_cached_scripts:0
	maxmemory:200000000
	maxmemory_human:190.73M
	maxmemory_policy:allkeys-lru
	allocator_frag_ratio:1.00
	allocator_frag_bytes:852736
	allocator_rss_ratio:1.04
	allocator_rss_bytes:8704000
	rss_overhead_ratio:1.00
	rss_overhead_bytes:905216
	mem_fragmentation_ratio:1.05
	mem_fragmentation_bytes:10538760
	mem_not_counted_for_evict:0
	mem_replication_backlog:0
	mem_clients_slaves:0
	mem_clients_normal:41008
	mem_aof_buffer:0
	mem_allocator:jemalloc-5.1.0
	active_defrag_running:0
	lazyfree_pending_objects:0
	lazyfreed_objects:0

		# Persistence
	loading:0
	rdb_changes_since_last_save:6407091
	rdb_bgsave_in_progress:0
	rdb_last_save_time:1607974540
	rdb_last_bgsave_status:ok
	rdb_last_bgsave_time_sec:-1
	rdb_current_bgsave_time_sec:-1
	rdb_last_cow_size:0
	aof_enabled:0
	aof_rewrite_in_progress:0
	aof_rewrite_scheduled:0
	aof_last_rewrite_time_sec:-1
	aof_current_rewrite_time_sec:-1
	aof_last_bgrewrite_status:ok
	aof_last_write_status:ok
	aof_last_cow_size:0
	module_fork_in_progress:0
	module_fork_last_cow_size:0

		# Stats
	total_connections_received:127469
	total_commands_processed:15725530
	instantaneous_ops_per_sec:8
	total_net_input_bytes:1305500885
	total_net_output_bytes:237264322
	instantaneous_input_kbps:0.74
	instantaneous_output_kbps:0.10
	rejected_connections:0
	sync_full:0
	sync_partial_ok:0
	sync_partial_err:0
	expired_keys:41809
	expired_stale_perc:0.00
	expired_time_cap_reached_count:0
	expire_cycle_cpu_milliseconds:75107
	evicted_keys:182417
	keyspace_hits:3627925
	keyspace_misses:1661042
	pubsub_channels:0
	pubsub_patterns:0
	latest_fork_usec:0
	total_forks:0
	migrate_cached_sockets:0
	slave_expires_tracked_keys:0
	active_defrag_hits:0
	active_defrag_misses:0
	active_defrag_key_hits:0
	active_defrag_key_misses:0
	tracking_total_keys:0
	tracking_total_items:0
	tracking_total_prefixes:0
	unexpected_error_replies:0
	dump_payload_sanitizations:0
	total_reads_processed:15835400
	total_writes_processed:15835323
	io_threaded_reads_processed:0
	io_threaded_writes_processed:0

		# Replication
	role:master
	connected_slaves:0
	master_replid:d4fc9b96fa0c5d3eb4c4444a394ba6e4e40cc0d5
	master_replid2:0000000000000000000000000000000000000000
	master_repl_offset:0
	second_repl_offset:-1
	repl_backlog_active:0
	repl_backlog_size:1048576
	repl_backlog_first_byte_offset:0
	repl_backlog_histlen:0

		# CPU
	used_cpu_sys:3574.527853
	used_cpu_user:13274.227145
	used_cpu_sys_children:0.000000
	used_cpu_user_children:0.000000
	used_cpu_sys_main_thread:3553.579738
	used_cpu_user_main_thread:13249.100447

		# Modules

		# Cluster
	cluster_enabled:0

		# Keyspace
	db0:keys=1125326,expires=5,avg_ttl=321749445601195`
		info, err := checkRedisInfo(input)
		if err != nil {
			t.Fatalf("Failed to parse redis info: %s", err)
		}
		if info.redisVersion != "6.1.240" {
			t.Fatalf("Expect %s, got %q", "6.1.240", info.redisVersion)
		}
		if info.aofEnabled {
			t.Fatalf("Expect %t, got %t", false, true)
		}
		if info.maxMemoryPolicy != "allkeys-lru" {
			t.Fatalf("Expect %s, got %s", "allkeys-lru", info.maxMemoryPolicy)
		}
	})
}


================================================
FILE: pkg/meta/interface.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"fmt"
	"io"
	"net/url"
	"os"
	"strconv"
	"strings"
	"syscall"
	"time"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	// MaxVersion is the max of supported versions.
	MaxVersion = 1
	// ChunkBits is the size of a chunk.
	ChunkBits = 26
	// ChunkSize is size of a chunk
	ChunkSize = 1 << ChunkBits // 64M
	// DeleteSlice is a message to delete a slice from object store.
	DeleteSlice = 1000
	// CompactChunk is a message to compact a chunk in object store.
	CompactChunk = 1001
	// Rmr is a message to remove a directory recursively.
	Rmr = 1002
	// LegacyInfo is a message to get the internal info for file or directory.
	LegacyInfo = 1003
	// FillCache is a message to build cache for target directories/files
	FillCache = 1004
	// InfoV2 is a message to get the internal info for file or directory.
	InfoV2 = 1005
	// Clone is a message to clone a file or dir from another.
	Clone = 1006
	// OpSummary is a message to get tree summary of directories.
	OpSummary = 1007
	// CompactPath is a message to trigger compact
	CompactPath = 1008
)

const (
	TypeFile      = 1 // type for regular file
	TypeDirectory = 2 // type for directory
	TypeSymlink   = 3 // type for symlink
	TypeFIFO      = 4 // type for FIFO node
	TypeBlockDev  = 5 // type for block device
	TypeCharDev   = 6 // type for character device
	TypeSocket    = 7 // type for socket
)

const (
	RenameNoReplace = 1 << iota
	RenameExchange
	RenameWhiteout
	_renameReserved1
	_renameReserved2
	RenameRestore // internal
)

const (
	// SetAttrMode is a mask to update a attribute of node
	SetAttrMode = 1 << iota
	SetAttrUID
	SetAttrGID
	SetAttrSize
	SetAttrAtime
	SetAttrMtime
	SetAttrCtime
	SetAttrAtimeNow
	SetAttrMtimeNow
	SetAttrCtimeNow
	SetAttrFlag = 1 << 15
)

const (
	FlagImmutable = 1 << iota // same as Windows FILE_ATTRIBUTE_READONLY
	FlagAppend
	FlagWindowsHidden
	FlagWindowsSystem
	FlagWindowsArchive
	FlagSkipTrash // skip moving to .trash - Mapped to 's' in chattr
)

const (
	QuotaSet uint8 = iota
	QuotaGet
	QuotaDel
	QuotaList
	QuotaCheck
)

const MaxName = 255
const MaxSymlink = 4096

type Ino uint64

const RootInode Ino = 1
const TrashInode Ino = 0x7FFFFFFF10000000 // larger than vfs.minInternalNode

const RmrDefaultThreads = 50

func (i Ino) String() string {
	return strconv.FormatUint(uint64(i), 10)
}

func (i Ino) IsValid() bool {
	return i >= RootInode
}

func (i Ino) IsTrash() bool {
	return i >= TrashInode
}

func (i Ino) IsNormal() bool {
	return i >= RootInode && i < TrashInode
}

var TrashName = ".trash"

type internalNode struct {
	inode Ino
	name  string
}

// Type of control messages
const CPROGRESS = 0xFE // 16 bytes: progress increment
const CDATA = 0xFF     // 4 bytes: data length

// MsgCallback is a callback for messages from meta service.
type MsgCallback func(...interface{}) error

// Attr represents attributes of a node.
type Attr struct {
	Flags     uint8  // flags
	Typ       uint8  // type of a node
	Mode      uint16 // permission mode
	Uid       uint32 // owner id
	Gid       uint32 // group id of owner
	Rdev      uint32 // device number
	Atime     int64  // last access time
	Mtime     int64  // last modified time
	Ctime     int64  // last change time for meta
	Atimensec uint32 // nanosecond part of atime
	Mtimensec uint32 // nanosecond part of mtime
	Ctimensec uint32 // nanosecond part of ctime
	Nlink     uint32 // number of links (sub-directories or hardlinks)
	Length    uint64 // length of regular file

	Parent    Ino  // inode of parent; 0 means tracked by parentKey (for hardlinks)
	Full      bool // the attributes are completed or not
	KeepCache bool // whether to keep the cached page or not

	AccessACL  uint32 // access ACL id (identical ACL rules share the same access ACL ID.)
	DefaultACL uint32 // default ACL id (default ACL and the access ACL share the same cache and store)
}

func (attr *Attr) Marshal() []byte {
	size := uint32(36 + 24 + 4 + 8)
	if attr.AccessACL|attr.DefaultACL != aclAPI.None {
		size += 8
	}
	w := utils.NewBuffer(size)
	w.Put8(attr.Flags)
	w.Put16((uint16(attr.Typ) << 12) | (attr.Mode & 0xfff))
	w.Put32(attr.Uid)
	w.Put32(attr.Gid)
	w.Put64(uint64(attr.Atime))
	w.Put32(attr.Atimensec)
	w.Put64(uint64(attr.Mtime))
	w.Put32(attr.Mtimensec)
	w.Put64(uint64(attr.Ctime))
	w.Put32(attr.Ctimensec)
	w.Put32(attr.Nlink)
	w.Put64(attr.Length)
	w.Put32(attr.Rdev)
	w.Put64(uint64(attr.Parent))
	if attr.AccessACL+attr.DefaultACL > 0 {
		w.Put32(attr.AccessACL)
		w.Put32(attr.DefaultACL)
	}
	logger.Tracef("attr: %+v -> %+v", attr, w.Bytes())
	return w.Bytes()
}

func (attr *Attr) Unmarshal(buf []byte) {
	if attr == nil || len(buf) == 0 {
		return
	}
	rb := utils.FromBuffer(buf)
	attr.Flags = rb.Get8()
	attr.Mode = rb.Get16()
	attr.Typ = uint8(attr.Mode >> 12)
	attr.Mode &= 0xfff
	attr.Uid = rb.Get32()
	attr.Gid = rb.Get32()
	attr.Atime = int64(rb.Get64())
	attr.Atimensec = rb.Get32()
	attr.Mtime = int64(rb.Get64())
	attr.Mtimensec = rb.Get32()
	attr.Ctime = int64(rb.Get64())
	attr.Ctimensec = rb.Get32()
	attr.Nlink = rb.Get32()
	attr.Length = rb.Get64()
	attr.Rdev = rb.Get32()
	if rb.Left() >= 8 {
		attr.Parent = Ino(rb.Get64())
	}
	attr.Full = true
	if rb.Left() >= 8 {
		attr.AccessACL = rb.Get32()
		attr.DefaultACL = rb.Get32()
	}
	logger.Tracef("attr: %+v -> %+v", buf, attr)
}

func typeToStatType(_type uint8) uint32 {
	switch _type & 0x7F {
	case TypeDirectory:
		return syscall.S_IFDIR
	case TypeSymlink:
		return syscall.S_IFLNK
	case TypeFile:
		return syscall.S_IFREG
	case TypeFIFO:
		return syscall.S_IFIFO
	case TypeSocket:
		return syscall.S_IFSOCK
	case TypeBlockDev:
		return syscall.S_IFBLK
	case TypeCharDev:
		return syscall.S_IFCHR
	default:
		panic(_type)
	}
}

func typeToString(_type uint8) string {
	switch _type {
	case TypeFile:
		return "regular"
	case TypeDirectory:
		return "directory"
	case TypeSymlink:
		return "symlink"
	case TypeFIFO:
		return "fifo"
	case TypeBlockDev:
		return "blockdev"
	case TypeCharDev:
		return "chardev"
	case TypeSocket:
		return "socket"
	default:
		return "unknown"
	}
}

func typeFromString(s string) uint8 {
	switch s {
	case "regular":
		return TypeFile
	case "directory":
		return TypeDirectory
	case "symlink":
		return TypeSymlink
	case "fifo":
		return TypeFIFO
	case "blockdev":
		return TypeBlockDev
	case "chardev":
		return TypeCharDev
	case "socket":
		return TypeSocket
	default:
		panic(s)
	}
}

// SMode is the file mode including type and unix permission.
func (a *Attr) SMode() uint32 {
	return typeToStatType(a.Typ) | uint32(a.Mode)
}

// Entry is an entry inside a directory.
type Entry struct {
	Inode Ino
	Name  []byte
	Attr  *Attr
}

// Slice is a slice of a chunk.
// Multiple slices could be combined together as a chunk.
type Slice struct {
	Id   uint64
	Size uint32
	Off  uint32
	Len  uint32
}

// Summary represents the total number of files/directories and
// total length of all files inside a directory.
type Summary struct {
	Length uint64
	Size   uint64
	Files  uint64
	Dirs   uint64
}

type TreeSummary struct {
	Inode    Ino
	Path     string
	Type     uint8
	Size     uint64
	Files    uint64
	Dirs     uint64
	Children []*TreeSummary `json:",omitempty"`
}

type SessionInfo struct {
	Version    string
	HostName   string
	IPAddrs    []string `json:",omitempty"`
	MountPoint string
	MountTime  time.Time
	ProcessID  int
}

type Flock struct {
	Inode Ino
	Owner uint64
	Ltype string
}

type Plock struct {
	Inode   Ino
	Owner   uint64
	Records []plockRecord
}

// Session contains detailed information of a client session
type Session struct {
	Sid    uint64
	Expire time.Time
	SessionInfo
	Sustained []Ino   `json:",omitempty"`
	Flocks    []Flock `json:",omitempty"`
	Plocks    []Plock `json:",omitempty"`
}

// Meta is a interface for a meta service for file system.
type Meta interface {
	// Name of database
	Name() string
	// Init is used to initialize a meta service.
	Init(format *Format, force bool) error
	// Shutdown close current database connections.
	Shutdown() error
	// Reset cleans up all metadata, VERY DANGEROUS!
	Reset() error
	// Load loads the existing setting of a formatted volume from meta service.
	Load(checkVersion bool) (*Format, error)
	// NewSession creates or update client session.
	NewSession(record bool) error
	// CloseSession does cleanup and close the session.
	CloseSession() error
	// FlushSession flushes the status to meta service.
	FlushSession()
	// GetSession retrieves information of session with sid
	GetSession(sid uint64, detail bool) (*Session, error)
	// ListSessions returns all client sessions.
	ListSessions() ([]*Session, error)
	// ScanDeletedObject scan deleted objects by customized scanner.
	ScanDeletedObject(Context, trashSliceScan, pendingSliceScan, trashFileScan, pendingFileScan) error
	// ListLocks returns all locks of a inode.
	ListLocks(ctx context.Context, inode Ino) ([]PLockItem, []FLockItem, error)
	// CleanStaleSessions cleans up sessions not active for more than 5 minutes
	CleanStaleSessions(ctx Context)
	// CleanupTrashBefore deletes all files in trash before the given time.
	CleanupTrashBefore(ctx Context, edge time.Time, increProgress func(int), stats *CleanupTrashStats) syscall.Errno
	// CleanupDetachedNodesBefore deletes all detached nodes before the given time.
	CleanupDetachedNodesBefore(ctx Context, edge time.Time, increProgress func())

	// StatFS returns summary statistics of a volume.
	StatFS(ctx Context, ino Ino, totalspace, availspace, iused, iavail *uint64) syscall.Errno
	// Access checks the access permission on given inode.
	Access(ctx Context, inode Ino, modemask uint8, attr *Attr) syscall.Errno
	// Lookup returns the inode and attributes for the given entry in a directory.
	Lookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr, checkPerm bool) syscall.Errno
	// Resolve fetches the inode and attributes for an entry identified by the given path.
	// ENOTSUP will be returned if there's no natural implementation for this operation or
	// if there are any symlink following involved.
	Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno
	// GetAttr returns the attributes for given node.
	GetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno
	// SetAttr updates the attributes for given node.
	SetAttr(ctx Context, inode Ino, set uint16, sggidclearmode uint8, attr *Attr) syscall.Errno
	// Check setting attr is allowed or not
	CheckSetAttr(ctx Context, inode Ino, set uint16, attr Attr) syscall.Errno
	// Truncate changes the length for given file.
	Truncate(ctx Context, inode Ino, flags uint8, attrlength uint64, attr *Attr, skipPermCheck bool) syscall.Errno
	// Fallocate preallocate given space for given file.
	Fallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, length *uint64) syscall.Errno
	// ReadLink returns the target of a symlink.
	ReadLink(ctx Context, inode Ino, path *[]byte) syscall.Errno
	// Symlink creates a symlink in a directory with given name.
	Symlink(ctx Context, parent Ino, name string, path string, inode *Ino, attr *Attr) syscall.Errno
	// Mknod creates a node in a directory with given name, type and permissions.
	Mknod(ctx Context, parent Ino, name string, _type uint8, mode uint16, cumask uint16, rdev uint32, path string, inode *Ino, attr *Attr) syscall.Errno
	// Mkdir creates a sub-directory with given name and mode.
	Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16, copysgid uint8, inode *Ino, attr *Attr) syscall.Errno
	// Unlink removes a file entry from a directory.
	// The file will be deleted if it's not linked by any entries and not open by any sessions.
	Unlink(ctx Context, parent Ino, name string, skipCheckTrash ...bool) syscall.Errno
	// BatchUnlink remove some file entries from the same directory
	BatchUnlink(ctx Context, parent Ino, entries []*Entry, count *uint64, skipCheckTrash bool) syscall.Errno
	// Rmdir removes an empty sub-directory.
	Rmdir(ctx Context, parent Ino, name string, skipCheckTrash ...bool) syscall.Errno
	// Rename move an entry from a source directory to another with given name.
	// The targeted entry will be overwrited if it's a file or empty directory.
	// For Hadoop, the target should not be overwritten.
	Rename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode *Ino, attr *Attr) syscall.Errno
	// Link creates an entry for node.
	Link(ctx Context, inodeSrc, parent Ino, name string, attr *Attr) syscall.Errno
	// Readdir returns all entries for given directory, which include attributes if plus is true.
	Readdir(ctx Context, inode Ino, wantattr uint8, entries *[]*Entry) syscall.Errno
	// NewDirHandler returns a stream for directory entries.
	NewDirHandler(ctx Context, inode Ino, plus bool, initEntries []*Entry) (DirHandler, syscall.Errno)
	// Create creates a file in a directory with given name.
	Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32, inode *Ino, attr *Attr) syscall.Errno
	// Open checks permission on a node and track it as open.
	Open(ctx Context, inode Ino, flags uint32, attr *Attr) syscall.Errno
	// Close a file.
	Close(ctx Context, inode Ino) syscall.Errno
	// Read returns the list of slices on the given chunk.
	Read(ctx Context, inode Ino, indx uint32, slices *[]Slice) syscall.Errno
	// NewSlice returns an id for new slice.
	NewSlice(ctx Context, id *uint64) syscall.Errno
	// Write put a slice of data on top of the given chunk.
	Write(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time) syscall.Errno
	// InvalidateChunkCache invalidate chunk cache
	InvalidateChunkCache(ctx Context, inode Ino, indx uint32) syscall.Errno
	// CopyFileRange copies part of a file to another one.
	CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied, outLength *uint64) syscall.Errno
	// GetParents returns a map of node parents (> 1 parents if hardlinked)
	GetParents(ctx Context, inode Ino) map[Ino]int
	// GetDirStat returns the space and inodes usage of a directory.
	GetDirStat(ctx Context, inode Ino) (stat *dirStat, st syscall.Errno)

	// GetXattr returns the value of extended attribute for given name.
	GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno
	// ListXattr returns all extended attributes of a node.
	ListXattr(ctx Context, inode Ino, dbuff *[]byte) syscall.Errno
	// SetXattr update the extended attribute of a node.
	SetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno
	// RemoveXattr removes the extended attribute of a node.
	RemoveXattr(ctx Context, inode Ino, name string) syscall.Errno
	// Flock tries to put a lock on given file.
	Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno
	// Getlk returns the current lock owner for a range on a file.
	Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno
	// Setlk sets a file range lock on given file.
	Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno

	// Compact all the chunks by merge small slices together
	CompactAll(ctx Context, threads int, bar *utils.Bar) syscall.Errno
	// Compact chunks for specified path
	Compact(ctx Context, inode Ino, concurrency int, preFunc, postFunc func()) syscall.Errno

	// ListSlices returns all slices used by all files.
	ListSlices(ctx Context, slices map[Ino][]Slice, scanPending, delete bool, showProgress func()) syscall.Errno
	// Remove all files and directories recursively.
	// count represents the number of attempted deletions of entries (even if failed).
	Remove(ctx Context, parent Ino, name string, skipTrash bool, numThreads int, count *uint64) syscall.Errno
	// Get summary of a node; for a directory it will accumulate all its child nodes
	GetSummary(ctx Context, inode Ino, summary *Summary, recursive bool, strict bool) syscall.Errno
	// GetTreeSummary returns a summary in tree structure
	GetTreeSummary(ctx Context, root *TreeSummary, depth, topN uint8, strict bool, updateProgress func(count uint64, bytes uint64)) syscall.Errno
	// Clone a file or directory
	Clone(ctx Context, srcParentIno, srcIno, dstParentIno Ino, dstName string, cmode uint8, cumask uint16, concurrency uint8, count, total *uint64) syscall.Errno
	// GetPaths returns all paths of an inode
	GetPaths(ctx Context, inode Ino) []string
	// Check integrity of an absolute path and repair it if asked
	Check(ctx Context, fpath string, opt *CheckOpt) error
	// Change root to a directory specified by subdir
	Chroot(ctx Context, subdir string) syscall.Errno
	// chroot set the root directory by inode
	chroot(inode Ino)
	// Get a copy of the current format
	GetFormat() Format

	// OnMsg add a callback for the given message type.
	OnMsg(mtype uint32, cb MsgCallback)
	// OnReload register a callback for any change founded after reloaded.
	OnReload(func(new *Format))

	HandleQuota(ctx Context, cmd uint8, dpath string, uid uint32, gid uint32, quotas map[string]*Quota, strict, repair bool, create bool) error
	//Triggers a global user group quota scan
	ScanUserGroupUsage(ctx Context) error

	// Dump the tree under root, which may be modified by checkRoot
	DumpMeta(w io.Writer, root Ino, threads int, keepSecret, fast, skipTrash bool) error
	LoadMeta(r io.Reader) error

	DumpMetaV2(ctx Context, w io.Writer, opt *DumpOption) error
	LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error

	// getBase return the base engine.
	getBase() *baseMeta
	InitMetrics(registerer prometheus.Registerer)
	InitSharedMetrics(registerer prometheus.Registerer)

	SetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno
	GetFacl(ctx Context, ino Ino, aclType uint8, n *aclAPI.Rule) syscall.Errno

	// kerberos
	StoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno)
	UpdateToken(ctx Context, id uint32, token []byte) syscall.Errno
	LoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno)
	DeleteTokens(ctx Context, ids []uint32) syscall.Errno
	ListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno)
}

type CheckOpt struct {
	Repair        bool
	Recursive     bool
	SyncDirStat   bool
	RepairDirMode uint16
	ShowProgress  func(n int)
	Slices        map[Ino][]Slice
}

type CleanupTrashStats struct {
	DeletedFiles int64
}

type Creator func(driver, addr string, conf *Config) (Meta, error)

var metaDrivers = make(map[string]Creator)

func Register(name string, register Creator) {
	metaDrivers[name] = register
}

func injectPasswordIntoURI(uri, password string) (string, error) {
	atIndex := strings.LastIndex(uri, "@")
	if atIndex == -1 {
		return "", fmt.Errorf("invalid uri: %s", uri)
	}
	dIndex := strings.Index(uri, "://") + 3
	s := strings.Split(uri[dIndex:atIndex], ":")

	if len(s) > 2 {
		return "", fmt.Errorf("invalid uri: %s", uri)
	}

	if len(s) == 2 && s[1] != "" {
		return uri, nil
	}
	pwd := url.UserPassword("", password) // escape only password
	return uri[:dIndex] + s[0] + pwd.String() + uri[atIndex:], nil
}

func readPasswordFromFile(filePath string) (string, error) {
	content, err := os.ReadFile(filePath)
	if err != nil {
		return "", fmt.Errorf("failed to read password file %s: %w", filePath, err)
	}
	return strings.TrimSpace(string(content)), nil
}

func setPasswordFromEnv(uri string) (string, error) {
	var password string
	var err error

	if metaPassword := os.Getenv("META_PASSWORD"); metaPassword != "" {
		password = metaPassword
	} else if passwordFile := os.Getenv("META_PASSWORD_FILE"); passwordFile != "" {
		password, err = readPasswordFromFile(passwordFile)
		if err != nil {
			return "", err
		}
	} else {
		// No password source available, return original URI
		return uri, nil
	}

	return injectPasswordIntoURI(uri, password)
}

// NewClient creates a Meta client for given uri.
func NewClient(uri string, conf *Config) Meta {
	var err error
	if !strings.Contains(uri, "://") {
		uri = "redis://" + uri
	}
	p := strings.Index(uri, "://")
	if p < 0 {
		logger.Fatalf("invalid uri: %s", uri)
	}
	driver := uri[:p]
	if driver == "mysql" || driver == "postgres" {
		if uri, err = setPasswordFromEnv(uri); err != nil {
			logger.Fatalf(err.Error())
		}
	}
	logger.Infof("Meta address: %s", utils.RemovePassword(uri))
	f, ok := metaDrivers[driver]
	if !ok {
		logger.Fatalf("Invalid meta driver: %s", driver)
	}
	if conf == nil {
		conf = DefaultConf()
	} else {
		conf.SelfCheck()
	}
	m, err := f(driver, uri[p+3:], conf)
	if err != nil {
		logger.Fatalf("Meta %s is not available: %s", utils.RemovePassword(uri), err)
	}
	return m
}


================================================
FILE: pkg/meta/interface_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"os"
	"path/filepath"
	"testing"
)

func Test_injectPasswordIntoURI(t *testing.T) {
	const dbPasswd = "dbPasswd"
	tests := []struct {
		uri     string
		want    string
		wantErr bool
	}{
		//mysql
		{
			uri:     "mysql://root:password@(127.0.0.1:3306)/juicefs",
			want:    "mysql://root:password@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		{
			uri:     "mysql://root:@(127.0.0.1:3306)/juicefs",
			want:    "mysql://root:dbPasswd@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		{
			uri:     "mysql://root@(127.0.0.1:3306)/juicefs",
			want:    "mysql://root:dbPasswd@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		{
			uri:     "mysql://root@@(127.0.0.1:3306)/juicefs",
			want:    "mysql://root@:dbPasswd@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		// no user is ok
		{
			uri:     "mysql://:@(127.0.0.1:3306)/juicefs",
			want:    "mysql://:dbPasswd@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		{
			uri:     "mysql://:pwd@(127.0.0.1:3306)/juicefs",
			want:    "mysql://:pwd@(127.0.0.1:3306)/juicefs",
			wantErr: false,
		},
		//postgres
		{
			uri:     "postgres://root:password@192.168.1.6:5432/juicefs",
			want:    "postgres://root:password@192.168.1.6:5432/juicefs",
			wantErr: false,
		},
		{
			uri:     "postgres://root:@192.168.1.6:5432/juicefs",
			want:    "postgres://root:dbPasswd@192.168.1.6:5432/juicefs",
			wantErr: false,
		},
		{
			uri:     "postgres://root@192.168.1.6:5432/juicefs",
			want:    "postgres://root:dbPasswd@192.168.1.6:5432/juicefs",
			wantErr: false,
		},
		{
			uri:     "postgres://root@/pgtest?host=/tmp/pgsocket/&port=5433",
			want:    "postgres://root:dbPasswd@/pgtest?host=/tmp/pgsocket/&port=5433",
			wantErr: false,
		},
		{
			uri:     "postgres://@/pgtest?host=/tmp/pgsocket/&port=5433&user=pguser",
			want:    "postgres://:dbPasswd@/pgtest?host=/tmp/pgsocket/&port=5433&user=pguser",
			wantErr: false,
		},
		// Error conditions
		{
			uri:     "mysql://root(127.0.0.1:3306)/juicefs", // missing @
			want:    "",
			wantErr: true,
		},
		{
			uri:     "mysql://a:b:c:@(127.0.0.1:3306)/juicefs",
			want:    "",
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run("", func(t *testing.T) {
			got, err := injectPasswordIntoURI(tt.uri, dbPasswd)

			if tt.wantErr {
				if err == nil {
					t.Errorf("injectPasswordIntoURI() expected error but got none")
					return
				}
			} else {
				if err != nil {
					t.Errorf("injectPasswordIntoURI() unexpected error = %v", err)
					return
				}
				if got != tt.want {
					t.Errorf("injectPasswordIntoURI() = %q, want %q", got, tt.want)
				}
			}
		})
	}
}

func Test_setPasswordFromEnv(t *testing.T) {
	tempDir := t.TempDir()
	passwordFile := filepath.Join(tempDir, "password.txt")
	err := os.WriteFile(passwordFile, []byte("filePassword"), 0600)
	if err != nil {
		t.Fatalf("Failed to create test password file: %v", err)
	}

	tests := []struct {
		name             string
		metaPassword     string
		metaPasswordFile string
		uri              string
		want             string
		wantErr          bool
	}{
		{
			name:         "META_PASSWORD only",
			metaPassword: "envPassword",
			uri:          "mysql://root@localhost/db",
			want:         "mysql://root:envPassword@localhost/db",
			wantErr:      false,
		},
		{
			name:             "META_PASSWORD_FILE only",
			metaPasswordFile: passwordFile,
			uri:              "mysql://root@localhost/db",
			want:             "mysql://root:filePassword@localhost/db",
			wantErr:          false,
		},
		{
			name:             "META_PASSWORD takes precedence over META_PASSWORD_FILE",
			metaPassword:     "envPassword",
			metaPasswordFile: passwordFile,
			uri:              "mysql://root@localhost/db",
			want:             "mysql://root:envPassword@localhost/db",
			wantErr:          false,
		},
		{
			name:    "neither environment variable set",
			uri:     "mysql://root@localhost/db",
			want:    "mysql://root@localhost/db",
			wantErr: false,
		},
		{
			name:             "META_PASSWORD_FILE points to non-existent file",
			metaPasswordFile: "/non/existent/file",
			uri:              "mysql://root@localhost/db",
			want:             "",
			wantErr:          true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Clean environment
			defer os.Unsetenv("META_PASSWORD")
			defer os.Unsetenv("META_PASSWORD_FILE")
			// Just to be safe
			os.Unsetenv("META_PASSWORD")
			os.Unsetenv("META_PASSWORD_FILE")

			// Set environment variables as needed
			if tt.metaPassword != "" {
				os.Setenv("META_PASSWORD", tt.metaPassword)
			}
			if tt.metaPasswordFile != "" {
				os.Setenv("META_PASSWORD_FILE", tt.metaPasswordFile)
			}

			got, err := setPasswordFromEnv(tt.uri)

			if tt.wantErr {
				if err == nil {
					t.Errorf("setPasswordFromEnv() expected error but got none")
					return
				}
			} else {
				if err != nil {
					t.Errorf("setPasswordFromEnv() unexpected error = %v", err)
					return
				}
				if got != tt.want {
					t.Errorf("setPasswordFromEnv() = %q, want %q", got, tt.want)
				}
			}
		})
	}
}

func Test_readPasswordFromFile(t *testing.T) {
	// Create temporary directory for test files
	tempDir := t.TempDir()

	tests := []struct {
		name       string
		content    string
		filename   string
		createFile bool
		want       string
		wantErr    bool
	}{
		{
			name:       "valid password file",
			content:    "mypassword",
			filename:   "password.txt",
			createFile: true,
			want:       "mypassword",
			wantErr:    false,
		},
		{
			name:       "password with leading and trailing whitespace",
			content:    "\n  mypassword  \n\t",
			filename:   "password_with_spaces.txt",
			createFile: true,
			want:       "mypassword",
			wantErr:    false,
		},
		{
			name:       "empty file",
			content:    "",
			filename:   "empty.txt",
			createFile: true,
			want:       "",
			wantErr:    false,
		},
		{
			name:       "complex password with special characters",
			content:    "pa$$w0rd!@#",
			filename:   "complex.txt",
			createFile: true,
			want:       "pa$$w0rd!@#",
			wantErr:    false,
		},
		{
			name:       "file does not exist",
			content:    "",
			filename:   "nonexistent.txt",
			createFile: false,
			want:       "",
			wantErr:    true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var filePath string
			if tt.createFile {
				filePath = filepath.Join(tempDir, tt.filename)
				err := os.WriteFile(filePath, []byte(tt.content), 0600)
				if err != nil {
					t.Fatalf("Failed to create test file %s: %v", filePath, err)
				}
			} else {
				filePath = filepath.Join(tempDir, tt.filename)
			}

			got, err := readPasswordFromFile(filePath)

			if tt.wantErr {
				if err == nil {
					t.Errorf("readPasswordFromFile() expected error but got none")
					return
				}
			} else {
				if err != nil {
					t.Errorf("readPasswordFromFile() unexpected error = %v", err)
					return
				}
				if got != tt.want {
					t.Errorf("readPasswordFromFile() = %q, want %q", got, tt.want)
				}
			}
		})
	}
}


================================================
FILE: pkg/meta/load_dump_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path"
	"strings"
	"testing"
	"time"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/sirupsen/logrus"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)

const sampleFile = "metadata.sample"
const subSampleFile = "metadata-sub.sample"

func TestEscape(t *testing.T) {
	cases := []struct {
		value            []rune
		gbkStart, gbkEnd int
	}{
		{value: []rune("%1F果汁数据科技有限公司%2B"), gbkStart: 0, gbkEnd: 0},
		{value: []rune("果汁数据科技有限公司%1F"), gbkStart: 0, gbkEnd: 1},
		{value: []rune("果汁数据科技有限公司"), gbkStart: 1, gbkEnd: 2},
		{value: []rune("果汁数据科技有限公司"), gbkStart: 1, gbkEnd: 4},
		{value: []rune("果汁数据科技有限公司"), gbkStart: 5, gbkEnd: 10},
		{value: []rune("果汁数据科技有限公司"), gbkStart: 0, gbkEnd: 10},
		{value: []rune("GBK果汁数据科技有限公司文件"), gbkStart: 0, gbkEnd: 15},
		{value: []rune("%果汁数据科%技有限公司%"), gbkStart: 1, gbkEnd: 4},
		{value: []rune("\"果汁数据科\"技有限公司%"), gbkStart: 1, gbkEnd: 4},
		{value: []rune("\\果汁数\\据科技有限公司"), gbkStart: 1, gbkEnd: 4},
	}
	for _, c := range cases {
		var v []byte
		prefix := c.value[:c.gbkStart]
		middle := c.value[c.gbkStart:c.gbkEnd]
		suffix := c.value[c.gbkEnd:]
		gbk, err := Utf8ToGbk([]byte(string(middle)))
		if err != nil {
			t.Fatalf("Utf8ToGbk error: %v", err)
		}
		v = append(v, []byte(string(prefix))...)
		v = append(v, gbk...)
		v = append(v, []byte(string(suffix))...)
		s := escape(string(v))
		t.Log("escape value: ", s)
		r := unescape(s)
		if !bytes.Equal(r, v) {
			t.Fatalf("expected %v, but got %v", v, r)
		}
	}
}

func Utf8ToGbk(s []byte) ([]byte, error) {
	reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewEncoder())
	d, e := io.ReadAll(reader)
	if e != nil {
		return nil, e
	}
	return d, nil
}

func GbkToUtf8(s []byte) ([]byte, error) {
	reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
	d, e := io.ReadAll(reader)
	if e != nil {
		return nil, e
	}
	return d, nil
}

func checkMeta(t *testing.T, m Meta) {
	if _, err := m.Load(true); err != nil {
		t.Fatalf("load setting: %s", err)
	}

	counters := map[string]int64{
		"usedSpace":   115392512,
		"totalInodes": 14,
		"nextInode":   35,
		"nextChunk":   9,
		"nextSession": 0,
		"nextTrash":   1,
	}
	for name, expect := range counters {
		val, err := m.getBase().en.getCounter(name)
		if err != nil {
			t.Fatalf("get counter %s: %s", name, err)
		}
		if m.Name() == "redis" && (name == "nextChunk" || name == "nextInode") {
			expect--
		}
		if val != expect {
			t.Fatalf("counter %s: %d != %d", name, val, expect)
		}
	}

	ctx := Background()
	var entries []*Entry
	if st := m.Readdir(ctx, 1, 1, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	} else if len(entries) != 11 {
		t.Fatalf("entries: %d", len(entries))
	}

	var expectedStat dirStat
	for _, entry := range entries {
		fname := string(entry.Name)
		if strings.HasPrefix(fname, "GBK") {
			if utf8, err := GbkToUtf8(entry.Name); err != nil || string(utf8) != "GBK果汁数据科技有限公司文件" {
				t.Fatalf("load GBK file error: %s", string(utf8))
			}
		}
		if strings.HasPrefix(fname, "UTF8") && fname != "UTF8果汁数据科技有限公司目录" && fname != "UTF8果汁数据科技有限公司文件" {
			t.Fatalf("load entries error: %s", fname)
		}
		if string(entry.Name) != "." && string(entry.Name) != ".." {
			var length uint64
			if entry.Attr.Typ == TypeFile {
				length = entry.Attr.Length
			}
			expectedStat.inodes++
			expectedStat.length += int64(length)
			expectedStat.space += align4K(length)
		}
	}

	stat, st := m.(engine).doGetDirStat(ctx, 1, false)
	if st != 0 {
		t.Fatalf("get dir stat: %s", st)
	}
	if stat == nil {
		t.Fatalf("get dir stat: nil")
	}
	if *stat != expectedStat {
		t.Fatalf("expected: %v, but got: %v", expectedStat, *stat)
	}

	var summary Summary
	if st = m.GetSummary(ctx, 1, &summary, true, true); st != 0 {
		t.Fatalf("get summary: %s", st)
	}
	expectedQuota := Quota{
		MaxInodes:  100,
		MaxSpace:   1 << 30,
		UsedSpace:  int64(summary.Size) - align4K(0),
		UsedInodes: int64(summary.Dirs+summary.Files) - 1,
	}

	quota, err := m.(engine).doGetQuota(ctx, DirQuotaType, 1)
	if err != nil {
		t.Fatalf("get quota: %s", err)
	}
	if quota == nil {
		t.Fatalf("get quota: nil")
	}
	if *quota != expectedQuota {
		t.Fatalf("expected: %v, but got: %v", expectedQuota, *quota)
	}

	attr := &Attr{}
	if st := m.GetAttr(ctx, 2, attr); st != 0 {
		t.Fatalf("getattr: %s", st)
	}
	if attr.Nlink != 1 || attr.Length != 24 {
		t.Fatalf("nlink: %d, length: %d", attr.Nlink, attr.Length)
	}

	if attr.Flags != 128 {
		t.Fatalf("expect the flags euqal 128, but actual is: %d", attr.Flags)
	}

	if attr.AccessACL == 0 || attr.DefaultACL == 0 {
		t.Fatalf("expect ACL not 0, but actual is: %d, %d", attr.AccessACL, attr.DefaultACL)
	}

	ar := &aclAPI.Rule{}
	if st := m.GetFacl(ctx, 2, aclAPI.TypeAccess, ar); st != 0 {
		t.Fatalf("get access acl: %s", st)
	}
	ar2 := &aclAPI.Rule{
		Owner: 6,
		Group: 4,
		Mask:  4,
		Other: 4,
		NamedUsers: []aclAPI.Entry{
			{Id: 1, Perm: 6},
			{Id: 2, Perm: 7},
		},
		NamedGroups: nil,
	}
	if !bytes.Equal(ar.Encode(), ar2.Encode()) {
		t.Fatalf("access acl: %v != %v", ar, ar2)
	}

	dr := &aclAPI.Rule{}
	if st := m.GetFacl(ctx, 2, aclAPI.TypeDefault, dr); st != 0 {
		t.Fatalf("get default acl: %s", st)
	}
	dr2 := &aclAPI.Rule{
		Owner:      7,
		Group:      5,
		Mask:       5,
		Other:      5,
		NamedUsers: nil,
		NamedGroups: []aclAPI.Entry{
			{Id: 3, Perm: 6},
			{Id: 4, Perm: 7},
		},
	}
	if !bytes.Equal(dr.Encode(), dr2.Encode()) {
		t.Fatalf("default acl: %v != %v", dr, dr2)
	}

	var slices []Slice
	if st := m.Read(ctx, 2, 0, &slices); st != 0 {
		t.Fatalf("read chunk: %s", st)
	}
	if len(slices) != 1 || slices[0].Id != 4 || slices[0].Size != 24 {
		t.Fatalf("slices: %v", slices)
	}
	if st := m.GetAttr(ctx, 4, attr); st != 0 || attr.Nlink != 2 { // hard link
		t.Fatalf("getattr: %s, %d", st, attr.Nlink)
	}
	if ps := m.GetParents(ctx, 4); len(ps) != 2 || ps[1] != 1 || ps[3] != 1 {
		t.Fatalf("getparents: %+v != {1:1, 3:1}", ps)
	}
	var target []byte

	if st := m.ReadLink(ctx, 5, &target); st == 0 { // symlink
		if utf8, err := GbkToUtf8(target); err != nil || string(utf8) != "GBK果汁数据科技有限公司文件" {
			t.Fatalf("readlink: %s, %s", st, target)
		}
	} else {
		t.Fatalf("readlink: %s, %s", st, target)
	}

	var value []byte
	if st := m.GetXattr(ctx, 2, "k", &value); st != 0 || string(value) != "v" {
		t.Fatalf("getxattr: %s %v", st, value)
	}
	if st := m.GetXattr(ctx, 3, "dk", &value); st != 0 || string(value) != "果汁%25" {
		t.Fatalf("getxattr: %s %v", st, value)
	}
}

func testLoadSub(t *testing.T, uri, fname string) {
	m := NewClient(uri, nil)
	if err := m.Reset(); err != nil {
		t.Fatalf("reset meta: %s", err)
	}
	fp, err := os.Open(fname)
	if err != nil {
		t.Fatalf("open file: %s", fname)
	}
	defer fp.Close()
	if err = m.LoadMeta(fp); err != nil {
		t.Fatalf("load meta: %s", err)
	}

	var entries []*Entry
	if st := m.Readdir(Background(), 1, 0, &entries); st != 0 {
		t.Fatalf("readdir: %s", st)
	} else if len(entries) != 4 {
		t.Fatalf("entries: %d", len(entries))
	}
	for _, entry := range entries {
		fname := string(entry.Name)
		if fname != "." && fname != ".." && fname != "big" && fname != "f11" {
			t.Fatalf("invalid entry name: %s", fname)
		}
	}
}

func testDump(t *testing.T, m Meta, root Ino, expect, result string) {
	fp, err := os.OpenFile(result, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		t.Fatalf("open file %s: %s", result, err)
	}
	defer fp.Close()
	if _, err = m.Load(true); err != nil {
		t.Fatalf("load setting: %s", err)
	}
	if err = m.DumpMeta(fp, root, 1, false, true, false); err != nil {
		t.Fatalf("dump meta: %s", err)
	}
	cmd := exec.Command("diff", expect, result)
	if out, err := cmd.Output(); err != nil {
		t.Fatalf("diff %s %s: %s", expect, result, out)
	}
	fp.Seek(0, 0)
	if err = m.DumpMeta(fp, root, 10, false, false, false); err != nil {
		t.Fatalf("dump meta: %s", err)
	}
	cmd = exec.Command("diff", expect, result)
	if out, err := cmd.Output(); err != nil {
		t.Fatalf("diff %s %s: %s", expect, result, out)
	}
}

func testLoadDump(t *testing.T, name, addr string) {
	t.Run("Metadata Engine: "+name, func(t *testing.T) {
		m := testLoad(t, addr, sampleFile, false)
		testDump(t, m, 1, sampleFile, "test.dump")
		m.Shutdown()
		conf := DefaultConf()
		conf.Subdir = "d1"
		m = NewClient(addr, conf)
		_ = m.Chroot(Background(), "d1")
		testDump(t, m, 1, subSampleFile, "test_subdir.dump")
		testDump(t, m, 0, sampleFile, "test.dump")
		_ = m.Shutdown()
		testLoadSub(t, addr, subSampleFile)
	})
}

func TestLoadDump(t *testing.T) { //skip mutate
	testLoadDump(t, "redis", "redis://127.0.0.1/10")
	// testLoadDump(t, "mysql", "mysql://root:@/dev")
	testLoadDump(t, "badger", "badger://jfs-load-dump")
	testLoadDump(t, "tikv", "tikv://127.0.0.1:2379/jfs-load-dump")
}

func testDumpV2(t *testing.T, m Meta, result string, opt *DumpOption) {
	if opt == nil {
		opt = &DumpOption{Threads: 10, KeepSecret: true}
	}
	fp, err := os.OpenFile(result, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		t.Fatalf("open file %s: %s", result, err)
	}
	defer fp.Close()
	if _, err = m.Load(true); err != nil {
		t.Fatalf("load setting: %s", err)
	}
	if err = m.DumpMetaV2(Background(), fp, opt); err != nil {
		t.Fatalf("dump meta: %s", err)
	}
	fp.Sync()
}

func testLoad(t *testing.T, uri, fname string, v2 bool) Meta {
	m := NewClient(uri, nil)
	if err := m.Reset(); err != nil {
		t.Fatalf("reset meta: %s", err)
	}
	fp, err := os.Open(fname)
	if err != nil {
		t.Fatalf("open file: %s", fname)
	}
	defer fp.Close()
	if v2 {
		if err = m.LoadMetaV2(Background(), fp, &LoadOption{Threads: 10}); err != nil {
			t.Fatalf("load meta: %s", err)
		}
	} else {
		if err = m.LoadMeta(fp); err != nil {
			t.Fatalf("load meta: %s", err)
		}
	}
	checkMeta(t, m)
	return m
}

func testLoadDumpV2(t *testing.T, name, addr1, addr2 string) {
	t.Run("Metadata Engine: "+name, func(t *testing.T) {
		start := time.Now()
		m := testLoad(t, addr1, sampleFile, false)
		t.Logf("load meta: %v", time.Since(start))
		start = time.Now()
		testDumpV2(t, m, fmt.Sprintf("%s.dump", name), nil)
		m.Shutdown()
		t.Logf("dump meta v2: %v", time.Since(start))
		start = time.Now()
		m = testLoad(t, addr2, fmt.Sprintf("%s.dump", name), true)
		m.Shutdown()
		t.Logf("load meta v2: %v", time.Since(start))
	})
}

func testLoadOtherEngine(t *testing.T, src, dst, dstAddr string) {
	t.Run(fmt.Sprintf("Load %s to %s", src, dst), func(t *testing.T) {
		m := testLoad(t, dstAddr, fmt.Sprintf("%s.dump", src), true)
		m.Shutdown()
	})
}

func TestLoadDumpV2(t *testing.T) {
	logger.SetLevel(logrus.DebugLevel)

	engines := map[string][]string{
		"sqlite3": {"sqlite3://dev.db", "sqlite3://dev2.db"},
		// "mysql": {"mysql://root:@/dev", "mysql://root:@/dev2"},
		"redis":  {"redis://127.0.0.1:6379/2", "redis://127.0.0.1:6379/3"},
		"badger": {"badger://" + path.Join(t.TempDir(), "jfs-load-duimp-testdb-bk1"), "badger://" + path.Join(t.TempDir(), "jfs-load-duimp-testdb-bk2")},
		// "tikv":  {"tikv://127.0.0.1:2379/jfs-load-dump-1", "tikv://127.0.0.1:2379/jfs-load-dump-2"},
	}

	for name, addrs := range engines {
		testLoadDumpV2(t, name, addrs[0], addrs[1])
		testSecretAndTrash(t, addrs[0], addrs[1])
	}

	for src := range engines {
		for dst, dstAddr := range engines {
			if src == dst {
				continue
			}
			testLoadOtherEngine(t, src, dst, dstAddr[1])
		}
	}
}

func TestLoadDumpSlow(t *testing.T) { //skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	testLoadDump(t, "redis cluster", "redis://127.0.0.1:7001/10")
	testLoadDump(t, "sqlite", "sqlite3://"+path.Join(t.TempDir(), "jfs-load-dump-test.db"))
	testLoadDump(t, "badger", "badger://"+path.Join(t.TempDir(), "jfs-load-duimp-testdb"))
	testLoadDump(t, "etcd", fmt.Sprintf("etcd://%s/jfs-load-dump", os.Getenv("ETCD_ADDR")))
	testLoadDump(t, "postgres", "postgres://localhost:5432/test?sslmode=disable")
}

func TestLoadDump_MemKV(t *testing.T) {
	t.Run("Metadata Engine: memkv", func(t *testing.T) {
		_ = os.Remove(settingPath)
		m := testLoad(t, "memkv://test/jfs", sampleFile, false)
		testDump(t, m, 1, sampleFile, "test.dump")
	})
	t.Run("Metadata Engine: memkv; --SubDir d1 ", func(t *testing.T) {
		_ = os.Remove(settingPath)
		m := testLoad(t, "memkv://user:pass@test/jfs", sampleFile, false)
		if kvm, ok := m.(*kvMeta); ok { // memkv will be empty if created again
			if st := kvm.Chroot(Background(), "d1"); st != 0 {
				t.Fatalf("Chroot to subdir d1: %s", st)
			}
		}
		testDump(t, m, 1, subSampleFile, "test_subdir.dump")
		testDump(t, m, 0, sampleFile, "test.dump")
		_ = os.Remove(settingPath)
		testLoadSub(t, "memkv://user:pass@test/jfs", subSampleFile)
	})
}

func testSecretAndTrash(t *testing.T, addr, addr2 string) {
	m := testLoad(t, addr, sampleFile, false)
	testDumpV2(t, m, "sqlite-secret.dump", &DumpOption{Threads: 10, KeepSecret: true})
	m2 := testLoad(t, addr2, "sqlite-secret.dump", true)
	if m2.GetFormat().EncryptKey != m.GetFormat().EncryptKey {
		t.Fatalf("encrypt key not valid: %s", m2.GetFormat().EncryptKey)
	}
	testDumpV2(t, m, "sqlite-non-secret.dump", &DumpOption{Threads: 10, KeepSecret: false})
	m2.Shutdown()

	m2 = testLoad(t, addr2, "sqlite-non-secret.dump", true)
	if m2.GetFormat().EncryptKey != "removed" {
		t.Fatalf("encrypt key not valid: %s", m2.GetFormat().EncryptKey)
	}

	// trash
	trashs := map[Ino]uint64{
		27: 11,
		29: 10485760,
	}
	cnt := 0
	m2.getBase().scanTrashFiles(Background(), func(inode Ino, size uint64, ts time.Time) (clean bool, err error) {
		cnt++
		if tSize, ok := trashs[inode]; !ok || size != tSize {
			t.Fatalf("trash file: %d %d", inode, size)
		}
		return false, nil
	})
	if cnt != len(trashs) {
		t.Fatalf("trash count: %d != %d", cnt, len(trashs))
	}

	m.Shutdown()
	m2.Shutdown()
}

/*
func BenchmarkLoadDumpV2(b *testing.B) {
	logrus.SetLevel(logrus.DebugLevel)
	b.ReportAllocs()
	engines := map[string]string{
		"mysql": "mysql://root:@/dev",
		"redis": "redis://127.0.0.1:6379/2",
		"tikv": "tikv://127.0.0.1:2379/jfs-load-dump-1",
	}

	sample := "../../1M_files_in_one_dir.dump"
	for name, addr := range engines {
		m := NewClient(addr, nil)
		defer func() {
			m.Reset()
			m.Shutdown()
		}()
		b.Run("Load "+name, func(b *testing.B) {
			if err := m.Reset(); err != nil {
				b.Fatalf("reset meta: %s", err)
			}
			fp, err := os.Open(sample)
			if err != nil {
				b.Fatalf("open file: %s", sample)
			}
			defer fp.Close()

			b.ResetTimer()
			if err = m.LoadMeta(fp); err != nil {
				b.Fatalf("load meta: %s", err)
			}
		})

		b.Run("Dump "+name, func(b *testing.B) {
			path := fmt.Sprintf("%s.v1.dump", name)
			fp, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
			if err != nil {
				b.Fatalf("open file %s: %s", path, err)
			}
			defer fp.Close()
			if _, err = m.Load(true); err != nil {
				b.Fatalf("load setting: %s", err)
			}

			b.ResetTimer()
			if err = m.DumpMeta(fp, RootInode, 10, true, true, false); err != nil {
				b.Fatalf("dump meta: %s", err)
			}
			fp.Sync()
		})

		b.Run("DumpV2 "+name, func(b *testing.B) {
			path := fmt.Sprintf("%s.v2.dump", name)
			fp, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
			if err != nil {
				b.Fatalf("open file %s: %s", path, err)
			}
			defer fp.Close()

			b.ResetTimer()
			if err = m.DumpMetaV2(Background(), fp, &DumpOption{Threads: 10}); err != nil {
				b.Fatalf("dump meta: %s", err)
			}
			fp.Sync()

			b.StopTimer()
			bak := &bakFormat{}
			fp2, err := os.Open(path)
			if err != nil {
				b.Fatalf("open file: %s", path)
			}
			defer fp2.Close()
			footer, err := bak.readFooter(fp2)
			if err != nil {
				b.Fatalf("read footer: %s", err)
			}
			for name, info := range footer.msg.Infos {
				b.Logf("segment: %s, num: %d", name, info.Num)
			}
			b.StartTimer()
		})

		b.Run("LoadV2 "+name, func(b *testing.B) {
			path := fmt.Sprintf("%s.v2.dump", name)
			if err := m.Reset(); err != nil {
				b.Fatalf("reset meta: %s", err)
			}
			fp, err := os.Open(path)
			if err != nil {
				b.Fatalf("open file: %s", path)
			}
			defer fp.Close()

			b.ResetTimer()
			if err = m.LoadMetaV2(Background(), fp, &LoadOption{Threads: 10}); err != nil {
				b.Fatalf("load meta: %s", err)
			}
		})
	}
}
*/


================================================
FILE: pkg/meta/lua_scripts.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// nolint
package meta

const scriptLookup = `
local buf = redis.call('HGET', KEYS[1], KEYS[2])
if not buf then
    error("ENOENT")
end
local ino = struct.unpack(">I8", string.sub(buf, 2))
-- double float has 52 significant bits
if ino > 4503599627370495 then
    error("ENOTSUP")
end
return {ino, redis.call('GET', "i" .. string.format("%.f", ino))}
`

const scriptResolve = `
local function unpack_attr(buf)
    local x = {}
    x.flags, x.mode, x.uid, x.gid = struct.unpack(">BHI4I4", string.sub(buf, 0, 11))
    x.type = math.floor(x.mode / 4096) % 8
    x.mode = x.mode % 4096
    return x
end

local function get_attr(ino)
    local encoded_attr = redis.call('GET', "i" .. string.format("%.f", ino))
    if not encoded_attr then
        error("ENOENT")
    end
    return unpack_attr(encoded_attr)
end

local function lookup(parent, name)
    local buf = redis.call('HGET', "d" .. string.format("%.f", parent), name)
    if not buf then
        error("ENOENT")
    end
    return struct.unpack(">BI8", buf)
end

local function has_value(tab, val)
    for index, value in ipairs(tab) do
        if value == val then
            return true
        end
    end
    return false
end

local function can_access(ino, uid, gids)
    if uid == 0 then
        return true
    end

    local attr = get_attr(ino)
    local mode = 0
    if attr.uid == uid then
        mode = math.floor(attr.mode / 64) % 8
    elseif has_value(gids, tostring(attr.gid)) then
        mode = math.floor(attr.mode / 8) % 8
    else
        mode = attr.mode % 8
    end
    return mode % 2 == 1
end

local function resolve(parent, path, uid, gids)
    local _maxIno = 4503599627370495
    local _type = 2
    for name in string.gmatch(path, "[^/]+") do
        if _type == 3 or parent > _maxIno then
            error("ENOTSUP")
        elseif _type ~= 2 then
            error("ENOTDIR")
        elseif parent > 1 and not can_access(parent, uid, gids) then 
            error("EACCESS")
        end
        _type, parent = lookup(parent, name)
    end
    if parent > _maxIno then
        error("ENOTSUP")
    end
    return {parent, redis.call('GET', "i" .. string.format("%.f", parent))}
end

return resolve(tonumber(KEYS[1]), KEYS[2], tonumber(KEYS[3]), ARGV)
`


================================================
FILE: pkg/meta/metadata-sub.sample
================================================
{
  "Setting": {
    "Name": "load-dump-test",
    "UUID": "faa27c8f-edab-4791-a4e0-1620b732b343",
    "Storage": "file",
    "Bucket": "/Users/juicefs/.juicefs/local/",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "EncryptKey": "AQSttslKOSE/hQT/gmaMniCsdPF8JdPRfoYK6zFkdUOnifYwBA==",
    "KeyEncrypted": true,
    "TrashDays": 1,
    "MetaVersion": 1,
    "EnableACL": true
  },
  "Counters": {
    "usedSpace": 115392512,
    "usedInodes": 14,
    "nextInodes": 35,
    "nextChunk": 9,
    "nextSession": 0,
    "nextTrash": 1
  },
  "Sustained": [],
  "DelFiles": [
    {
      "inode": 23,
      "length": 0,
      "expire": 1637664458
    }
  ],
  "Quotas": {
    "1": {
      "maxSpace": 1073741824,
      "maxInodes": 100
    }
  },
  "FSTree": {
    "attr": {"inode":3,"type":"directory","mode":493,"uid":501,"gid":20,"atime":1623746591,"mtime":1623746610,"ctime":1623746610,"atimensec":959224111,"mtimensec":959224111,"ctimensec":959224111,"nlink":2,"length":0},
    "xattrs": [{"name":"dk","value":"果汁%2525"}],
    "entries": {
      "big": {
        "attr": {"inode":6,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1637150857,"mtime":1637150858,"ctime":1637150878,"atimensec":961503222,"mtimensec":961503222,"ctimensec":961503222,"nlink":1,"length":104857600},
        "posix_acl_default": {"owner":7,"group":5,"other":5,"mask":5,"users":null,"groups":[{"id":5,"perm":4},{"id":6,"perm":4}]},
        "chunks": [
          {"index":0,"slices":[{"id":5,"size":67108864,"len":67108864}]},
          {"index":1,"slices":[{"id":6,"size":37748736,"len":37748736}]}
        ]
      },
      "f11": {
        "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590333,"mtimensec":591590333,"ctimensec":591590333,"nlink":2,"length":12},
        "chunks": [{"index":0,"slices":[{"id":2,"size":12,"len":12}]}]
      }
    }
  }
}


================================================
FILE: pkg/meta/metadata.sample
================================================
{
  "Setting": {
    "Name": "load-dump-test",
    "UUID": "faa27c8f-edab-4791-a4e0-1620b732b343",
    "Storage": "file",
    "Bucket": "/Users/juicefs/.juicefs/local/",
    "SecretKey": "removed",
    "BlockSize": 4096,
    "Compression": "none",
    "EncryptKey": "AQSttslKOSE/hQT/gmaMniCsdPF8JdPRfoYK6zFkdUOnifYwBA==",
    "KeyEncrypted": true,
    "TrashDays": 1,
    "MetaVersion": 1,
    "EnableACL": true
  },
  "Counters": {
    "usedSpace": 115392512,
    "usedInodes": 14,
    "nextInodes": 35,
    "nextChunk": 9,
    "nextSession": 0,
    "nextTrash": 1
  },
  "Sustained": [],
  "DelFiles": [
    {
      "inode": 23,
      "length": 0,
      "expire": 1637664458
    }
  ],
  "Quotas": {
    "1": {
      "maxSpace": 1073741824,
      "maxInodes": 100
    }
  },
  "FSTree": {
    "attr": {"inode":1,"type":"directory","mode":511,"uid":0,"gid":0,"atime":1623745101,"mtime":1638437879,"ctime":1638437879,"nlink":5,"length":0},
    "xattrs": [{"name":"lastBackup","value":"2021-11-23T18:29:54+08:00"}],
    "posix_acl_access": {"owner":6,"group":4,"other":4,"mask":4,"users":null,"groups":null},
    "entries": {
      "GBK%B9%FB֭%CA%FD%BEݿƼ%BC%D3%D0%CF޹%AB˾%CEļ%FE": {
        "attr": {"inode":34,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1648717321,"mtime":1648717321,"ctime":1648717321,"atimensec":401146141,"mtimensec":401146141,"ctimensec":401146141,"nlink":1,"length":0},
        "posix_acl_access": {"owner":6,"group":4,"other":4,"mask":4,"users":[{"id":1,"perm":6},{"id":2,"perm":7}],"groups":null},
        "posix_acl_default": {"owner":7,"group":5,"other":5,"mask":5,"users":null,"groups":[{"id":3,"perm":6},{"id":4,"perm":7}]}
      },
      "UTF8果汁数据科技有限公司文件": {
        "attr": {"inode":33,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1648717211,"mtime":1648717211,"ctime":1648717211,"atimensec":36325414,"mtimensec":36325414,"ctimensec":36325414,"nlink":1,"length":0}
      },
      "UTF8果汁数据科技有限公司目录": {
        "attr": {"inode":32,"type":"directory","mode":493,"uid":501,"gid":0,"atime":1648717173,"mtime":1648717173,"ctime":1648717173,"atimensec":605897411,"mtimensec":605897411,"ctimensec":605897411,"nlink":2,"length":0},
        "entries": {
        }
      },
      "d": {
        "attr": {"inode":25,"type":"directory","mode":493,"uid":1,"gid":0,"atime":1637664458,"mtime":1637664458,"ctime":1637664458,"atimensec":862381233,"mtimensec":862381233,"ctimensec":862381233,"nlink":2,"length":0},
        "entries": {
        }
      },
      "d1": {
        "attr": {"inode":3,"type":"directory","mode":493,"uid":501,"gid":20,"atime":1623746591,"mtime":1623746610,"ctime":1623746610,"atimensec":959224111,"mtimensec":959224111,"ctimensec":959224111,"nlink":2,"length":0},
        "xattrs": [{"name":"dk","value":"果汁%2525"}],
        "entries": {
          "big": {
            "attr": {"inode":6,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1637150857,"mtime":1637150858,"ctime":1637150878,"atimensec":961503222,"mtimensec":961503222,"ctimensec":961503222,"nlink":1,"length":104857600},
            "posix_acl_default": {"owner":7,"group":5,"other":5,"mask":5,"users":null,"groups":[{"id":5,"perm":4},{"id":6,"perm":4}]},
            "chunks": [
              {"index":0,"slices":[{"id":5,"size":67108864,"len":67108864}]},
              {"index":1,"slices":[{"id":6,"size":37748736,"len":37748736}]}
            ]
          },
          "f11": {
            "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590333,"mtimensec":591590333,"ctimensec":591590333,"nlink":2,"length":12},
            "chunks": [{"index":0,"slices":[{"id":2,"size":12,"len":12}]}]
          }
        }
      },
      "f1": {
        "attr": {"inode":2,"flags":128,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746580,"mtime":1623746661,"ctime":1623746661,"atimensec":219686444,"mtimensec":219686444,"ctimensec":219686444,"nlink":1,"length":24},
        "xattrs": [{"name":"k","value":"v"}],
        "posix_acl_access": {"owner":6,"group":4,"other":4,"mask":4,"users":[{"id":1,"perm":6},{"id":2,"perm":7}],"groups":null},
        "posix_acl_default": {"owner":7,"group":5,"other":5,"mask":5,"users":null,"groups":[{"id":3,"perm":6},{"id":4,"perm":7}]},
        "chunks": [{"index":0,"slices":[{"id":1,"size":6,"len":6},{"id":2,"size":12,"len":12},{"id":4,"size":24,"len":24}]}]
      },
      "l1": {
        "attr": {"inode":4,"type":"regular","mode":420,"uid":501,"gid":20,"atime":1623746610,"mtime":1623746610,"ctime":1623746639,"atimensec":591590333,"mtimensec":591590333,"ctimensec":591590333,"nlink":2,"length":12},
        "chunks": [{"index":0,"slices":[{"id":2,"size":12,"len":12}]}]
      },
      "s1": {
        "attr": {"inode":5,"type":"symlink","mode":420,"uid":501,"gid":20,"atime":1623746645,"mtime":1623746645,"ctime":1623746645,"atimensec":984144666,"mtimensec":984144666,"ctimensec":984144666,"nlink":1,"length":0},
        "symlink": "GBK%B9%FB֭%CA%FD%BEݿƼ%BC%D3%D0%CF޹%AB˾%CEļ%FE"
      },
      "sd": {
        "attr": {"inode":26,"type":"symlink","mode":420,"uid":1,"gid":0,"atime":1637664458,"mtime":1637664458,"ctime":1637664458,"atimensec":873647777,"mtimensec":873647777,"ctimensec":873647777,"nlink":1,"length":0},
        "symlink": "d"
      }
    }
  },
  "Trash": {
    "attr": {"inode":9223372032828243968,"type":"directory","mode":365,"uid":0,"gid":0,"atime":1623745101,"mtime":1638437877,"ctime":1638437877,"nlink":2,"length":0},
    "entries": {
      "2021-12-02-09": {
        "attr": {"inode":9223372032828243969,"type":"directory","mode":365,"uid":0,"gid":0,"atime":1638437877,"mtime":1638437877,"ctime":1638437877,"atimensec":598277000,"mtimensec":598277000,"ctimensec":598277000,"nlink":2,"length":0},
        "entries": {
          "1-27-tf1": {
            "attr": {"inode":27,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1638437852,"mtime":1638437852,"ctime":1638437877,"atimensec":28186000,"mtimensec":28186000,"ctimensec":28186000,"nlink":1,"length":11},
            "chunks": [{"index":0,"slices":[{"id":7,"size":11,"len":11}]}]
          },
          "1-28-td1": {
            "attr": {"inode":28,"type":"directory","mode":493,"uid":501,"gid":0,"atime":1638437856,"mtime":1638437879,"ctime":1638437879,"atimensec":59246000,"mtimensec":59246000,"ctimensec":59246000,"nlink":2,"length":0},
            "entries": {
            }
          },
          "28-29-tdf1": {
            "attr": {"inode":29,"type":"regular","mode":420,"uid":501,"gid":0,"atime":1638437873,"mtime":1638437873,"ctime":1638437879,"atimensec":449880000,"mtimensec":449880000,"ctimensec":449880000,"nlink":1,"length":10485760},
            "chunks": [{"index":0,"slices":[{"id":8,"size":10485760,"len":10485760}]}]
          }
        }
      }
    }
  }
}


================================================
FILE: pkg/meta/openfile.go
================================================
package meta

import (
	"sync"
	"time"
)

const (
	invalidateAllChunks = 0xFFFFFFFF
	invalidateAttrOnly  = 0xFFFFFFFE
)

var ofPool = sync.Pool{
	New: func() interface{} {
		return &openFile{}
	},
}

type openFile struct {
	sync.RWMutex
	attr      Attr
	refs      int
	lastCheck int64
	first     []Slice
	chunks    map[uint32][]Slice
}

func (o *openFile) invalidateChunk() {
	o.first = nil
	for c := range o.chunks {
		delete(o.chunks, c)
	}
}

func (o *openFile) release() {
	o.attr = Attr{}
	o.refs = 0
	o.lastCheck = 0
	o.first = nil
	o.chunks = nil
	ofPool.Put(o)
}

type openfiles struct {
	sync.Mutex
	expire time.Duration
	limit  uint64
	files  map[Ino]*openFile
}

func newOpenFiles(expire time.Duration, limit uint64) *openfiles {
	of := &openfiles{
		expire: expire,
		limit:  limit,
		files:  make(map[Ino]*openFile),
	}
	go of.cleanup()
	return of
}

func (o *openfiles) cleanup() {
	for {
		var (
			cnt, deleted, todel int
			candidateIno        Ino
			candidateOf         *openFile
		)
		o.Lock()
		if o.limit > 0 && len(o.files) > int(o.limit) {
			todel = len(o.files) - int(o.limit)
		}
		now := time.Now().Unix()
		for ino, of := range o.files {
			cnt++
			if cnt > 1e3 || todel > 0 && deleted >= todel {
				break
			}
			if of.refs <= 0 {
				if now-of.lastCheck > 3600*12 {
					of.release()
					delete(o.files, ino)
					deleted++
					continue
				}
				if todel == 0 {
					continue
				}
				if candidateIno == 0 {
					candidateIno = ino
					candidateOf = of
					continue
				}
				if of.lastCheck < candidateOf.lastCheck {
					candidateIno = ino
					candidateOf = of
				}
				candidateOf.release()
				delete(o.files, candidateIno)
				deleted++
				candidateIno = 0
			}
		}
		o.Unlock()
		time.Sleep(time.Millisecond * time.Duration(1000*(cnt+1-deleted*2)/(cnt+1)))
	}
}

func (o *openfiles) OpenCheck(ino Ino, attr *Attr) bool {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if ok && time.Second*time.Duration(time.Now().Unix()-of.lastCheck) < o.expire {
		if attr != nil {
			*attr = of.attr
		}
		of.refs++
		return true
	}
	return false
}

func (o *openfiles) Open(ino Ino, attr *Attr) {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if !ok {
		of = ofPool.Get().(*openFile)
		o.files[ino] = of
	} else if attr != nil && attr.Mtime == of.attr.Mtime && attr.Mtimensec == of.attr.Mtimensec {
		attr.KeepCache = of.attr.KeepCache
	} else {
		of.invalidateChunk()
	}
	if attr != nil {
		of.attr = *attr
	}
	// next open can keep cache if not modified
	of.attr.KeepCache = true
	of.refs++
	of.lastCheck = time.Now().Unix()
}

func (o *openfiles) Close(ino Ino) bool {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if ok {
		of.refs--
		return of.refs <= 0
	}
	return true
}

func (o *openfiles) Check(ino Ino, attr *Attr) bool {
	if attr == nil {
		panic("attr is nil")
	}
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if ok && time.Second*time.Duration(time.Now().Unix()-of.lastCheck) < o.expire {
		*attr = of.attr
		return true
	}
	return false
}

func (o *openfiles) Update(ino Ino, attr *Attr) bool {
	if attr == nil {
		return false
	}
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if ok {
		if attr.Mtime != of.attr.Mtime || attr.Mtimensec != of.attr.Mtimensec {
			of.invalidateChunk()
		} else {
			attr.KeepCache = of.attr.KeepCache
		}
		of.attr = *attr
		of.lastCheck = time.Now().Unix()
		return true
	}
	return false
}

func (o *openfiles) IsOpen(ino Ino) bool {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	return ok && of.refs > 0
}

func (o *openfiles) ReadChunk(ino Ino, indx uint32) ([]Slice, bool) {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if !ok {
		return nil, false
	}
	if indx == 0 {
		return of.first, of.first != nil
	} else {
		cs, ok := of.chunks[indx]
		return cs, ok
	}
}

func (o *openfiles) CacheChunk(ino Ino, indx uint32, cs []Slice) {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if !ok {
		return
	}
	if indx == 0 {
		of.first = cs
	} else {
		if of.chunks == nil {
			of.chunks = make(map[uint32][]Slice)
		}
		of.chunks[indx] = cs
	}
}

func (o *openfiles) InvalidateChunk(ino Ino, indx uint32) {
	o.Lock()
	defer o.Unlock()
	of, ok := o.files[ino]
	if ok {
		if indx == invalidateAllChunks {
			of.invalidateChunk()
		} else if indx == 0 {
			of.first = nil
		} else {
			delete(of.chunks, indx)
		}
		of.lastCheck = 0
	}
}

func (o *openfiles) find(ino Ino) *openFile {
	o.Lock()
	defer o.Unlock()
	return o.files[ino]
}


================================================
FILE: pkg/meta/pb/backup.pb.go
================================================
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// 	protoc-gen-go v1.35.2
// 	protoc        v5.29.0
// source: pkg/meta/pb/backup.proto

package pb

import (
	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
	reflect "reflect"
	sync "sync"
)

const (
	// Verify that this generated code is sufficiently up-to-date.
	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
	// Verify that runtime/protoimpl is sufficiently up-to-date.
	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)

type Format struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` // meta.Format's json format
}

func (x *Format) Reset() {
	*x = Format{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[0]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Format) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Format) ProtoMessage() {}

func (x *Format) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[0]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Format.ProtoReflect.Descriptor instead.
func (*Format) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{0}
}

func (x *Format) GetData() []byte {
	if x != nil {
		return x.Data
	}
	return nil
}

type Counter struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Key   string `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"`
	Value int64  `protobuf:"varint,2,opt,name=value,proto3" json:"value,omitempty"`
}

func (x *Counter) Reset() {
	*x = Counter{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[1]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Counter) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Counter) ProtoMessage() {}

func (x *Counter) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[1]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Counter.ProtoReflect.Descriptor instead.
func (*Counter) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{1}
}

func (x *Counter) GetKey() string {
	if x != nil {
		return x.Key
	}
	return ""
}

func (x *Counter) GetValue() int64 {
	if x != nil {
		return x.Value
	}
	return 0
}

type Sustained struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Sid    uint64   `protobuf:"varint,1,opt,name=sid,proto3" json:"sid,omitempty"`
	Inodes []uint64 `protobuf:"varint,2,rep,packed,name=inodes,proto3" json:"inodes,omitempty"`
}

func (x *Sustained) Reset() {
	*x = Sustained{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[2]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Sustained) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Sustained) ProtoMessage() {}

func (x *Sustained) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[2]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Sustained.ProtoReflect.Descriptor instead.
func (*Sustained) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{2}
}

func (x *Sustained) GetSid() uint64 {
	if x != nil {
		return x.Sid
	}
	return 0
}

func (x *Sustained) GetInodes() []uint64 {
	if x != nil {
		return x.Inodes
	}
	return nil
}

type DelFile struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode  uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Length uint64 `protobuf:"varint,2,opt,name=length,proto3" json:"length,omitempty"`
	Expire int64  `protobuf:"varint,3,opt,name=expire,proto3" json:"expire,omitempty"`
}

func (x *DelFile) Reset() {
	*x = DelFile{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[3]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *DelFile) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*DelFile) ProtoMessage() {}

func (x *DelFile) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[3]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use DelFile.ProtoReflect.Descriptor instead.
func (*DelFile) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{3}
}

func (x *DelFile) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *DelFile) GetLength() uint64 {
	if x != nil {
		return x.Length
	}
	return 0
}

func (x *DelFile) GetExpire() int64 {
	if x != nil {
		return x.Expire
	}
	return 0
}

type SliceRef struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Id   uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
	Size uint32 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"`
	Refs int64  `protobuf:"varint,3,opt,name=refs,proto3" json:"refs,omitempty"`
}

func (x *SliceRef) Reset() {
	*x = SliceRef{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[4]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *SliceRef) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*SliceRef) ProtoMessage() {}

func (x *SliceRef) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[4]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use SliceRef.ProtoReflect.Descriptor instead.
func (*SliceRef) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{4}
}

func (x *SliceRef) GetId() uint64 {
	if x != nil {
		return x.Id
	}
	return 0
}

func (x *SliceRef) GetSize() uint32 {
	if x != nil {
		return x.Size
	}
	return 0
}

func (x *SliceRef) GetRefs() int64 {
	if x != nil {
		return x.Refs
	}
	return 0
}

type Acl struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Id   uint32 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
	Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` // acl.Rule's binary format
}

func (x *Acl) Reset() {
	*x = Acl{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[5]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Acl) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Acl) ProtoMessage() {}

func (x *Acl) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[5]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Acl.ProtoReflect.Descriptor instead.
func (*Acl) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{5}
}

func (x *Acl) GetId() uint32 {
	if x != nil {
		return x.Id
	}
	return 0
}

func (x *Acl) GetData() []byte {
	if x != nil {
		return x.Data
	}
	return nil
}

type Xattr struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Name  string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"`
	Value []byte `protobuf:"bytes,3,opt,name=value,proto3" json:"value,omitempty"`
}

func (x *Xattr) Reset() {
	*x = Xattr{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[6]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Xattr) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Xattr) ProtoMessage() {}

func (x *Xattr) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[6]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Xattr.ProtoReflect.Descriptor instead.
func (*Xattr) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{6}
}

func (x *Xattr) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Xattr) GetName() string {
	if x != nil {
		return x.Name
	}
	return ""
}

func (x *Xattr) GetValue() []byte {
	if x != nil {
		return x.Value
	}
	return nil
}

type Quota struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode      uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	MaxSpace   int64  `protobuf:"varint,2,opt,name=maxSpace,proto3" json:"maxSpace,omitempty"`
	MaxInodes  int64  `protobuf:"varint,3,opt,name=maxInodes,proto3" json:"maxInodes,omitempty"`
	UsedSpace  int64  `protobuf:"varint,4,opt,name=usedSpace,proto3" json:"usedSpace,omitempty"`
	UsedInodes int64  `protobuf:"varint,5,opt,name=usedInodes,proto3" json:"usedInodes,omitempty"`
}

func (x *Quota) Reset() {
	*x = Quota{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[7]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Quota) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Quota) ProtoMessage() {}

func (x *Quota) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[7]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Quota.ProtoReflect.Descriptor instead.
func (*Quota) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{7}
}

func (x *Quota) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Quota) GetMaxSpace() int64 {
	if x != nil {
		return x.MaxSpace
	}
	return 0
}

func (x *Quota) GetMaxInodes() int64 {
	if x != nil {
		return x.MaxInodes
	}
	return 0
}

func (x *Quota) GetUsedSpace() int64 {
	if x != nil {
		return x.UsedSpace
	}
	return 0
}

func (x *Quota) GetUsedInodes() int64 {
	if x != nil {
		return x.UsedInodes
	}
	return 0
}

type Stat struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode      uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	DataLength int64  `protobuf:"varint,2,opt,name=dataLength,proto3" json:"dataLength,omitempty"`
	UsedSpace  int64  `protobuf:"varint,3,opt,name=usedSpace,proto3" json:"usedSpace,omitempty"`
	UsedInodes int64  `protobuf:"varint,4,opt,name=usedInodes,proto3" json:"usedInodes,omitempty"`
}

func (x *Stat) Reset() {
	*x = Stat{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[8]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Stat) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Stat) ProtoMessage() {}

func (x *Stat) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[8]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Stat.ProtoReflect.Descriptor instead.
func (*Stat) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{8}
}

func (x *Stat) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Stat) GetDataLength() int64 {
	if x != nil {
		return x.DataLength
	}
	return 0
}

func (x *Stat) GetUsedSpace() int64 {
	if x != nil {
		return x.UsedSpace
	}
	return 0
}

func (x *Stat) GetUsedInodes() int64 {
	if x != nil {
		return x.UsedInodes
	}
	return 0
}

type Node struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Data  []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` // meta.Attr's binary format
}

func (x *Node) Reset() {
	*x = Node{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[9]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Node) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Node) ProtoMessage() {}

func (x *Node) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[9]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Node.ProtoReflect.Descriptor instead.
func (*Node) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{9}
}

func (x *Node) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Node) GetData() []byte {
	if x != nil {
		return x.Data
	}
	return nil
}

type Edge struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Parent uint64 `protobuf:"varint,1,opt,name=parent,proto3" json:"parent,omitempty"`
	Inode  uint64 `protobuf:"varint,2,opt,name=inode,proto3" json:"inode,omitempty"`
	Name   []byte `protobuf:"bytes,3,opt,name=name,proto3" json:"name,omitempty"`
	Type   uint32 `protobuf:"varint,4,opt,name=type,proto3" json:"type,omitempty"`
}

func (x *Edge) Reset() {
	*x = Edge{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[10]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Edge) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Edge) ProtoMessage() {}

func (x *Edge) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[10]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Edge.ProtoReflect.Descriptor instead.
func (*Edge) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{10}
}

func (x *Edge) GetParent() uint64 {
	if x != nil {
		return x.Parent
	}
	return 0
}

func (x *Edge) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Edge) GetName() []byte {
	if x != nil {
		return x.Name
	}
	return nil
}

func (x *Edge) GetType() uint32 {
	if x != nil {
		return x.Type
	}
	return 0
}

// for redis and tikv only
type Parent struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode  uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Parent uint64 `protobuf:"varint,2,opt,name=parent,proto3" json:"parent,omitempty"`
	Cnt    int64  `protobuf:"varint,3,opt,name=cnt,proto3" json:"cnt,omitempty"`
}

func (x *Parent) Reset() {
	*x = Parent{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[11]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Parent) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Parent) ProtoMessage() {}

func (x *Parent) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[11]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Parent.ProtoReflect.Descriptor instead.
func (*Parent) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{11}
}

func (x *Parent) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Parent) GetParent() uint64 {
	if x != nil {
		return x.Parent
	}
	return 0
}

func (x *Parent) GetCnt() int64 {
	if x != nil {
		return x.Cnt
	}
	return 0
}

type Chunk struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode  uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Index  uint32 `protobuf:"varint,2,opt,name=index,proto3" json:"index,omitempty"`
	Slices []byte `protobuf:"bytes,3,opt,name=slices,proto3" json:"slices,omitempty"` // array of meta.slice
}

func (x *Chunk) Reset() {
	*x = Chunk{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[12]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Chunk) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Chunk) ProtoMessage() {}

func (x *Chunk) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[12]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Chunk.ProtoReflect.Descriptor instead.
func (*Chunk) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{12}
}

func (x *Chunk) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Chunk) GetIndex() uint32 {
	if x != nil {
		return x.Index
	}
	return 0
}

func (x *Chunk) GetSlices() []byte {
	if x != nil {
		return x.Slices
	}
	return nil
}

type Symlink struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Inode  uint64 `protobuf:"varint,1,opt,name=inode,proto3" json:"inode,omitempty"`
	Target []byte `protobuf:"bytes,2,opt,name=target,proto3" json:"target,omitempty"`
}

func (x *Symlink) Reset() {
	*x = Symlink{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[13]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Symlink) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Symlink) ProtoMessage() {}

func (x *Symlink) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[13]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Symlink.ProtoReflect.Descriptor instead.
func (*Symlink) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{13}
}

func (x *Symlink) GetInode() uint64 {
	if x != nil {
		return x.Inode
	}
	return 0
}

func (x *Symlink) GetTarget() []byte {
	if x != nil {
		return x.Target
	}
	return nil
}

type Batch struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Nodes     []*Node      `protobuf:"bytes,1,rep,name=nodes,proto3" json:"nodes,omitempty"`
	Edges     []*Edge      `protobuf:"bytes,2,rep,name=edges,proto3" json:"edges,omitempty"`
	Chunks    []*Chunk     `protobuf:"bytes,3,rep,name=chunks,proto3" json:"chunks,omitempty"`
	SliceRefs []*SliceRef  `protobuf:"bytes,4,rep,name=sliceRefs,proto3" json:"sliceRefs,omitempty"`
	Xattrs    []*Xattr     `protobuf:"bytes,5,rep,name=xattrs,proto3" json:"xattrs,omitempty"`
	Parents   []*Parent    `protobuf:"bytes,6,rep,name=parents,proto3" json:"parents,omitempty"`
	Symlinks  []*Symlink   `protobuf:"bytes,7,rep,name=symlinks,proto3" json:"symlinks,omitempty"`
	Sustained []*Sustained `protobuf:"bytes,8,rep,name=sustained,proto3" json:"sustained,omitempty"`
	Delfiles  []*DelFile   `protobuf:"bytes,9,rep,name=delfiles,proto3" json:"delfiles,omitempty"`
	Dirstats  []*Stat      `protobuf:"bytes,10,rep,name=dirstats,proto3" json:"dirstats,omitempty"`
	Quotas    []*Quota     `protobuf:"bytes,11,rep,name=quotas,proto3" json:"quotas,omitempty"`
	Acls      []*Acl       `protobuf:"bytes,12,rep,name=acls,proto3" json:"acls,omitempty"`
	Counters  []*Counter   `protobuf:"bytes,13,rep,name=counters,proto3" json:"counters,omitempty"`
}

func (x *Batch) Reset() {
	*x = Batch{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[14]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Batch) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Batch) ProtoMessage() {}

func (x *Batch) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[14]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Batch.ProtoReflect.Descriptor instead.
func (*Batch) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{14}
}

func (x *Batch) GetNodes() []*Node {
	if x != nil {
		return x.Nodes
	}
	return nil
}

func (x *Batch) GetEdges() []*Edge {
	if x != nil {
		return x.Edges
	}
	return nil
}

func (x *Batch) GetChunks() []*Chunk {
	if x != nil {
		return x.Chunks
	}
	return nil
}

func (x *Batch) GetSliceRefs() []*SliceRef {
	if x != nil {
		return x.SliceRefs
	}
	return nil
}

func (x *Batch) GetXattrs() []*Xattr {
	if x != nil {
		return x.Xattrs
	}
	return nil
}

func (x *Batch) GetParents() []*Parent {
	if x != nil {
		return x.Parents
	}
	return nil
}

func (x *Batch) GetSymlinks() []*Symlink {
	if x != nil {
		return x.Symlinks
	}
	return nil
}

func (x *Batch) GetSustained() []*Sustained {
	if x != nil {
		return x.Sustained
	}
	return nil
}

func (x *Batch) GetDelfiles() []*DelFile {
	if x != nil {
		return x.Delfiles
	}
	return nil
}

func (x *Batch) GetDirstats() []*Stat {
	if x != nil {
		return x.Dirstats
	}
	return nil
}

func (x *Batch) GetQuotas() []*Quota {
	if x != nil {
		return x.Quotas
	}
	return nil
}

func (x *Batch) GetAcls() []*Acl {
	if x != nil {
		return x.Acls
	}
	return nil
}

func (x *Batch) GetCounters() []*Counter {
	if x != nil {
		return x.Counters
	}
	return nil
}

type Footer struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Magic   uint32                     `protobuf:"varint,1,opt,name=magic,proto3" json:"magic,omitempty"`
	Version uint32                     `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"`
	Infos   map[string]*Footer_SegInfo `protobuf:"bytes,3,rep,name=infos,proto3" json:"infos,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"`
}

func (x *Footer) Reset() {
	*x = Footer{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[15]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Footer) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Footer) ProtoMessage() {}

func (x *Footer) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[15]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Footer.ProtoReflect.Descriptor instead.
func (*Footer) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{15}
}

func (x *Footer) GetMagic() uint32 {
	if x != nil {
		return x.Magic
	}
	return 0
}

func (x *Footer) GetVersion() uint32 {
	if x != nil {
		return x.Version
	}
	return 0
}

func (x *Footer) GetInfos() map[string]*Footer_SegInfo {
	if x != nil {
		return x.Infos
	}
	return nil
}

type Footer_SegInfo struct {
	state         protoimpl.MessageState
	sizeCache     protoimpl.SizeCache
	unknownFields protoimpl.UnknownFields

	Offset []uint64 `protobuf:"varint,1,rep,packed,name=offset,proto3" json:"offset,omitempty"`
	Num    uint64   `protobuf:"varint,2,opt,name=num,proto3" json:"num,omitempty"`
}

func (x *Footer_SegInfo) Reset() {
	*x = Footer_SegInfo{}
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[16]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}

func (x *Footer_SegInfo) String() string {
	return protoimpl.X.MessageStringOf(x)
}

func (*Footer_SegInfo) ProtoMessage() {}

func (x *Footer_SegInfo) ProtoReflect() protoreflect.Message {
	mi := &file_pkg_meta_pb_backup_proto_msgTypes[16]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
			ms.StoreMessageInfo(mi)
		}
		return ms
	}
	return mi.MessageOf(x)
}

// Deprecated: Use Footer_SegInfo.ProtoReflect.Descriptor instead.
func (*Footer_SegInfo) Descriptor() ([]byte, []int) {
	return file_pkg_meta_pb_backup_proto_rawDescGZIP(), []int{15, 0}
}

func (x *Footer_SegInfo) GetOffset() []uint64 {
	if x != nil {
		return x.Offset
	}
	return nil
}

func (x *Footer_SegInfo) GetNum() uint64 {
	if x != nil {
		return x.Num
	}
	return 0
}

var File_pkg_meta_pb_backup_proto protoreflect.FileDescriptor

var file_pkg_meta_pb_backup_proto_rawDesc = []byte{
	0x0a, 0x18, 0x70, 0x6b, 0x67, 0x2f, 0x6d, 0x65, 0x74, 0x61, 0x2f, 0x70, 0x62, 0x2f, 0x62, 0x61,
	0x63, 0x6b, 0x75, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70, 0x62, 0x22, 0x1c,
	0x0a, 0x06, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61,
	0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x31, 0x0a, 0x07,
	0x43, 0x6f, 0x75, 0x6e, 0x74, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01,
	0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c,
	0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22,
	0x35, 0x0a, 0x09, 0x53, 0x75, 0x73, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x64, 0x12, 0x10, 0x0a, 0x03,
	0x73, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x73, 0x69, 0x64, 0x12, 0x16,
	0x0a, 0x06, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x04, 0x52, 0x06,
	0x69, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x4f, 0x0a, 0x07, 0x44, 0x65, 0x6c, 0x46, 0x69, 0x6c,
	0x65, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04,
	0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74,
	0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x12,
	0x16, 0x0a, 0x06, 0x65, 0x78, 0x70, 0x69, 0x72, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52,
	0x06, 0x65, 0x78, 0x70, 0x69, 0x72, 0x65, 0x22, 0x42, 0x0a, 0x08, 0x53, 0x6c, 0x69, 0x63, 0x65,
	0x52, 0x65, 0x66, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52,
	0x02, 0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28,
	0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x72, 0x65, 0x66, 0x73, 0x18,
	0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x04, 0x72, 0x65, 0x66, 0x73, 0x22, 0x29, 0x0a, 0x03, 0x41,
	0x63, 0x6c, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x02,
	0x69, 0x64, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c,
	0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x05, 0x58, 0x61, 0x74, 0x74, 0x72, 0x12,
	0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05,
	0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20,
	0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c,
	0x75, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x22,
	0x95, 0x01, 0x0a, 0x05, 0x51, 0x75, 0x6f, 0x74, 0x61, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f,
	0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12,
	0x1a, 0x0a, 0x08, 0x6d, 0x61, 0x78, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28,
	0x03, 0x52, 0x08, 0x6d, 0x61, 0x78, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6d,
	0x61, 0x78, 0x49, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09,
	0x6d, 0x61, 0x78, 0x49, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x12, 0x1c, 0x0a, 0x09, 0x75, 0x73, 0x65,
	0x64, 0x53, 0x70, 0x61, 0x63, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x75, 0x73,
	0x65, 0x64, 0x53, 0x70, 0x61, 0x63, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x75, 0x73, 0x65, 0x64, 0x49,
	0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x75, 0x73, 0x65,
	0x64, 0x49, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x7a, 0x0a, 0x04, 0x53, 0x74, 0x61, 0x74, 0x12,
	0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05,
	0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x61, 0x74, 0x61, 0x4c, 0x65, 0x6e,
	0x67, 0x74, 0x68, 0x18, 0x02, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x64, 0x61, 0x74, 0x61, 0x4c,
	0x65, 0x6e, 0x67, 0x74, 0x68, 0x12, 0x1c, 0x0a, 0x09, 0x75, 0x73, 0x65, 0x64, 0x53, 0x70, 0x61,
	0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x75, 0x73, 0x65, 0x64, 0x53, 0x70,
	0x61, 0x63, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x75, 0x73, 0x65, 0x64, 0x49, 0x6e, 0x6f, 0x64, 0x65,
	0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, 0x75, 0x73, 0x65, 0x64, 0x49, 0x6e, 0x6f,
	0x64, 0x65, 0x73, 0x22, 0x30, 0x0a, 0x04, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x69,
	0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64,
	0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52,
	0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x5c, 0x0a, 0x04, 0x45, 0x64, 0x67, 0x65, 0x12, 0x16, 0x0a,
	0x06, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x70,
	0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x02,
	0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e,
	0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12,
	0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x74,
	0x79, 0x70, 0x65, 0x22, 0x48, 0x0a, 0x06, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x14, 0x0a,
	0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e,
	0x6f, 0x64, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x18, 0x02, 0x20,
	0x01, 0x28, 0x04, 0x52, 0x06, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x63,
	0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x63, 0x6e, 0x74, 0x22, 0x4b, 0x0a,
	0x05, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18,
	0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x14, 0x0a, 0x05,
	0x69, 0x6e, 0x64, 0x65, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x69, 0x6e, 0x64,
	0x65, 0x78, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x6c, 0x69, 0x63, 0x65, 0x73, 0x18, 0x03, 0x20, 0x01,
	0x28, 0x0c, 0x52, 0x06, 0x73, 0x6c, 0x69, 0x63, 0x65, 0x73, 0x22, 0x37, 0x0a, 0x07, 0x53, 0x79,
	0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x18, 0x01,
	0x20, 0x01, 0x28, 0x04, 0x52, 0x05, 0x69, 0x6e, 0x6f, 0x64, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x74,
	0x61, 0x72, 0x67, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x74, 0x61, 0x72,
	0x67, 0x65, 0x74, 0x22, 0xed, 0x03, 0x0a, 0x05, 0x42, 0x61, 0x74, 0x63, 0x68, 0x12, 0x1e, 0x0a,
	0x05, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x08, 0x2e, 0x70,
	0x62, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x05, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x12, 0x1e, 0x0a,
	0x05, 0x65, 0x64, 0x67, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x08, 0x2e, 0x70,
	0x62, 0x2e, 0x45, 0x64, 0x67, 0x65, 0x52, 0x05, 0x65, 0x64, 0x67, 0x65, 0x73, 0x12, 0x21, 0x0a,
	0x06, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x09, 0x2e,
	0x70, 0x62, 0x2e, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x52, 0x06, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73,
	0x12, 0x2a, 0x0a, 0x09, 0x73, 0x6c, 0x69, 0x63, 0x65, 0x52, 0x65, 0x66, 0x73, 0x18, 0x04, 0x20,
	0x03, 0x28, 0x0b, 0x32, 0x0c, 0x2e, 0x70, 0x62, 0x2e, 0x53, 0x6c, 0x69, 0x63, 0x65, 0x52, 0x65,
	0x66, 0x52, 0x09, 0x73, 0x6c, 0x69, 0x63, 0x65, 0x52, 0x65, 0x66, 0x73, 0x12, 0x21, 0x0a, 0x06,
	0x78, 0x61, 0x74, 0x74, 0x72, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x09, 0x2e, 0x70,
	0x62, 0x2e, 0x58, 0x61, 0x74, 0x74, 0x72, 0x52, 0x06, 0x78, 0x61, 0x74, 0x74, 0x72, 0x73, 0x12,
	0x24, 0x0a, 0x07, 0x70, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b,
	0x32, 0x0a, 0x2e, 0x70, 0x62, 0x2e, 0x50, 0x61, 0x72, 0x65, 0x6e, 0x74, 0x52, 0x07, 0x70, 0x61,
	0x72, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x27, 0x0a, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b,
	0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0b, 0x2e, 0x70, 0x62, 0x2e, 0x53, 0x79, 0x6d,
	0x6c, 0x69, 0x6e, 0x6b, 0x52, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x73, 0x12, 0x2b,
	0x0a, 0x09, 0x73, 0x75, 0x73, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x64, 0x18, 0x08, 0x20, 0x03, 0x28,
	0x0b, 0x32, 0x0d, 0x2e, 0x70, 0x62, 0x2e, 0x53, 0x75, 0x73, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x64,
	0x52, 0x09, 0x73, 0x75, 0x73, 0x74, 0x61, 0x69, 0x6e, 0x65, 0x64, 0x12, 0x27, 0x0a, 0x08, 0x64,
	0x65, 0x6c, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x09, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0b, 0x2e,
	0x70, 0x62, 0x2e, 0x44, 0x65, 0x6c, 0x46, 0x69, 0x6c, 0x65, 0x52, 0x08, 0x64, 0x65, 0x6c, 0x66,
	0x69, 0x6c, 0x65, 0x73, 0x12, 0x24, 0x0a, 0x08, 0x64, 0x69, 0x72, 0x73, 0x74, 0x61, 0x74, 0x73,
	0x18, 0x0a, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x08, 0x2e, 0x70, 0x62, 0x2e, 0x53, 0x74, 0x61, 0x74,
	0x52, 0x08, 0x64, 0x69, 0x72, 0x73, 0x74, 0x61, 0x74, 0x73, 0x12, 0x21, 0x0a, 0x06, 0x71, 0x75,
	0x6f, 0x74, 0x61, 0x73, 0x18, 0x0b, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x09, 0x2e, 0x70, 0x62, 0x2e,
	0x51, 0x75, 0x6f, 0x74, 0x61, 0x52, 0x06, 0x71, 0x75, 0x6f, 0x74, 0x61, 0x73, 0x12, 0x1b, 0x0a,
	0x04, 0x61, 0x63, 0x6c, 0x73, 0x18, 0x0c, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x07, 0x2e, 0x70, 0x62,
	0x2e, 0x41, 0x63, 0x6c, 0x52, 0x04, 0x61, 0x63, 0x6c, 0x73, 0x12, 0x27, 0x0a, 0x08, 0x63, 0x6f,
	0x75, 0x6e, 0x74, 0x65, 0x72, 0x73, 0x18, 0x0d, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0b, 0x2e, 0x70,
	0x62, 0x2e, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x65, 0x72, 0x52, 0x08, 0x63, 0x6f, 0x75, 0x6e, 0x74,
	0x65, 0x72, 0x73, 0x22, 0xe8, 0x01, 0x0a, 0x06, 0x46, 0x6f, 0x6f, 0x74, 0x65, 0x72, 0x12, 0x14,
	0x0a, 0x05, 0x6d, 0x61, 0x67, 0x69, 0x63, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x6d,
	0x61, 0x67, 0x69, 0x63, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18,
	0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x2b,
	0x0a, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x15, 0x2e,
	0x70, 0x62, 0x2e, 0x46, 0x6f, 0x6f, 0x74, 0x65, 0x72, 0x2e, 0x49, 0x6e, 0x66, 0x6f, 0x73, 0x45,
	0x6e, 0x74, 0x72, 0x79, 0x52, 0x05, 0x69, 0x6e, 0x66, 0x6f, 0x73, 0x1a, 0x33, 0x0a, 0x07, 0x53,
	0x65, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x16, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74,
	0x18, 0x01, 0x20, 0x03, 0x28, 0x04, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x10,
	0x0a, 0x03, 0x6e, 0x75, 0x6d, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, 0x52, 0x03, 0x6e, 0x75, 0x6d,
	0x1a, 0x4c, 0x0a, 0x0a, 0x49, 0x6e, 0x66, 0x6f, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10,
	0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79,
	0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32,
	0x12, 0x2e, 0x70, 0x62, 0x2e, 0x46, 0x6f, 0x6f, 0x74, 0x65, 0x72, 0x2e, 0x53, 0x65, 0x67, 0x49,
	0x6e, 0x66, 0x6f, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x06,
	0x5a, 0x04, 0x2e, 0x2f, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}

var (
	file_pkg_meta_pb_backup_proto_rawDescOnce sync.Once
	file_pkg_meta_pb_backup_proto_rawDescData = file_pkg_meta_pb_backup_proto_rawDesc
)

func file_pkg_meta_pb_backup_proto_rawDescGZIP() []byte {
	file_pkg_meta_pb_backup_proto_rawDescOnce.Do(func() {
		file_pkg_meta_pb_backup_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_meta_pb_backup_proto_rawDescData)
	})
	return file_pkg_meta_pb_backup_proto_rawDescData
}

var file_pkg_meta_pb_backup_proto_msgTypes = make([]protoimpl.MessageInfo, 18)
var file_pkg_meta_pb_backup_proto_goTypes = []any{
	(*Format)(nil),         // 0: pb.Format
	(*Counter)(nil),        // 1: pb.Counter
	(*Sustained)(nil),      // 2: pb.Sustained
	(*DelFile)(nil),        // 3: pb.DelFile
	(*SliceRef)(nil),       // 4: pb.SliceRef
	(*Acl)(nil),            // 5: pb.Acl
	(*Xattr)(nil),          // 6: pb.Xattr
	(*Quota)(nil),          // 7: pb.Quota
	(*Stat)(nil),           // 8: pb.Stat
	(*Node)(nil),           // 9: pb.Node
	(*Edge)(nil),           // 10: pb.Edge
	(*Parent)(nil),         // 11: pb.Parent
	(*Chunk)(nil),          // 12: pb.Chunk
	(*Symlink)(nil),        // 13: pb.Symlink
	(*Batch)(nil),          // 14: pb.Batch
	(*Footer)(nil),         // 15: pb.Footer
	(*Footer_SegInfo)(nil), // 16: pb.Footer.SegInfo
	nil,                    // 17: pb.Footer.InfosEntry
}
var file_pkg_meta_pb_backup_proto_depIdxs = []int32{
	9,  // 0: pb.Batch.nodes:type_name -> pb.Node
	10, // 1: pb.Batch.edges:type_name -> pb.Edge
	12, // 2: pb.Batch.chunks:type_name -> pb.Chunk
	4,  // 3: pb.Batch.sliceRefs:type_name -> pb.SliceRef
	6,  // 4: pb.Batch.xattrs:type_name -> pb.Xattr
	11, // 5: pb.Batch.parents:type_name -> pb.Parent
	13, // 6: pb.Batch.symlinks:type_name -> pb.Symlink
	2,  // 7: pb.Batch.sustained:type_name -> pb.Sustained
	3,  // 8: pb.Batch.delfiles:type_name -> pb.DelFile
	8,  // 9: pb.Batch.dirstats:type_name -> pb.Stat
	7,  // 10: pb.Batch.quotas:type_name -> pb.Quota
	5,  // 11: pb.Batch.acls:type_name -> pb.Acl
	1,  // 12: pb.Batch.counters:type_name -> pb.Counter
	17, // 13: pb.Footer.infos:type_name -> pb.Footer.InfosEntry
	16, // 14: pb.Footer.InfosEntry.value:type_name -> pb.Footer.SegInfo
	15, // [15:15] is the sub-list for method output_type
	15, // [15:15] is the sub-list for method input_type
	15, // [15:15] is the sub-list for extension type_name
	15, // [15:15] is the sub-list for extension extendee
	0,  // [0:15] is the sub-list for field type_name
}

func init() { file_pkg_meta_pb_backup_proto_init() }
func file_pkg_meta_pb_backup_proto_init() {
	if File_pkg_meta_pb_backup_proto != nil {
		return
	}
	type x struct{}
	out := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
			RawDescriptor: file_pkg_meta_pb_backup_proto_rawDesc,
			NumEnums:      0,
			NumMessages:   18,
			NumExtensions: 0,
			NumServices:   0,
		},
		GoTypes:           file_pkg_meta_pb_backup_proto_goTypes,
		DependencyIndexes: file_pkg_meta_pb_backup_proto_depIdxs,
		MessageInfos:      file_pkg_meta_pb_backup_proto_msgTypes,
	}.Build()
	File_pkg_meta_pb_backup_proto = out.File
	file_pkg_meta_pb_backup_proto_rawDesc = nil
	file_pkg_meta_pb_backup_proto_goTypes = nil
	file_pkg_meta_pb_backup_proto_depIdxs = nil
}


================================================
FILE: pkg/meta/pb/backup.proto
================================================
syntax = "proto3";
package pb;
option go_package = "./pb";

/*
1. install protocol buffer compiler
2. install Go protoc plugin (protoc-gen-go)
3. exec: protoc --go_out=pkg/meta pkg/meta/pb/backup.proto in main directory
*/

message Format {
  bytes data = 1; // meta.Format's json format
}

message Counter {
  string key = 1;
  int64 value = 2;
}

message Sustained {
  uint64 sid = 1;
  repeated uint64 inodes = 2;
}

message DelFile {
  uint64 inode = 1;
  uint64 length = 2;
  int64 expire = 3;
}

message SliceRef {
  uint64 id   = 1;
  uint32 size = 2;
  int64 refs  = 3;
}

message Acl {
  uint32 id = 1;
  bytes data = 2; // acl.Rule's binary format
}

message Xattr {
  uint64 inode = 1;
  string name = 2;
  bytes value = 3;
}

message Quota {
  uint64 inode = 1;
  int64 maxSpace = 2;
  int64 maxInodes = 3;
  int64 usedSpace = 4;
  int64 usedInodes = 5;
}

message Stat {
  uint64 inode = 1;
  int64 dataLength = 2;
  int64 usedSpace = 3;
  int64 usedInodes = 4;
}

message Node {
  uint64 inode = 1;
  bytes data = 2; // meta.Attr's binary format
}

message Edge {
  uint64 parent = 1;
  uint64 inode = 2;
  bytes name = 3;
  uint32 type = 4;
}

// for redis and tikv only
message Parent {
  uint64 inode = 1;
  uint64 parent = 2 ;
  int64 cnt = 3;
}

message Chunk {
  uint64 inode = 1;
  uint32 index = 2;
  bytes slices = 3; // array of meta.slice
}

message Symlink {
  uint64 inode = 1;
  bytes target = 2;
}

message Batch {
  repeated Node nodes = 1;
  repeated Edge edges = 2;
  repeated Chunk chunks = 3;
  repeated SliceRef sliceRefs = 4;
  repeated Xattr xattrs = 5;
  repeated Parent parents = 6;
  repeated Symlink symlinks = 7;
  repeated Sustained sustained = 8;
  repeated DelFile delfiles = 9;
  repeated Stat dirstats = 10;
  repeated Quota quotas = 11;
  repeated Acl acls = 12;
  repeated Counter counters = 13;
}

message Footer {
  message SegInfo {
    repeated uint64 offset = 1;
    uint64 num = 2;
  }

  uint32 magic = 1;
  uint32 version = 2;
  map<string, SegInfo> infos = 3;
}

================================================
FILE: pkg/meta/quota.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"fmt"
	"sort"
	"strconv"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/pkg/errors"
)

// stat of dir
type dirStat struct {
	length int64
	space  int64
	inodes int64
}

const (
	DirQuotaType = iota
	UserQuotaType
	GroupQuotaType
)

type Quota struct {
	MaxSpace, MaxInodes   int64
	UsedSpace, UsedInodes int64
	newSpace, newInodes   int64
}

type iQuota struct {
	qtype uint32
	qkey  uint64 // ino/uid/gid
	quota *Quota
}

// Returns true if it will exceed the quota limit
func (q *Quota) check(space, inodes int64) bool {
	if space > 0 {
		max := atomic.LoadInt64(&q.MaxSpace)
		if max > 0 && atomic.LoadInt64(&q.UsedSpace)+atomic.LoadInt64(&q.newSpace)+space > max {
			return true
		}
	}
	if inodes > 0 {
		max := atomic.LoadInt64(&q.MaxInodes)
		if max > 0 && atomic.LoadInt64(&q.UsedInodes)+atomic.LoadInt64(&q.newInodes)+inodes > max {
			return true
		}
	}
	return false
}

func (q *Quota) update(space, inodes int64) {
	atomic.AddInt64(&q.newSpace, space)
	atomic.AddInt64(&q.newInodes, inodes)
}

func (q *Quota) snap() Quota {
	return Quota{
		MaxSpace:   atomic.LoadInt64(&q.MaxSpace),
		MaxInodes:  atomic.LoadInt64(&q.MaxInodes),
		UsedSpace:  atomic.LoadInt64(&q.UsedSpace),
		UsedInodes: atomic.LoadInt64(&q.UsedInodes),
		newSpace:   atomic.LoadInt64(&q.newSpace),
		newInodes:  atomic.LoadInt64(&q.newInodes),
	}
}

// not thread safe
func (q *Quota) sanitize() {
	if q.UsedSpace < 0 {
		q.UsedSpace = 0
	}
	if q.MaxSpace > 0 && q.MaxSpace < q.UsedSpace {
		q.MaxSpace = q.UsedSpace
	}
	if q.UsedInodes < 0 {
		q.UsedInodes = 0
	}
	if q.MaxInodes > 0 && q.MaxInodes < q.UsedInodes {
		q.MaxInodes = q.UsedInodes
	}
}

func (m *baseMeta) parallelSyncDirStat(ctx Context, inos map[Ino]bool) *sync.WaitGroup {
	var wg sync.WaitGroup
	for i := range inos {
		wg.Add(1)
		go func(ino Ino) {
			defer wg.Done()
			_, st := m.en.doSyncDirStat(ctx, ino)
			if st != 0 && st != syscall.ENOENT {
				logger.Warnf("sync dir stat for %d: %s", ino, st)
			}
		}(i)
	}
	return &wg
}

func (m *baseMeta) groupBatch(batch map[Ino]dirStat, size int) [][]Ino {
	var inos []Ino
	for ino := range batch {
		inos = append(inos, ino)
	}
	sort.Slice(inos, func(i, j int) bool {
		return inos[i] < inos[j]
	})
	var batches [][]Ino
	for i := 0; i < len(inos); i += size {
		end := i + size
		if end > len(inos) {
			end = len(inos)
		}
		batches = append(batches, inos[i:end])
	}
	return batches
}

func (m *baseMeta) calcDirStat(ctx Context, ino Ino) (*dirStat, syscall.Errno) {
	var entries []*Entry
	if eno := m.en.doReaddir(ctx, ino, 1, &entries, -1); eno != 0 {
		return nil, eno
	}

	stat := new(dirStat)
	for _, e := range entries {
		if ctx.Canceled() {
			return nil, syscall.EINTR
		}
		stat.inodes += 1
		var l uint64
		if e.Attr.Typ == TypeFile {
			l = e.Attr.Length
		}
		stat.length += int64(l)
		stat.space += align4K(l)
	}
	return stat, 0
}

func (m *baseMeta) GetDirStat(ctx Context, inode Ino) (stat *dirStat, st syscall.Errno) {
	stat, st = m.en.doGetDirStat(ctx, m.checkRoot(inode), !m.conf.ReadOnly)
	if st != 0 {
		return
	}
	if stat == nil {
		stat, st = m.calcDirStat(ctx, inode)
	}
	return
}

func (m *baseMeta) updateDirStat(ctx Context, ino Ino, length, space, inodes int64) {
	if !m.getFormat().DirStats {
		return
	}
	m.dirStatsLock.Lock()
	defer m.dirStatsLock.Unlock()
	stat := m.dirStats[ino]
	stat.length += length
	stat.inodes += inodes
	stat.space += space
	m.dirStats[ino] = stat
}

func (m *baseMeta) updateParentStat(ctx Context, inode, parent Ino, length, space int64) {
	if length == 0 && space == 0 {
		return
	}
	m.en.updateStats(space, 0)
	if !m.getFormat().DirStats {
		return
	}
	if parent > 0 {
		m.updateDirStat(ctx, parent, length, space, 0)
		m.updateDirQuota(ctx, parent, space, 0)
	} else {
		go func() {
			for p, v := range m.en.doGetParents(ctx, inode) {
				m.updateDirStat(ctx, p, length*int64(v), space*int64(v), 0)
				m.updateDirQuota(ctx, p, space*int64(v), 0)
			}
		}()
	}
}

func (m *baseMeta) flushDirStat(ctx Context) {
	defer m.sessWG.Done()
	period := 1 * time.Second
	if m.conf.DirStatFlushPeriod != 0 {
		period = m.conf.DirStatFlushPeriod
	}

	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			m.doFlushDirStat()
		}
	}
}

func (m *baseMeta) doFlushDirStat() {
	if !m.getFormat().DirStats {
		return
	}
	m.dirStatsLock.Lock()
	if len(m.dirStats) == 0 {
		m.dirStatsLock.Unlock()
		return
	}
	stats := m.dirStats
	m.dirStats = make(map[Ino]dirStat)
	m.dirStatsLock.Unlock()
	err := m.en.doUpdateDirStat(Background(), stats)
	if err != nil {
		logger.Errorf("update dir stat failed: %v", err)
	}
}

func (m *baseMeta) flushStats(ctx Context) {
	defer m.sessWG.Done()
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			m.doFlushStats()
		}
	}
}

func (m *baseMeta) doFlushStats() {
	m.fsStatsLock.Lock()
	m.en.doFlushStats()
	m.fsStatsLock.Unlock()
}

func (m *baseMeta) syncVolumeStat(ctx Context) error {
	return m.en.doSyncVolumeStat(ctx)
}

func (m *baseMeta) checkQuota(ctx Context, space, inodes int64, uid, gid uint32, parents ...Ino) syscall.Errno {
	if space <= 0 && inodes <= 0 {
		return 0
	}
	if m.checkUserQuota(ctx, uint64(uid), space, inodes) {
		return syscall.EDQUOT
	}
	if m.checkGroupQuota(ctx, uint64(gid), space, inodes) {
		return syscall.EDQUOT
	}

	format := m.getFormat()
	if space > 0 && format.Capacity > 0 && atomic.LoadInt64(&m.usedSpace)+atomic.LoadInt64(&m.newSpace)+space > int64(format.Capacity) {
		return syscall.ENOSPC
	}
	if inodes > 0 && format.Inodes > 0 && atomic.LoadInt64(&m.usedInodes)+atomic.LoadInt64(&m.newInodes)+inodes > int64(format.Inodes) {
		return syscall.ENOSPC
	}
	if !format.DirStats {
		return 0
	}
	for _, ino := range parents {
		if m.checkDirQuota(ctx, ino, space, inodes) {
			return syscall.EDQUOT
		}
	}
	return 0
}

func (m *baseMeta) loadQuotas() {
	format := m.getFormat()
	if !format.DirStats && !format.UserGroupQuota {
		return
	}

	dirQuotas, userQuotas, groupQuotas, err := m.en.doLoadQuotas(Background())
	if err != nil {
		logger.Warnf("Load quotas: %s", err)
		return
	}
	m.quotaMu.Lock()
	defer m.quotaMu.Unlock()

	m.syncQuotaMaps(m.dirQuotas, dirQuotas, "inode")
	m.syncQuotaMaps(m.userQuotas, userQuotas, "user")
	m.syncQuotaMaps(m.groupQuotas, groupQuotas, "group")
}

func (m *baseMeta) syncQuotaMaps(existing map[uint64]*Quota, loaded map[uint64]*Quota, quotaType string) {
	// add new or update existing
	for key, q := range loaded {
		logger.Debugf("Load quotas got %s %d -> %+v", quotaType, key, q)
		if quota, ok := existing[key]; ok {
			atomic.SwapInt64(&quota.MaxSpace, q.MaxSpace)
			atomic.SwapInt64(&quota.MaxInodes, q.MaxInodes)
			atomic.SwapInt64(&quota.UsedSpace, q.UsedSpace)
			atomic.SwapInt64(&quota.UsedInodes, q.UsedInodes)
		} else {
			existing[key] = q
		}
	}
	// delete that are not in loaded
	if quotaType == "inode" {
		for key := range existing {
			if _, ok := loaded[key]; !ok {
				logger.Infof("Quota for %s %d is deleted", quotaType, key)
				delete(existing, key)
			}
		}
	}
}

func (m *baseMeta) getDirParent(ctx Context, inode Ino) (Ino, syscall.Errno) {
	m.parentMu.Lock()
	parent, ok := m.dirParents[inode]
	m.parentMu.Unlock()
	if ok {
		return parent, 0
	}
	logger.Debugf("Get directory parent of inode %d: cache miss", inode)
	var attr Attr
	st := m.GetAttr(ctx, inode, &attr)
	return attr.Parent, st
}

// get inode of the first parent (or myself) with quota
func (m *baseMeta) getQuotaParent(ctx Context, inode Ino) (Ino, *Quota) {
	if !m.getFormat().DirStats {
		return 0, nil
	}
	var q *Quota
	var st syscall.Errno
	for {
		m.quotaMu.RLock()
		q = m.dirQuotas[uint64(inode)]
		m.quotaMu.RUnlock()
		if q != nil {
			return inode, q
		}
		if inode <= RootInode {
			break
		}
		lastInode := inode
		if inode, st = m.getDirParent(ctx, inode); st != 0 {
			logger.Warnf("Get directory parent of inode %d: %s", lastInode, st)
			break
		}
	}
	return 0, nil
}

func (m *baseMeta) checkDirQuota(ctx Context, inode Ino, space, inodes int64) bool {
	if !m.getFormat().DirStats {
		return false
	}
	var q *Quota
	var st syscall.Errno
	for {
		m.quotaMu.RLock()
		q = m.dirQuotas[uint64(inode)]
		m.quotaMu.RUnlock()
		if q != nil && q.check(space, inodes) {
			return true
		}
		if inode <= RootInode {
			break
		}
		lastInode := inode
		if inode, st = m.getDirParent(ctx, inode); st != 0 {
			logger.Warnf("Get directory parent of inode %d: %s", lastInode, st)
			break
		}
	}
	return false
}

func (m *baseMeta) checkUserQuota(ctx Context, uid uint64, space, inodes int64) bool {
	if !m.getFormat().UserGroupQuota {
		return false
	}

	var q *Quota
	m.quotaMu.RLock()
	q, ok := m.userQuotas[uid]
	m.quotaMu.RUnlock()

	if !ok {
		return false
	}
	return q.check(space, inodes)
}

func (m *baseMeta) checkGroupQuota(ctx Context, gid uint64, space, inodes int64) bool {
	if !m.getFormat().UserGroupQuota {
		return false
	}

	var q *Quota
	m.quotaMu.RLock()
	q, ok := m.groupQuotas[gid]
	m.quotaMu.RUnlock()

	if !ok {
		return false
	}
	return q.check(space, inodes)
}

func (m *baseMeta) updateDirQuota(ctx Context, inode Ino, space, inodes int64) {
	if !m.getFormat().DirStats {
		return
	}
	var q *Quota
	var st syscall.Errno
	for {
		m.quotaMu.RLock()
		q = m.dirQuotas[uint64(inode)]
		m.quotaMu.RUnlock()
		if q != nil {
			q.update(space, inodes)
		}
		if inode <= RootInode {
			break
		}
		lastInode := inode
		if inode, st = m.getDirParent(ctx, inode); st != 0 {
			logger.Warnf("Get directory parent of inode %d: %s", lastInode, st)
			break
		}
	}
}

func (m *baseMeta) updateUserGroupStat(ctx Context, uid, gid uint32, space, inodes int64) {
	if !m.getFormat().UserGroupQuota {
		return
	}
	if (uid == 0 && gid == 0) || (space == 0 && inodes == 0) {
		return
	}
	m.quotaMu.Lock()
	if uid > 0 {
		if uq := m.userQuotas[uint64(uid)]; uq != nil {
			uq.update(space, inodes)
		} else {
			m.userQuotas[uint64(uid)] = &Quota{
				UsedSpace:  0,
				UsedInodes: 0,
				MaxSpace:   -1, // No limit
				MaxInodes:  -1,
				newSpace:   space,
				newInodes:  inodes,
			}
		}
	}
	if gid > 0 {
		if gq := m.groupQuotas[uint64(gid)]; gq != nil {
			gq.update(space, inodes)
		} else {
			m.groupQuotas[uint64(gid)] = &Quota{
				UsedSpace:  0,
				UsedInodes: 0,
				MaxSpace:   -1, // No limit
				MaxInodes:  -1,
				newSpace:   space,
				newInodes:  inodes,
			}
		}
	}
	m.quotaMu.Unlock()
}

func (m *baseMeta) flushQuotas(ctx Context) {
	defer m.sessWG.Done()
	ticker := time.NewTicker(3 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			m.doFlushQuotas()
		}
	}
}

func (m *baseMeta) collectQuotas(qtype uint32, quotas map[uint64]*Quota) []*iQuota {
	var result []*iQuota
	for key, q := range quotas {
		newSpace := atomic.LoadInt64(&q.newSpace)
		newInodes := atomic.LoadInt64(&q.newInodes)
		if newSpace != 0 || newInodes != 0 {
			result = append(result, &iQuota{
				qtype: qtype,
				qkey:  key,
				quota: &Quota{newSpace: newSpace, newInodes: newInodes},
			})
		}
	}
	return result
}

func (m *baseMeta) updateQuota(q *Quota, newSpace, newInodes int64) {
	atomic.AddInt64(&q.newSpace, -newSpace)
	atomic.AddInt64(&q.UsedSpace, newSpace)
	atomic.AddInt64(&q.newInodes, -newInodes)
	atomic.AddInt64(&q.UsedInodes, newInodes)
}

func (m *baseMeta) doFlushQuotas() {
	if !m.getFormat().DirStats && !m.getFormat().UserGroupQuota {
		return
	}

	var allQuotas []*iQuota
	m.quotaMu.RLock()
	allQuotas = append(allQuotas, m.collectQuotas(DirQuotaType, m.dirQuotas)...)
	allQuotas = append(allQuotas, m.collectQuotas(UserQuotaType, m.userQuotas)...)
	allQuotas = append(allQuotas, m.collectQuotas(GroupQuotaType, m.groupQuotas)...)
	m.quotaMu.RUnlock()

	if len(allQuotas) == 0 {
		return
	}
	if err := m.en.doFlushQuotas(Background(), allQuotas); err != nil {
		logger.Warnf("Flush quotas: %s", err)
		return
	}
	m.quotaMu.Lock()
	for _, snap := range allQuotas {
		var q *Quota
		switch snap.qtype {
		case DirQuotaType:
			q = m.dirQuotas[snap.qkey]
		case UserQuotaType:
			q = m.userQuotas[snap.qkey]
		case GroupQuotaType:
			q = m.groupQuotas[snap.qkey]
		}
		if q != nil {
			m.updateQuota(q, snap.quota.newSpace, snap.quota.newInodes)
		}
	}
	m.quotaMu.Unlock()

}

func (m *baseMeta) HandleQuota(ctx Context, cmd uint8, dpath string, uid uint32, gid uint32, quotas map[string]*Quota, strict, repair bool, create bool) error {
	var inode Ino
	if cmd != QuotaList && uid == 0 && gid == 0 {
		if st := m.resolve(ctx, dpath, &inode, create); st != 0 {
			return fmt.Errorf("resolve dir %s: %s", dpath, st)
		}
		if inode.IsTrash() {
			return errors.New("no quota for any trash directory")
		}
	}

	var key uint64
	var qtype uint32
	qtype = 0xffffffff
	if uid != 0 {
		qtype = UserQuotaType
		key = uint64(uid)
	} else if gid != 0 {
		qtype = GroupQuotaType
		key = uint64(gid)
	} else if dpath != "" {
		qtype = DirQuotaType
		key = uint64(inode)
	}

	if cmd != QuotaList && qtype == 0xffffffff {
		return fmt.Errorf("invalid quota type")
	}

	switch cmd {
	case QuotaSet:
		return m.handleQuotaSet(ctx, qtype, key, dpath, quotas, strict)
	case QuotaGet:
		return m.handleQuotaGet(ctx, qtype, key, dpath, quotas)
	case QuotaDel:
		return m.en.doDelQuota(ctx, qtype, key)
	case QuotaList:
		return m.handleQuotaList(ctx, qtype, key, quotas)
	case QuotaCheck:
		return m.handleQuotaCheck(ctx, qtype, key, dpath, strict, repair, quotas)
	default:
		return fmt.Errorf("invalid quota command: %d", cmd)
	}
}

func (m *baseMeta) handleQuotaSet(ctx Context, qtype uint32, key uint64, dpath string, quotas map[string]*Quota, strict bool) error {
	format := m.getFormat()
	var quota *Quota
	var scan bool = false
	switch qtype {
	case DirQuotaType:
		if !format.DirStats {
			format.DirStats = true
			err := m.en.doInit(format, false)
			if err != nil {
				logger.Warnf("init dir stats: %s", err)
			}
		}
		quota = quotas[dpath]
	case UserQuotaType:
		if !format.UserGroupQuota {
			format.UserGroupQuota = true
			scan = true
			err := m.en.doInit(format, false)
			if err != nil {
				logger.Warnf("init user group quota: %s", err)
			}
		}
		quota = quotas[fmt.Sprintf("uid:%d", key)]
	case GroupQuotaType:
		if !format.UserGroupQuota {
			format.UserGroupQuota = true
			scan = true
			err := m.en.doInit(format, false)
			if err != nil {
				logger.Warnf("init user group quota: %s", err)
			}
		}
		quota = quotas[fmt.Sprintf("gid:%d", key)]
	}
	if quota == nil {
		return nil
	}

	created, err := m.en.doSetQuota(ctx, qtype, uint64(key), &Quota{
		MaxSpace:   quota.MaxSpace,
		MaxInodes:  quota.MaxInodes,
		UsedSpace:  -1,
		UsedInodes: -1,
	})
	if err != nil {
		return err
	}
	if !created {
		return nil
	}
	return m.initializeQuotaUsage(ctx, qtype, key, dpath, strict, scan)
}

func (m *baseMeta) initializeQuotaUsage(ctx Context, qtype uint32, key uint64, dpath string, strict bool, scan bool) error {
	switch qtype {
	case DirQuotaType:
		wrapErr := func(e error) error {
			return errors.Wrapf(e, "set quota usage for file(%s), please repair it later", dpath)
		}

		var sum Summary
		if st := m.GetSummary(ctx, Ino(key), &sum, true, strict); st != 0 {
			return wrapErr(st)
		}

		_, err := m.en.doSetQuota(ctx, DirQuotaType, key, &Quota{
			UsedSpace:  int64(sum.Size) - align4K(0),
			UsedInodes: int64(sum.Dirs+sum.Files) - 1,
			MaxSpace:   -1,
			MaxInodes:  -1,
		})
		if err != nil {
			return wrapErr(err)
		}
		return nil
	case UserQuotaType, GroupQuotaType:
		if scan {
			return m.ScanUserGroupUsage(ctx)
		}
	}
	return nil
}

func (m *baseMeta) ScanUserGroupUsage(ctx Context) error {
	userUsage, groupUsage, err := m.scanGlobalUserGroupUsage(ctx)
	if err != nil {
		return fmt.Errorf("scan global user group usage: %v", err)
	}

	var userQuotasSnapshot map[uint64]*Quota
	var groupQuotasSnapshot map[uint64]*Quota

	m.quotaMu.Lock()
	// Reset user and group quotas
	m.userQuotas = make(map[uint64]*Quota)
	m.groupQuotas = make(map[uint64]*Quota)
	for uid, usage := range userUsage {
		m.userQuotas[uid] = &Quota{
			MaxSpace:   -1,
			MaxInodes:  -1,
			UsedSpace:  int64(usage.Size),
			UsedInodes: int64(usage.Files),
		}
	}
	for gid, usage := range groupUsage {
		m.groupQuotas[gid] = &Quota{
			MaxSpace:   -1,
			MaxInodes:  -1,
			UsedSpace:  int64(usage.Size),
			UsedInodes: int64(usage.Files),
		}
	}

	userQuotasSnapshot = make(map[uint64]*Quota)
	for uid, quota := range m.userQuotas {
		userQuotasSnapshot[uid] = &Quota{
			MaxSpace:   atomic.LoadInt64(&quota.MaxSpace),
			MaxInodes:  atomic.LoadInt64(&quota.MaxInodes),
			UsedSpace:  atomic.LoadInt64(&quota.UsedSpace),
			UsedInodes: atomic.LoadInt64(&quota.UsedInodes),
		}
	}

	groupQuotasSnapshot = make(map[uint64]*Quota)
	for gid, quota := range m.groupQuotas {
		groupQuotasSnapshot[gid] = &Quota{
			MaxSpace:   atomic.LoadInt64(&quota.MaxSpace),
			MaxInodes:  atomic.LoadInt64(&quota.MaxInodes),
			UsedSpace:  atomic.LoadInt64(&quota.UsedSpace),
			UsedInodes: atomic.LoadInt64(&quota.UsedInodes),
		}
	}
	m.quotaMu.Unlock()

	for uid, quota := range userQuotasSnapshot {
		_, err := m.en.doSetQuota(ctx, UserQuotaType, uid, quota)
		if err != nil {
			logger.Warnf("Failed to save user quota for uid %d: %v", uid, err)
		}
	}

	for gid, quota := range groupQuotasSnapshot {
		_, err := m.en.doSetQuota(ctx, GroupQuotaType, gid, quota)
		if err != nil {
			logger.Warnf("Failed to save group quota for gid %d: %v", gid, err)
		}
	}

	return nil
}

func (m *baseMeta) scanGlobalUserGroupUsage(ctx Context) (map[uint64]*Summary, map[uint64]*Summary, error) {
	userUsage := make(map[uint64]*Summary)
	groupUsage := make(map[uint64]*Summary)

	processedFiles := make(map[Ino]bool)
	visitedDirs := make(map[Ino]bool)

	dirQueue := []Ino{RootInode}
	if m.getFormat().TrashDays > 0 {
		var trashAttr Attr
		if st := m.en.doGetAttr(ctx, TrashInode, &trashAttr); st == 0 {
			dirQueue = append(dirQueue, TrashInode)
		}
	}

	for len(dirQueue) > 0 {
		currentDir := dirQueue[0]
		dirQueue = dirQueue[1:]

		var entries []*Entry
		visitedDirs[currentDir] = true
		err := m.en.doReaddir(ctx, currentDir, 1, &entries, -1)
		if err != 0 {
			logger.Warnf("readdir %d: %s", currentDir, err)
			continue
		}

		for _, e := range entries {
			if string(e.Name) == "." || string(e.Name) == ".." {
				continue
			}

			uid, gid := uint64(e.Attr.Uid), uint64(e.Attr.Gid)
			if (uid == 0 || gid == 0) && e.Attr.Typ == TypeFile {
				continue
			}

			if userUsage[uid] == nil {
				userUsage[uid] = &Summary{}
			}
			if groupUsage[gid] == nil {
				groupUsage[gid] = &Summary{}
			}

			var space int64
			var inodes int64
			if e.Attr.Typ == TypeFile {
				if e.Attr.Nlink > 1 {
					if processedFiles[e.Inode] {
						space = 0
						inodes = 0
					} else {
						space = align4K(e.Attr.Length)
						inodes = 1
						processedFiles[e.Inode] = true
					}
				} else {
					space = align4K(e.Attr.Length)
					inodes = 1
				}
			} else if e.Attr.Typ == TypeDirectory {
				space = align4K(0)
				inodes = 1
				userUsage[uid].Dirs++
				groupUsage[gid].Dirs++
				if !visitedDirs[e.Inode] {
					dirQueue = append(dirQueue, e.Inode)
				}
			}

			userUsage[uid].Size += uint64(space)
			userUsage[uid].Files += uint64(inodes)
			groupUsage[gid].Size += uint64(space)
			groupUsage[gid].Files += uint64(inodes)

		}
	}
	return userUsage, groupUsage, nil
}

func (m *baseMeta) handleQuotaGet(ctx Context, qtype uint32, key uint64, dpath string, quotas map[string]*Quota) error {
	q, err := m.en.doGetQuota(ctx, qtype, key)
	if err != nil {
		return err
	}
	if q == nil {
		return nil
	}
	switch qtype {
	case DirQuotaType:
		quotas[dpath] = q
	case UserQuotaType:
		quotas[fmt.Sprintf("uid:%d", key)] = q
	case GroupQuotaType:
		quotas[fmt.Sprintf("gid:%d", key)] = q
	}
	return nil
}

func (m *baseMeta) handleQuotaList(ctx Context, qtype uint32, key uint64, quotas map[string]*Quota) error {
	dirQuotas, userQuotas, groupQuotas, err := m.en.doLoadQuotas(ctx)
	if err != nil {
		return err
	}

	match := func(targetType uint32, k uint64, v *Quota) bool {
		if v.MaxInodes == -1 && v.MaxSpace == -1 {
			return false
		}
		return qtype == 0xffffffff || (qtype == targetType && k == key)
	}

	for ino, quota := range dirQuotas {
		if !match(DirQuotaType, ino, quota) {
			continue
		}
		if ps := m.GetPaths(ctx, Ino(ino)); len(ps) > 0 {
			quotas[ps[0]] = quota
		} else {
			quotas[fmt.Sprintf("inode:%d", ino)] = quota
		}
	}
	for uid, quota := range userQuotas {
		if match(UserQuotaType, uid, quota) {
			quotas[fmt.Sprintf("uid:%d", uid)] = quota
		}
	}
	for gid, quota := range groupQuotas {
		if match(GroupQuotaType, gid, quota) {
			quotas[fmt.Sprintf("gid:%d", gid)] = quota
		}
	}
	return nil
}

func (m *baseMeta) handleQuotaCheck(ctx Context, qtype uint32, key uint64, dpath string, strict, repair bool, quotas map[string]*Quota) error {
	q, err := m.en.doGetQuota(ctx, qtype, key)
	if err != nil {
		return err
	}
	if q == nil {
		return fmt.Errorf("no quota for inode %d path %s", key, dpath)
	}

	var sum Summary
	if st := m.GetSummary(ctx, Ino(key), &sum, true, strict); st != 0 {
		return st
	}

	usedInodes := int64(sum.Dirs+sum.Files) - 1
	usedSpace := int64(sum.Size) - align4K(0) // quota ignore root dir

	if q.UsedInodes == usedInodes && q.UsedSpace == usedSpace {
		logger.Infof("quota of %s is consistent", dpath)
		quotas[dpath] = q
		return nil
	}

	logger.Warnf(
		"%s: quota(%s, %s) != summary(%s, %s)", dpath,
		humanize.Comma(q.UsedInodes), humanize.IBytes(uint64(q.UsedSpace)),
		humanize.Comma(usedInodes), humanize.IBytes(uint64(usedSpace)),
	)

	if repair {
		q.UsedInodes = usedInodes
		q.UsedSpace = usedSpace
		quotas[dpath] = q
		logger.Info("repairing...")
		_, err = m.en.doSetQuota(ctx, qtype, key, &Quota{
			MaxInodes:  -1,
			MaxSpace:   -1,
			UsedInodes: q.UsedInodes,
			UsedSpace:  q.UsedSpace,
		})
		return err
	}

	return fmt.Errorf("quota of %s is inconsistent, please repair it with --repair flag", dpath)
}

func (m *baseMeta) updateQuotaMetrics() {
	m.quotaMu.RLock()
	dirQuotasSnapshot := make(map[uint64]Quota)
	for inode, quota := range m.dirQuotas {
		dirQuotasSnapshot[inode] = quota.snap()
	}

	userQuotasSnapshot := make(map[uint64]Quota)
	for uid, quota := range m.userQuotas {
		userQuotasSnapshot[uid] = quota.snap()
	}

	groupQuotasSnapshot := make(map[uint64]Quota)
	for gid, quota := range m.groupQuotas {
		groupQuotasSnapshot[gid] = quota.snap()
	}
	m.quotaMu.RUnlock()
	m.updateDirQuotaMetrics(dirQuotasSnapshot)
	m.updateUserQuotaMetrics(userQuotasSnapshot)
	m.updateGroupQuotaMetrics(groupQuotasSnapshot)
}

func (m *baseMeta) updateDirQuotaMetrics(dirQuotas map[uint64]Quota) {
	m.quotaMetricMu.Lock()
	defer m.quotaMetricMu.Unlock()
	for inode, quota := range dirQuotas {
		inodeStr := strconv.FormatUint(inode, 10)
		m.dirQuotaMaxSpaceG.WithLabelValues(inodeStr).Set(float64(quota.MaxSpace))
		m.dirQuotaMaxInodesG.WithLabelValues(inodeStr).Set(float64(quota.MaxInodes))
		m.dirQuotaUsedSpaceG.WithLabelValues(inodeStr).Set(float64(quota.UsedSpace))
		m.dirQuotaUsedInodesG.WithLabelValues(inodeStr).Set(float64(quota.UsedInodes))
		m.dirQuotaMetricKeys[inode] = true
	}
}

func (m *baseMeta) updateUserQuotaMetrics(userQuotas map[uint64]Quota) {
	m.quotaMetricMu.Lock()
	defer m.quotaMetricMu.Unlock()
	for uid, quota := range userQuotas {
		uidStr := strconv.FormatUint(uid, 10)
		m.userQuotaMaxSpaceG.WithLabelValues(uidStr).Set(float64(quota.MaxSpace))
		m.userQuotaMaxInodesG.WithLabelValues(uidStr).Set(float64(quota.MaxInodes))
		m.userQuotaUsedSpaceG.WithLabelValues(uidStr).Set(float64(quota.UsedSpace))
		m.userQuotaUsedInodesG.WithLabelValues(uidStr).Set(float64(quota.UsedInodes))
		m.userQuotaMetricKeys[uid] = true
	}
}

func (m *baseMeta) updateGroupQuotaMetrics(groupQuotas map[uint64]Quota) {
	m.quotaMetricMu.Lock()
	defer m.quotaMetricMu.Unlock()
	for gid, quota := range groupQuotas {
		gidStr := strconv.FormatUint(gid, 10)
		m.groupQuotaMaxSpaceG.WithLabelValues(gidStr).Set(float64(quota.MaxSpace))
		m.groupQuotaMaxInodesG.WithLabelValues(gidStr).Set(float64(quota.MaxInodes))
		m.groupQuotaUsedSpaceG.WithLabelValues(gidStr).Set(float64(quota.UsedSpace))
		m.groupQuotaUsedInodesG.WithLabelValues(gidStr).Set(float64(quota.UsedInodes))
		m.groupQuotaMetricKeys[gid] = true
	}
}

func (m *baseMeta) cleanupQuotaMetrics() {
	m.quotaMu.RLock()
	dirQuotas := make(map[uint64]Quota, len(m.dirQuotas))
	for inode, q := range m.dirQuotas {
		dirQuotas[inode] = q.snap()
	}
	userQuotas := make(map[uint64]Quota, len(m.userQuotas))
	for uid, q := range m.userQuotas {
		userQuotas[uid] = q.snap()
	}
	groupQuotas := make(map[uint64]Quota, len(m.groupQuotas))
	for gid, q := range m.groupQuotas {
		groupQuotas[gid] = q.snap()
	}
	m.quotaMu.RUnlock()

	m.quotaMetricMu.Lock()
	defer m.quotaMetricMu.Unlock()

	// directory quotas
	for inode := range m.dirQuotaMetricKeys {
		q, ok := dirQuotas[inode]
		if !ok || (q.MaxSpace <= 0 && q.MaxInodes <= 0) {
			inodeStr := strconv.FormatUint(inode, 10)
			m.dirQuotaMaxSpaceG.DeleteLabelValues(inodeStr)
			m.dirQuotaMaxInodesG.DeleteLabelValues(inodeStr)
			m.dirQuotaUsedSpaceG.DeleteLabelValues(inodeStr)
			m.dirQuotaUsedInodesG.DeleteLabelValues(inodeStr)
			delete(m.dirQuotaMetricKeys, inode)
		}
	}

	// user quotas
	for uid := range m.userQuotaMetricKeys {
		q, ok := userQuotas[uid]
		if !ok || (q.MaxSpace <= 0 && q.MaxInodes <= 0) {
			uidStr := strconv.FormatUint(uid, 10)
			m.userQuotaMaxSpaceG.DeleteLabelValues(uidStr)
			m.userQuotaMaxInodesG.DeleteLabelValues(uidStr)
			m.userQuotaUsedSpaceG.DeleteLabelValues(uidStr)
			m.userQuotaUsedInodesG.DeleteLabelValues(uidStr)
			delete(m.userQuotaMetricKeys, uid)
		}
	}

	// group quotas
	for gid := range m.groupQuotaMetricKeys {
		q, ok := groupQuotas[gid]
		if !ok || (q.MaxSpace <= 0 && q.MaxInodes <= 0) {
			gidStr := strconv.FormatUint(gid, 10)
			m.groupQuotaMaxSpaceG.DeleteLabelValues(gidStr)
			m.groupQuotaMaxInodesG.DeleteLabelValues(gidStr)
			m.groupQuotaUsedSpaceG.DeleteLabelValues(gidStr)
			m.groupQuotaUsedInodesG.DeleteLabelValues(gidStr)
			delete(m.groupQuotaMetricKeys, gid)
		}
	}
}


================================================
FILE: pkg/meta/random_test.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"flag"
	"fmt"
	"math"
	"os"
	"reflect"
	"runtime"
	"sort"
	"strings"
	"syscall"
	"testing"
	"time"
	"unicode"

	"pgregory.net/rapid"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
)

type tSlice struct {
	pos  uint32
	id   uint64
	clen uint32
	off  uint32
	len  uint32
}

type tQuota struct {
	size   uint64
	inodes uint64
}

type tNode struct {
	name     string
	inode    Ino
	_type    uint8
	mode     uint16
	uid      uint32
	gid      uint32
	atime    int64
	mtime    int64
	ctime    int64
	iflags   uint8
	length   uint64
	parents  []*tNode
	hardlink bool
	chunks   map[uint32][]tSlice
	children map[string]*tNode
	target   string
	xattrs   map[string][]byte
	quota    *tQuota
	flocks   map[ownerKey]byte
	plocks   map[ownerKey][]plockRecord
	accACL   *aclAPI.Rule
	defACL   *aclAPI.Rule
}

func (n *tNode) accessMode(uid uint32, gids []uint32) uint8 {
	if uid == 0 {
		return 0x7
	}
	mode := n.mode
	if uid == n.uid {
		return uint8(mode>>6) & 7
	}
	for _, gid := range gids {
		if gid == n.gid {
			return uint8(mode>>3) & 7
		}
	}
	return uint8(mode & 7)
}

func (n *tNode) access(ctx Context, mask uint8) bool {
	if ctx.Uid() == 0 {
		return true
	}

	if n.accACL != nil && (n.mode&00070) != 0 {
		return n.accACL.CanAccess(ctx.Uid(), ctx.Gids(), n.uid, n.gid, mask)
	}

	mode := n.accessMode(ctx.Uid(), ctx.Gids())
	if mode&mask != mask {
		return false
	}
	return true
}

func (n *tNode) stickyAccess(child *tNode, uid uint32) bool {
	if uid == 0 || n.mode&01000 == 0 {
		return true
	}
	if uid == n.uid || uid == child.uid {
		return true
	}
	return false
}

type fsMachine struct {
	nodes map[Ino]*tNode
	meta  Meta
	sid   uint64
	ctx   Context
}

var metaType string
var tCounter uint64

func (m *fsMachine) Init(t *rapid.T) {
	m.sid = uint64(rapid.UintRange(1, math.MaxUint32-1).Draw(t, "sid"))
	m.nodes = make(map[Ino]*tNode)
	m.nodes[1] = &tNode{
		_type:    TypeDirectory,
		mode:     0777,
		inode:    RootInode,
		length:   4096,
		xattrs:   make(map[string][]byte),
		children: make(map[string]*tNode),
		parents:  []*tNode{{inode: RootInode, _type: TypeDirectory}},
	}
	_ = os.Remove(settingPath)
	m.meta = NewClient(metaURL, testConfig())
	m.meta.Reset()
	if err := m.meta.Init(testFormat(), true); err != nil {
		t.Fatalf("initialize failed: %s", err)
	}
	m.meta.getBase().sessCtx = Background()
	m.meta.getBase().sid = m.sid
	registry := prometheus.NewRegistry() // replace default so only JuiceFS metrics are exposed
	registerer := prometheus.WrapRegistererWithPrefix("juicefs_",
		prometheus.WrapRegistererWith(prometheus.Labels{"mp": "virtual-mp", "vol_name": "test-vol"}, registry))
	m.meta.InitMetrics(registerer)

	switch m.meta.(type) {
	case *dbMeta:
		metaType = "db"
	case *redisMeta:
		metaType = "redis"
	case *kvMeta:
		metaType = "tkv"
	}

	tCounter++
	if tCounter%50 == 0 {
		fmt.Println("current counter: ", tCounter)
	}
}

func (m *fsMachine) genName(t *rapid.T) string {
	name := rapid.StringN(1, 200, 255).Draw(t, "name")
	name = strings.ReplaceAll(name, "|", "a") // FIXME: name can't contain '|'
	name = strings.ReplaceAll(name, ".#", "aa")
	name = strings.ReplaceAll(name, "\n", "a")
	return name
}

func (m *fsMachine) Cleanup() {
	m.meta.CloseSession()
	m.meta.Reset()
	m.meta.Shutdown()
}

func (m *fsMachine) prepare(t *rapid.T) {
	// m.ctx.ts++
	uid := rapid.Uint32Range(0, 5).Draw(t, "uid")
	gid := rapid.Uint32Range(0, 5).Draw(t, "gid")
	m.ctx = NewContext(1, uid, []uint32{gid})
	// t.Logf("time: %d", m.ctx.ts)
}

func (m *fsMachine) pickNode(t *rapid.T) Ino {
	m.prepare(t)
	var inodes []Ino
	for inode := range m.nodes {
		inodes = append(inodes, Ino(inode))
	}
	sort.Slice(inodes, func(i, j int) bool { return inodes[i] < inodes[j] })
	return rapid.SampledFrom(inodes).Draw(t, "node")
}

func (m *fsMachine) create(_type uint8, parent Ino, name string, mode, umask uint16, inode Ino) syscall.Errno {
	if _type < TypeFile || _type == TypeSymlink {
		return syscall.EINVAL
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}
	if p.children == nil {
		return syscall.ENOTDIR
	}

	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}
	if p.children[name] != nil {
		return syscall.EEXIST
	}
	n := &tNode{
		name:    name,
		_type:   _type,
		mode:    mode &^ umask,
		inode:   inode,
		uid:     m.ctx.Uid(),
		gid:     m.ctx.Gids()[0],
		parents: []*tNode{p},
		xattrs:  make(map[string][]byte),
	}

	if runtime.GOOS == "darwin" {
		n.gid = p.gid
	} else if runtime.GOOS == "linux" && p.mode&02000 != 0 {
		n.gid = p.gid
		if _type == TypeDirectory {
			p.mode |= 02000
		} else if n.mode&02010 == 02010 && m.ctx.Uid() != 0 {
			var found bool
			for _, gid := range m.ctx.Gids() {
				if gid == p.gid {
					found = true
				}
			}
			if !found {
				n.mode &= ^uint16(02000)
			}
		}
	}

	mode &= 07777
	if p.defACL != nil && _type != TypeSymlink {
		// inherit default acl
		if _type == TypeDirectory {
			n.defACL = p.defACL
		}

		// set access acl by parent's default acl
		rule := p.defACL

		if rule.IsMinimal() {
			// simple acl as default
			n.mode = mode & (0xFE00 | rule.GetMode())
		} else {
			cRule := rule.ChildAccessACL(mode)
			n.accACL = cRule
			n.mode = (mode & 0xFE00) | cRule.GetMode()
		}
	} else {
		n.mode = mode & ^umask
	}

	switch _type {
	case TypeDirectory:
		n.children = make(map[string]*tNode)
		n.length = 4 << 10
	case TypeFile:
		n.chunks = make(map[uint32][]tSlice)
	case TypeSymlink:
		n.length = uint64(len(name))
	default:
		n.length = 0
	}

	// p.mtime = m.ctx.ts
	// p.ctime = m.ctx.ts
	m.nodes[inode] = n
	p.children[name] = n
	return 0
}

func checkFSNodeName(name string) syscall.Errno {
	len := len(name)
	if len == 0 {
		return syscall.EINVAL
	}
	if len > MaxName {
		return syscall.ENAMETOOLONG
	}
	if name[0] == '.' {
		if len == 1 {
			return syscall.EINVAL
		}
		if len == 2 && name[1] == '.' {
			return syscall.EINVAL
		}
	}
	if strings.ContainsAny(name, "/\x00") {
		return syscall.EINVAL
	}
	return 0
}

func (m *fsMachine) link(parent Ino, name string, inode Ino) syscall.Errno {
	if name == "." || name == ".." {
		return syscall.EEXIST
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if n.children != nil {
		return syscall.EPERM
	}
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}
	if p.children == nil {
		return syscall.ENOTDIR
	}
	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}
	if p.children[name] != nil {
		return syscall.EEXIST
	}
	// n.ctime = m.ctx.ts
	// p.mtime = m.ctx.ts
	// p.ctime = m.ctx.ts
	n.parents = append(n.parents, p)
	n.hardlink = true
	p.children[name] = n
	return 0
}

func (m *fsMachine) symlink(parent Ino, name string, inode Ino, target string) syscall.Errno {
	if len(target) == 0 || len(target) > SymlinkMax {
		return syscall.EINVAL
	}
	for _, c := range target {
		if c == 0 {
			return syscall.EINVAL
		}
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}
	if p.children == nil {
		return syscall.ENOTDIR
	}
	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}
	if p.children[name] != nil {
		return syscall.EEXIST
	}
	n := &tNode{
		name:  name,
		_type: TypeSymlink,
		inode: inode,
		mode:  0777,
		uid:   m.ctx.Uid(),
		gid:   m.ctx.Gids()[0],
		// atime:   m.ctx.ts,
		// mtime:   m.ctx.ts,
		// ctime:   m.ctx.ts,
		parents: []*tNode{p},
		target:  target,
		xattrs:  make(map[string][]byte),
	}

	_type := TypeSymlink
	if runtime.GOOS == "darwin" {
		n.gid = p.gid
	} else if runtime.GOOS == "linux" && p.mode&02000 != 0 {
		n.gid = p.gid
		if _type == TypeDirectory {
			p.mode |= 02000
		} else if n.mode&02010 == 02010 && m.ctx.Uid() != 0 {
			var found bool
			for _, gid := range m.ctx.Gids() {
				if gid == p.gid {
					found = true
				}
			}
			if !found {
				n.mode &= ^uint16(02000)
			}
		}
	}

	n.length = uint64(len(target))
	// p.mtime = m.ctx.ts
	// p.ctime = m.ctx.ts
	m.nodes[inode] = n
	p.children[name] = n
	return 0
}

func (m *fsMachine) readlink(inode Ino) (string, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return "", syscall.ENOENT
	}
	if n.target == "" {
		return "", syscall.EINVAL
	}
	return n.target, 0
}

func (m *fsMachine) pickChild(parent Ino, t *rapid.T) string {
	n := m.nodes[parent]
	if len(n.children) == 0 {
		return ""
	}
	var names []string
	for name := range n.children {
		names = append(names, name)
	}
	sort.Slice(names, func(i, j int) bool { return names[i] < names[j] })
	return rapid.SampledFrom(names).Draw(t, "child")
}

func (m *fsMachine) unlink(parent Ino, name string) syscall.Errno {
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}
	if p._type != TypeDirectory {
		return syscall.ENOTDIR
	}

	if metaType == "db" {
		if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
			return syscall.EACCES
		}
	}
	c := p.children[name]

	if c._type == TypeDirectory {
		return syscall.EPERM
	}

	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}

	if _, ok := p.children[name]; !ok {
		return syscall.ENOENT
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}

	if !p.stickyAccess(c, m.ctx.Uid()) {
		return syscall.EACCES
	}

	delete(p.children, name)
	for i, tp := range c.parents {
		if tp == p {
			c.parents = append(c.parents[:i], c.parents[i+1:]...)
			break
		}
	}
	if len(c.parents) == 0 {
		delete(m.nodes, c.inode)
	} else {
		// c.ctime = m.ctx.ts
	}
	// p.mtime = m.ctx.ts
	// p.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) rmdir(parent Ino, name string) syscall.Errno {
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}

	if metaType == "db" {
		if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
			return syscall.EACCES
		}
	}
	c := p.children[name]

	if c._type != TypeDirectory {
		return syscall.ENOTDIR
	}

	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}
	if _, ok := p.children[name]; !ok {
		return syscall.ENOENT
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}

	if len(c.children) != 0 {
		return syscall.ENOTEMPTY
	}

	if !p.stickyAccess(c, m.ctx.Uid()) {
		return syscall.EACCES
	}

	delete(p.children, name)
	for i, tp := range c.parents {
		if tp == p {
			c.parents = append(c.parents[:i], c.parents[i+1:]...)
			break
		}
	}
	if len(c.parents) == 0 {
		delete(m.nodes, c.inode)
	} else {
		// c.ctime = m.ctx.ts
	}
	// p.mtime = m.ctx.ts
	// p.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) lookup(parent Ino, name string, checkPerm bool) (Ino, syscall.Errno) {
	p := m.nodes[parent]
	if checkPerm {
		if !p.access(m.ctx, MODE_MASK_X) {
			return 0, syscall.EACCES
		}
	}
	if _, ok := p.children[name]; !ok {
		return 0, syscall.ENOENT
	}
	if p == nil {
		return 0, syscall.ENOENT
	}
	if err := checkFSNodeName(name); err != 0 {
		return 0, err
	}
	//if p.children == nil {
	//	return 0, syscall.ENOENT
	//}
	if !p.access(m.ctx, MODE_MASK_X) {
		return 0, syscall.EACCES
	}
	c := p.children[name]
	if c == nil {
		return 0, syscall.ENOENT
	}
	return c.inode, 0
}

func (m *fsMachine) getattr(inode Ino) (*tNode, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	return n, 0
}

func (m *fsMachine) doMknod(inode Ino) (*tNode, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	return n, 0
}

func (m *fsMachine) setattr(inode Ino, attr Attr) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	// FIXME: check attr
	return 0
}

func (m *fsMachine) truncate(inode Ino, length uint64) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if n._type != TypeFile {
		return syscall.EPERM
	}
	if !n.access(m.ctx, MODE_MASK_W) {
		return syscall.EACCES
	}
	for i := range n.chunks {
		if uint64(i)*ChunkSize >= length {
			delete(n.chunks, i)
		} else if uint64(i)*ChunkSize+ChunkSize > length {
			var slices []tSlice
			for _, s := range n.chunks[i] {
				if s.pos < uint32(length-uint64(i)*ChunkSize) {
					if s.pos+s.len > uint32(length-uint64(i)*ChunkSize) {
						s.len = uint32(length-uint64(i)*ChunkSize) - s.pos
					}
					slices = append(slices, tSlice{s.pos, s.id, s.clen, s.off, s.len})
				}
			}
			n.chunks[i] = slices
		}
	}
	n.length = length
	// n.mtime = m.ctx.ts
	// n.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) fallocate(inode Ino, mode uint8, offset uint64, size uint64) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if n._type != TypeFile {
		return syscall.EPERM
	}
	if !n.access(m.ctx, MODE_MASK_W) {
		return syscall.EACCES
	}
	if offset+size > n.length {
		n.length = offset + size
	}
	// n.mtime = m.ctx.ts
	// n.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) copy_file_range(srcinode Ino, srcoff uint64, dstinode Ino, dstoff uint64, size uint64, flags uint64) syscall.Errno {
	//if srcinode == dstinode && (size == 0 || srcoff <= dstoff && dstoff < srcoff+size || dstoff < srcoff && srcoff < dstoff+size) {
	//	return syscall.EINVAL // overlap
	//}
	src := m.nodes[srcinode]
	if src == nil {
		return syscall.ENOENT
	}
	if src._type != TypeFile {
		return syscall.EINVAL
	}
	dst := m.nodes[dstinode]
	if dst == nil {
		return syscall.ENOENT
	}
	if dst._type != TypeFile {
		return syscall.EINVAL
	}
	//if !src.access(m.ctx, MODE_MASK_R) {
	//	return syscall.EACCES
	//}
	//if !dst.access(m.ctx, MODE_MASK_W) {
	//	return syscall.EACCES
	//}
	updateChunk := func(off uint64, s tSlice) {
		for s.len > 0 {
			indx := uint32(off / ChunkSize)
			pos := uint32(off % ChunkSize)
			len := uint32(ChunkSize - pos)
			if len > s.len {
				len = s.len
			}
			dst.chunks[indx] = append(dst.chunks[indx], tSlice{pos, s.id, s.clen, s.off, len})
			s.off += len
			s.len -= len
			off += uint64(len)
		}
	}
	if srcoff >= src.length {
		return 0
	}
	if srcoff+size > src.length {
		size = src.length - srcoff
	}
	if dstoff+size > dst.length {
		dst.length = dstoff + size
	}
	for size > 0 {
		indx := uint32(srcoff / ChunkSize)
		pos := uint32(srcoff % ChunkSize)
		l := uint32(ChunkSize - pos)
		if srcoff < src.length && srcoff+uint64(l) > src.length {
			l = uint32(src.length - srcoff)
		}
		if uint64(l) > size {
			l = uint32(size)
		}

		updateChunk(dstoff, tSlice{0, 0, 0, 0, l})
		var cs []tSlice
		cs = append(cs, src.chunks[indx]...) // copy
		for _, s := range cs {
			if s.pos+s.len <= pos || s.pos >= pos+l {
				continue
			}
			if s.pos+s.len > pos+l {
				s.len = pos + l - s.pos
			}
			if s.pos < pos {
				diff := pos - s.pos
				s.off += diff
				s.len -= diff
				s.pos = pos
			}
			updateChunk(dstoff+uint64(s.pos-pos), s)
		}
		srcoff += uint64(l)
		dstoff += uint64(l)
		size -= uint64(l)
	}
	// dst.mtime = m.ctx.ts
	// dst.ctime = m.ctx.ts
	return 0
}

// rmr Hint: Unlike the Rmr with the meta interface.
func (m *fsMachine) rmr(parent Ino, name string, removed *uint64) syscall.Errno {
	p := m.nodes[parent]
	if p == nil {
		return syscall.ENOENT
	}
	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}
	if p.children == nil {
		return syscall.ENOENT
	}
	if err := checkFSNodeName(name); err != 0 {
		return err
	}

	c := p.children[name]
	if c == nil {
		return syscall.ENOENT
	}

	if !p.stickyAccess(c, m.ctx.Uid()) {
		return syscall.EPERM
	}
	for n := range c.children {
		if eno := m.rmr(c.inode, n, removed); eno != 0 {
			return eno
		}
	}

	if !p.access(m.ctx, MODE_MASK_W|MODE_MASK_X) {
		return syscall.EACCES
	}

	var st syscall.Errno
	if c._type == TypeDirectory {
		st = m.rmdir(parent, name)
	} else {
		st = m.unlink(parent, name)
	}
	if st == 0 && removed != nil {
		*removed++
	}
	return 0
}

func (m *fsMachine) isancestor(a, b *tNode) bool {
	if a == b {
		return true
	}
	for _, p := range b.parents {
		if m.isancestor(a, p) {
			return true
		}
	}
	return false
}

func (m *fsMachine) rename(srcparent Ino, srcname string, dstparent Ino, dstname string, flag uint8) syscall.Errno {
	if dstparent == srcparent && dstname == srcname {
		return 0
	}
	if err := checkFSNodeName(dstname); err != 0 {
		return err
	}
	// todo: The order of condition checks in different metadata engines is inconsistent
	if metaType == "db" {
		src := m.nodes[srcparent]
		if src == nil {
			return syscall.ENOENT
		}
		dst := m.nodes[dstparent]
		if dst == nil {
			return syscall.ENOENT
		}
		if src._type != TypeDirectory || dst._type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if !src.access(m.ctx, MODE_MASK_X|MODE_MASK_W) {
			return syscall.EACCES
		}
		if !dst.access(m.ctx, MODE_MASK_X|MODE_MASK_W) {
			return syscall.EACCES
		}
	}

	if metaType == "redis" {
		src := m.nodes[srcparent]
		if src == nil {
			return syscall.ENOENT
		}
		dst := m.nodes[dstparent]
		if dst == nil {
			return syscall.ENOENT
		}
		srcnode := src.children[srcname]
		if srcnode == nil {
			return syscall.ENOENT
		}
		c := dst.children[dstname]
		if c != nil {
			if srcnode._type == TypeDirectory && c._type != TypeDirectory {
				return syscall.ENOTDIR
			} else if srcnode._type != TypeDirectory && c._type == TypeDirectory {
				return syscall.EISDIR
			}
		}

	}
	src := m.nodes[srcparent]
	if src == nil {
		return syscall.ENOENT
	}
	if src.children == nil {
		return syscall.ENOTDIR
	}
	if !src.access(m.ctx, MODE_MASK_X|MODE_MASK_W) {
		return syscall.EACCES
	}

	dst := m.nodes[dstparent]
	if dst == nil {
		return syscall.ENOENT
	}
	if dst.children == nil {
		return syscall.ENOTDIR
	}
	if !dst.access(m.ctx, MODE_MASK_X|MODE_MASK_W) {
		return syscall.EACCES
	}

	srcnode := src.children[srcname]
	if srcnode == nil {
		return syscall.ENOENT
	}

	if metaType == "tkv" {
		c := dst.children[dstname]
		if c != nil {
			if srcnode._type == TypeDirectory && c._type != TypeDirectory {
				return syscall.ENOTDIR
			} else if srcnode._type != TypeDirectory && c._type == TypeDirectory {
				return syscall.EISDIR
			}
		}
	}

	if !src.stickyAccess(srcnode, m.ctx.Uid()) {
		return syscall.EACCES
	}

	// owner of a directory cannot rename subdirectories owned by other users.
	uid := m.ctx.Uid()
	if src != dst && src.mode&0o1000 != 0 && uid != 0 &&
		uid != srcnode.uid && (uid != src.uid || srcnode._type == TypeDirectory) {
		return syscall.EACCES
	}

	if c := dst.children[dstname]; c != nil {
		if c == srcnode {
			return syscall.EPERM
		}
		if flag != 0 {
		} else if srcnode._type == TypeDirectory && c._type != TypeDirectory {
			return syscall.ENOTDIR
		} else if srcnode._type != TypeDirectory && c._type == TypeDirectory {
			return syscall.EISDIR
		}
		if len(c.children) != 0 {
			return syscall.ENOTEMPTY
		}
		if dst != src || dstname != srcname {
			if !dst.stickyAccess(c, m.ctx.Uid()) {
				return syscall.EACCES
			}
			if st := m.rmr(dst.inode, dstname, nil); st != 0 {
				return st
			}
		}
	}
	for i, tp := range srcnode.parents {
		if tp == src {
			srcnode.parents[i] = dst
			break
		}
	}
	delete(src.children, srcname)
	srcnode.name = dstname
	dst.children[dstname] = srcnode
	// srcnode.ctime = m.ctx.ts
	// src.mtime = m.ctx.ts
	// src.ctime = m.ctx.ts
	// dst.mtime = m.ctx.ts
	// dst.ctime = m.ctx.ts
	return 0
}

type tEntry struct {
	name string
	node *tNode
}

func (m *fsMachine) readdir(inode Ino) ([]*tEntry, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	if !n.access(m.ctx, MODE_MASK_R) {
		return nil, syscall.EACCES
	}
	var result []*tEntry
	result = append(result, &tEntry{
		name: ".",
		node: n,
	}, &tEntry{
		name: "..",
		node: n.parents[0],
	})

	for name, node := range n.children {
		result = append(result, &tEntry{
			name: name,
			node: node,
		})
	}
	sort.Slice(result, func(i, j int) bool { return result[i].name < result[j].name })
	return result, 0
}

func (m *fsMachine) write(inode Ino, indx uint32, pos uint32, chunkid uint64, cleng uint32, off, len uint32) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if n._type != TypeFile {
		return syscall.EPERM
	}
	if len == 0 {
		return 0
	}
	//pos = pos % ChunkSize // fix invalid pos
	//if chunkid == 0 || cleng == 0 || len == 0 || pos+len > ChunkSize || off+len > cleng {
	//	return syscall.EINVAL
	//}
	n.chunks[indx] = append(n.chunks[indx], tSlice{pos, chunkid, cleng, off, len})
	if uint64(indx)*ChunkSize+uint64(pos+len) > n.length {
		n.length = uint64(indx)*ChunkSize + uint64(pos) + uint64(len)
	}
	return 0
}

func (m *fsMachine) read(inode Ino, indx uint32) (uint64, []tSlice, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return 0, nil, syscall.ENOENT
	}
	if n._type != TypeFile {
		return 0, nil, syscall.EPERM
	}
	// if !n.access(m.ctx, MODE_MASK_R) {
	// 	return 0, nil, "", syscall.EACCES
	// }
	var ss []*slice
	var clen = make(map[uint64]uint32)
	for _, s := range n.chunks[indx] {
		ss = append(ss, &slice{id: s.id, off: s.off, len: s.len, pos: s.pos})
		clen[s.id] = s.clen
	}
	cs := buildSlice2(ss)
	for i := range cs {
		if _, ok := clen[cs[i].id]; ok {
			cs[i].clen = clen[cs[i].id]
		}
	}
	return n.length, cs, 0
}

func buildSlice2(ss []*slice) []tSlice {
	if len(ss) == 0 {
		return nil
	}
	var root *slice
	for i := range ss {
		s := new(slice)
		*s = *ss[i]
		var right *slice
		s.left, right = root.cut(s.pos)
		_, s.right = right.cut(s.pos + s.len)
		root = s
	}
	// root.optimize(1)
	var pos uint32
	var chunk []tSlice
	root.visit(func(s *slice) {
		if s.pos > pos {
			chunk = append(chunk, tSlice{pos: pos, len: s.pos - pos, clen: s.pos - pos})
			pos = s.pos
		}
		chunk = append(chunk, tSlice{pos: pos, id: s.id, off: s.off, len: s.len})
		pos += s.len
	})
	return chunk
}

func (m *fsMachine) setxattr(inode Ino, name string, value []byte, mode uint8) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	// if !xattr.check(name) {
	// 	return syscall.EINVAL
	// }
	switch mode {
	case XattrCreate:
		if n.xattrs[name] != nil {
			return syscall.EEXIST
		}
		n.xattrs[name] = value
	case XattrReplace:
		if n.xattrs[name] == nil {
			return ENOATTR
		}
		n.xattrs[name] = value
	case XattrCreateOrReplace:
		n.xattrs[name] = value
	default:
		return syscall.EINVAL
	}
	// n.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) removexattr(inode Ino, name string) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	// if !xattr.check(name) {
	// 	return syscall.EINVAL
	// }
	if n.xattrs[name] == nil {
		return ENOATTR
	}
	// n.ctime = m.ctx.ts
	delete(n.xattrs, name)
	return 0
}

func (m *fsMachine) getxattr(inode Ino, name string) ([]byte, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	// if !xattr.check(name) {
	// 	return nil, syscall.EINVAL
	// }
	if v, ok := n.xattrs[name]; ok {
		return v, 0
	}
	return nil, ENOATTR
}

func (m *fsMachine) listxattr(inode Ino) ([]byte, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	var names []string
	for name := range n.xattrs {
		names = append(names, name+"\x00")
	}

	if n.accACL != nil {
		names = append(names, "system.posix_acl_access"+"\x00")
	}
	if n.defACL != nil {
		names = append(names, "system.posix_acl_default"+"\x00")
	}

	sort.Slice(names, func(i, j int) bool { return names[i] < names[j] })
	r := []byte(strings.Join(names, ""))
	if len(r) > 65536 {
		return nil, syscall.ERANGE
	}
	return r, 0
}

func (m *fsMachine) Mkdir(t *rapid.T) {
	parent := m.pickNode(t)
	name := rapid.StringN(1, 200, 255).Draw(t, "name")
	mode := rapid.Uint16Range(0, 01777).Draw(t, "mode")
	if name == "." || name == ".." {
		t.Skipf("skip mkdir %s", name)
	}
	var inode Ino
	var attr Attr
	st := m.meta.Mkdir(m.ctx, parent, name, mode, 0, 0, &inode, &attr)
	t.Logf("parent ino %d, dir ino %d", parent, inode)
	//var attr2 Attr
	//m.meta.GetAttr(m.ctx, inode, &attr2)
	st2 := m.create(TypeDirectory, parent, name, mode, 0, inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) Mknod(t *rapid.T) {
	parent := m.pickNode(t)
	name := rapid.StringN(1, 200, 255).Draw(t, "name")
	if name == "." || name == ".." {
		t.Skipf("skip mknod %s", name)
	}
	_type := rapid.Uint8Range(0, TypeDirectory).Draw(t, "type")
	mode := rapid.Uint16Range(0, 01777).Draw(t, "mode")
	var inode Ino
	var attr Attr
	st := m.meta.Mknod(m.ctx, parent, name, _type, mode, 0, 0, "", &inode, &attr)
	st2 := m.create(_type, parent, name, mode, 0, inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) Link(t *rapid.T) {
	parent := m.pickNode(t)
	name := m.genName(t)
	inode := m.pickNode(t)
	st := m.meta.Link(m.ctx, inode, parent, name, nil)
	st2 := m.link(parent, name, inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}
func (m *fsMachine) Rmdir(t *rapid.T) {
	parent := m.pickNode(t)
	name := m.pickChild(parent, t)
	if name == "" {
		return
	}
	st := m.meta.Rmdir(m.ctx, parent, name)
	st2 := m.rmdir(parent, name)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) Unlink(t *rapid.T) {
	parent := m.pickNode(t)
	name := m.pickChild(parent, t)
	if name == "" {
		return
	}
	st := m.meta.Unlink(m.ctx, parent, name)
	st2 := m.unlink(parent, name)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

const SymlinkMax = 65536

func (m *fsMachine) Symlink(t *rapid.T) {
	parent := m.pickNode(t)
	name := rapid.StringN(1, 200, 255).Draw(t, "name")
	target := rapid.StringN(1, 1000, SymlinkMax+1).Draw(t, "target")
	if name == "." || name == ".." {
		t.Skipf("skip symlink %s", name)
	}
	if target == "." || target == ".." {
		t.Skipf("skip symlink %s", target)
	}
	var ti Ino
	st := m.meta.Symlink(m.ctx, parent, name, target, &ti, nil)
	st2 := m.symlink(parent, name, ti, target)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) Readlink(t *rapid.T) {
	inode := m.pickNode(t)
	var target []byte
	st := m.meta.ReadLink(m.ctx, inode, &target)
	target2, st2 := m.readlink(inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && string(target) != target2 {
		t.Fatalf("expect %s but got %s", target2, target)
	}
}

func (m *fsMachine) Lookup(t *rapid.T) {
	parent := m.pickNode(t)
	name := m.pickChild(parent, t)
	if name == "" {
		return
	}
	var inode Ino
	var attr Attr
	st := m.meta.Lookup(m.ctx, parent, name, &inode, &attr, true)
	inode2, st2 := m.lookup(parent, name, true)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && inode != inode2 {
		t.Fatalf("expect %d but got %d", inode2, inode)
	}
}

func (m *fsMachine) Getattr(t *rapid.T) {
	inode := m.pickNode(t)
	var attr Attr
	st := m.meta.GetAttr(m.ctx, inode, &attr)
	t.Logf("attr %#v", attr)
	var n *tNode
	if st == 0 {
		n = new(tNode)
		n._type = attr.Typ
		n.mode = attr.Mode
		n.uid = attr.Uid
		n.gid = attr.Gid
		// n.atime = attr.Atime
		// n.mtime = attr.Mtime
		// n.ctime = attr.Ctime
		n.length = attr.Length
	}
	n2, st2 := m.getattr(inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if n2 != nil {
		if n2._type != n._type || n2.mode != n.mode ||
			n2.uid != n.uid || n2.gid != n.gid ||
			// n2.atime != n.atime || n2.mtime != n.mtime || n2.ctime != n.ctime ||
			n2.length != n.length {
			t.Logf("expect %+v but got %+v", n2, n)
			t.Fatalf("attr not matched")
		}
	}
}

func (m *fsMachine) Rename(t *rapid.T) {
	dstName := rapid.StringN(1, 200, 255).Draw(t, "name")
	if dstName == "." || dstName == ".." {
		t.Skipf("skip name . and ..")
	}

	srcParent := m.pickNode(t)
	srcName := m.pickChild(srcParent, t)
	if srcName == "" {
		return
	}
	var srcIno Ino
	for name, n := range m.nodes[srcParent].children {
		// When the node is a hard link, name is not equal to n.name
		if srcName == name {
			srcIno = n.inode
		}
	}
	dstParent := m.pickNode(t)

	if srcIno == dstParent {
		t.Skipf("skip rename srcIno is dstParent")
	}
	// hard link
	if n, ok := m.nodes[dstParent].children[dstName]; ok && n.inode == srcIno {
		t.Skipf("skip rename srcIno is dstParent")
	}
	tmp := m.nodes[dstParent].inode
	for {
		if tmp == RootInode {
			break
		}
		if tmp == srcIno {
			t.Skipf("skip rename dstParent is subdir of srcIno")
		} else {
			tmp = m.nodes[tmp].parents[0].inode
		}
	}

	var inode Ino
	var attr Attr
	st := m.rename(srcParent, srcName, dstParent, dstName, 0)
	st2 := m.meta.Rename(m.ctx, srcParent, srcName, dstParent, dstName, 0, &inode, &attr)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st, st2)
	}
}

/*
Due to concurrency issues, the execution result of rmr is unpredictable.

	func (m *fsMachine) Rmr(t *rapid.T) {
		parent := m.pickNode(t)
		t.Logf("rmr parent ino %d", parent)
		name := m.pickChild(parent, t)
		var removed, removed2 uint64
		st := m.meta.Remove(m.ctx, parent, name, &removed)
		st2 := m.rmr(parent, name, &removed2)
		if st != st2 {
			t.Fatalf("expect %s but got %s", st2, st)
		}
		if removed != removed2 {
			t.Fatalf("expect removed %d but got %d", removed2, removed)
		}
	}
*/

func (m *fsMachine) Readdir(t *rapid.T) {
	inode := m.pickNode(t)
	var names []string
	var result []*Entry
	st := m.meta.Readdir(m.ctx, inode, 0, &result)
	if st == 0 {
		for _, e := range result {
			names = append(names, string(e.Name))
		}
		sort.Strings(names)
	}
	stdRes, st2 := m.readdir(inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	var names2 []string
	for _, entry := range stdRes {
		names2 = append(names2, entry.name)
	}
	if st == 0 && !reflect.DeepEqual(names, names2) {
		t.Fatalf("expect %+v but got %+v", names2, names)
	}
}

// Truncate is currently disabled.
// FIXME: The comparison of the truncate results requires compacting all slices,
// and some tricky processing are required on results.
//func (m *fsMachine) Truncate(t *rapid.T) {
//	inode := m.pickNode(t)
//	length := rapid.Uint64Range(0, 500<<20).Draw(t, "length")
//	var attr Attr
//	st := m.meta.Truncate(m.ctx, inode, 0, length, &attr, false)
//	st2 := m.truncate(inode, length)
//	if st != st2 {
//		t.Fatalf("expect %s but got %s", st2, st)
//	}
//}

func (m *fsMachine) Fallocate(t *rapid.T) {
	inode := m.pickNode(t)
	offset := rapid.Uint64Range(0, 500<<20).Draw(t, "offset")
	length := rapid.Uint64Range(1, 500<<20).Draw(t, "length")
	st := m.meta.Fallocate(m.ctx, inode, 0, offset, length, nil)
	st2 := m.fallocate(inode, 0, offset, length)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

// CopyFileRange is currently disabled, same reason as Truncate.
//func (m *fsMachine) CopyFileRange(t *rapid.T) {
//	srcinode := m.pickNode(t)
//	srcoff := rapid.Uint64Max(m.nodes[srcinode].length).Draw(t, "srcoff")
//	dstinode := m.pickNode(t)
//	dstoff := rapid.Uint64Max(m.nodes[dstinode].length).Draw(t, "dstoff")
//	size := rapid.Uint64Max(m.nodes[srcinode].length).Draw(t, "size")
//	var copied uint64
//	st := m.meta.CopyFileRange(m.ctx, srcinode, srcoff, dstinode, dstoff, size, 0, &copied)
//	st2 := m.copy_file_range(srcinode, srcoff, dstinode, dstoff, size, 0)
//	if st != st2 {
//		t.Fatalf("expect %s but got %s", st2, st)
//	}
//}

func (m *fsMachine) getPath(inode Ino) string {
	n := m.nodes[inode]
	if n == nil {
		return ""
	}
	if len(n.parents) == 0 {
		return "/"
	}
	p := n.parents[0]
	for name, t := range p.children {
		if t == n {
			return m.getPath(p.inode) + "/" + name
		}
	}
	panic("unreachable")
}

func (m *fsMachine) Write(t *rapid.T) {
	inode := m.pickNode(t)
	indx := rapid.Uint32Range(0, 10).Draw(t, "indx")
	pos := rapid.Uint32Range(0, ChunkSize).Draw(t, "pos")
	var chunkid uint64
	m.meta.NewSlice(m.ctx, &chunkid)
	cleng := rapid.Uint32Range(1, ChunkSize).Draw(t, "cleng")
	off := rapid.Uint32Range(0, cleng-1).Draw(t, "off")
	len := rapid.Uint32Range(1, cleng-off).Draw(t, "len")
	st := m.meta.Write(m.ctx, inode, indx, pos, Slice{chunkid, cleng, off, len}, time.Time{})
	st2 := m.write(inode, indx, pos, chunkid, cleng, off, len)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) Read(t *rapid.T) {
	inode := m.pickNode(t)
	indx := rapid.Uint32Range(0, 10).Draw(t, "indx")
	var result []Slice
	st := m.meta.Read(m.ctx, inode, indx, &result)
	var slices []tSlice
	if st == 0 {
		var pos uint32
		for _, so := range result {
			s := tSlice{pos, so.Id, so.Size, so.Off, so.Len}
			slices = append(slices, s)
			pos += slices[len(slices)-1].len
		}
	}
	_, slices2, st2 := m.read(inode, indx)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && !reflect.DeepEqual(cleanupSlices(slices), cleanupSlices(slices2)) {
		t.Fatalf("expect %+v but got %+v", slices2, slices)
	}
}

func cleanupSlices(ss []tSlice) []tSlice {
	for i := 0; i < len(ss); i++ {
		s := ss[i]
		if s.id == 0 && s.off > 0 {
			s.off = 0
			ss[i] = s
		}
		if ss[i].id == 0 && i > 0 && ss[i-1].id == 0 {
			ss[i-1].len += ss[i].len
			ss = append(ss[:i], ss[i+1:]...)
			i--
		}
	}
	for len(ss) > 0 && ss[len(ss)-1].id == 0 {
		ss = ss[:len(ss)-1]
	}
	if len(ss) == 0 {
		ss = nil
	}
	return ss
}

func (m *fsMachine) SetXAttr(t *rapid.T) {
	inode := m.pickNode(t)
	name := string(rapid.SliceOfN(rapid.RuneFrom(nil, unicode.Lu), 1, XATTR_NAME_MAX).Draw(t, "name"))
	value := rapid.SliceOfN(rapid.Byte(), 1, XATTR_SIZE_MAX+1).Draw(t, "value")
	mode := rapid.Uint8Range(0, XATTR_REMOVE).Draw(t, "mode")
	st := m.meta.SetXattr(m.ctx, inode, name, value, uint32(mode))
	st2 := m.setxattr(inode, name, value, mode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) RemoveXattr(t *rapid.T) {
	inode := m.pickNode(t)
	name := rapid.StringN(1, 200, XATTR_NAME_MAX+1).Draw(t, "name")
	st := m.meta.RemoveXattr(m.ctx, inode, name)
	st2 := m.removexattr(inode, name)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

const XATTR_REMOVE = 5
const XATTR_NAME_MAX = 255
const XATTR_SIZE_MAX = 65536

func (m *fsMachine) GetXAttr(t *rapid.T) {
	inode := m.pickNode(t)
	name := string(rapid.SliceOfN(rapid.RuneFrom(nil, unicode.Lu), 1, XATTR_NAME_MAX+1).Draw(t, "name"))
	var value []byte
	st := m.meta.GetXattr(m.ctx, inode, name, &value)
	value2, st2 := m.getxattr(inode, name)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && string(value) != string(value2) {
		t.Fatalf("expect %s but got %s", string(value2), string(value))
	}
}

func (m *fsMachine) ListXAttr(t *rapid.T) {
	inode := m.pickNode(t)
	var attrs []byte
	st := m.meta.ListXattr(m.ctx, inode, &attrs)
	attrs2, st2 := m.listxattr(inode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	as := strings.Split(string(attrs), "\x00")
	sort.Strings(as)
	as2 := strings.Split(string(attrs2), "\x00")
	sort.Strings(as2)
	if st == 0 && !reflect.DeepEqual(as, as2) {
		t.Fatalf("expect %s but got %s", string(attrs2), string(attrs))
	}
}

func (m *fsMachine) Check(t *rapid.T) {
	m.ctx = NewContext(0, 0, []uint32{0})
	if err := m.checkFSTree(RootInode); err != nil {
		t.Fatalf("check FSTree error %s", err)
	}
}

func (m *fsMachine) checkFSTree(root Ino) error {
	var result []*Entry
	if st := m.meta.Readdir(m.ctx, root, 1, &result); st != 0 {
		return fmt.Errorf("meta readdir error %s", st)
	}
	sort.Slice(result, func(i, j int) bool { return string(result[i].Name) < string(result[j].Name) })

	stdResult, st := m.readdir(root)
	if st != 0 {
		return fmt.Errorf("standard meta readdir error %d", st)
	}
	if len(result) != len(stdResult) {
		return fmt.Errorf("the results of reading the directory should have equal lengths. standard meta: %#v test meta: %#v", stdResult, result)
	}
	for i := 0; i < len(result); i++ {
		stdEntry := stdResult[i]
		stdNode := stdEntry.node
		entry := result[i]
		if stdEntry.name == "." || stdEntry.name == ".." {
			continue
		}
		if stdEntry.name != string(entry.Name) {
			return fmt.Errorf("name should equal. ino %d standard meta: %s, test meta %s", stdNode.inode, stdNode.name, string(entry.Name))
		}
		if stdNode._type != entry.Attr.Typ {
			return fmt.Errorf("type should equal ino: %d, standard meta: %d, test meta %d", entry.Inode, stdNode._type, entry.Attr.Typ)
		}
		switch entry.Attr.Typ {
		case TypeDirectory:
			if err := m.checkFSTree(entry.Inode); err != nil {
				return err
			}
		default:
			if stdNode.inode != entry.Inode {
				return fmt.Errorf("inode should equal. standard meta: %d, test meta %d", stdNode.inode, entry.Inode)
			}
			if stdNode.gid != entry.Attr.Gid {
				return fmt.Errorf("gid should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.gid, entry.Attr.Gid)
			}
			if stdNode.uid != entry.Attr.Uid {
				return fmt.Errorf("uid should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.uid, entry.Attr.Uid)
			}
			if stdNode.length != entry.Attr.Length {
				return fmt.Errorf("length should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.length, entry.Attr.Length)
			}
			if stdNode.iflags != entry.Attr.Flags {
				return fmt.Errorf("flags should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.iflags, entry.Attr.Flags)
			}
			if stdNode.mode != entry.Attr.Mode {
				return fmt.Errorf("mode should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.mode, entry.Attr.Mode)
			}
			// If a hard link has been set, the parent will be cleared.
			if !stdNode.hardlink {
				if stdNode.parents[0].inode != entry.Attr.Parent {
					return fmt.Errorf("parent should equal. ino %d standard meta: %d, test meta %d", stdNode.inode, stdNode.parents[0].inode, entry.Attr.Parent)
				}
			}

			// check chunks
			for indx := range stdNode.chunks {
				var rs []Slice
				st := m.meta.Read(m.ctx, stdNode.inode, indx, &rs)
				var slices []tSlice
				if st == 0 {
					var pos uint32
					for _, so := range rs {
						s := tSlice{pos, so.Id, so.Size, so.Off, so.Len}
						slices = append(slices, s)
						pos += slices[len(slices)-1].len
					}
				}
				_, slices2, st2 := m.read(stdNode.inode, indx)
				if st != st2 {
					return fmt.Errorf("read eno should equal. standard meta ino %d ,indx %d std meta eno %d test meta eno %d", stdNode.inode, indx, st2, st)
				}
				if st == 0 && !reflect.DeepEqual(cleanupSlices(slices), cleanupSlices(slices2)) {
					return fmt.Errorf("slice should equal. standard meta %+v test meta %+v", slices2, slices)
				}
			}

			// check symlink
			var target []byte
			st := m.meta.ReadLink(m.ctx, stdNode.inode, &target)
			target2, st2 := m.readlink(stdNode.inode)
			if st != st2 {
				return fmt.Errorf("readlink eno should equal. standard meta ino %d stadndard meta %d test meta %d", stdNode.inode, st2, st)
			}
			if st == 0 && string(target) != target2 {
				return fmt.Errorf("symlink should equal. standard meta ino %d stadndard meta %s test meta %s", stdNode.inode, target2, string(target))
			}

			// check xattr
			var attrs []byte
			st = m.meta.ListXattr(m.ctx, stdNode.inode, &attrs)
			attrs2, st2 := m.listxattr(stdNode.inode)
			if st != st2 {
				return fmt.Errorf("listxattr eno should equal. standard meta ino %d stadndard meta %d test meta %d", stdNode.inode, st2, st)
			}
			as := strings.Split(string(attrs), "\x00")
			sort.Strings(as)
			as2 := strings.Split(string(attrs2), "\x00")
			sort.Strings(as2)
			if st == 0 && !reflect.DeepEqual(as, as2) {
				return fmt.Errorf("listxattr should equal. standard meta ino %d stadndard meta %s test meta %s", stdNode.inode, as2, as)
			}
		}
	}
	return nil
}

func (m *fsMachine) setfacl(inode Ino, atype uint8, rule *aclAPI.Rule) syscall.Errno {
	if atype != aclAPI.TypeAccess && atype != aclAPI.TypeDefault {
		return syscall.EINVAL
	}
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if m.ctx.Uid() != 0 && m.ctx.Uid() != n.uid {
		return syscall.EPERM
	}

	if rule.IsEmpty() {
		if atype == aclAPI.TypeDefault {
			n.defACL = nil
			m.removexattr(inode, "system.posix_acl_default")
		}
		// TODO: update ctime
		return 0
	}

	if rule.IsMinimal() && atype == aclAPI.TypeAccess {
		n.accACL = nil
		n.mode &= 07000
		n.mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7)
		return 0
	}

	rule.InheritPerms(n.mode)
	if atype == aclAPI.TypeAccess {
		n.accACL = rule
		if n.accACL.GetMode() != n.mode&0777 {
			n.mode = n.mode&07000 | n.accACL.GetMode()
		}
	} else {
		n.defACL = rule
	}
	return 0
}

func (m *fsMachine) Setfacl(t *rapid.T) {
	inode := m.pickNode(t)
	atype := rapid.Uint8Range(1, 2).Draw(t, "atype")
	user := rapid.Uint16Range(0, 7).Draw(t, "user")
	group := rapid.Uint16Range(0, 7).Draw(t, "group")
	other := rapid.Uint16Range(0, 7).Draw(t, "other")
	mask := rapid.Uint16Range(0, 7).Draw(t, "mask")
	var users aclAPI.Entries
	var groups aclAPI.Entries

	us := rapid.IntRange(0, 3).Draw(t, "users")
	for i := 0; i < us; i++ {
		users = append(users, aclAPI.Entry{Id: rapid.Uint32Range(1, 5).Draw(t, "uid"), Perm: rapid.Uint16Range(0, 7).Draw(t, "perm")})
	}
	gs := rapid.IntRange(0, 3).Draw(t, "groups")
	for i := 0; i < gs; i++ {
		groups = append(groups, aclAPI.Entry{Id: rapid.Uint32Range(1, 5).Draw(t, "gid"), Perm: rapid.Uint16Range(0, 7).Draw(t, "perm")})
	}
	rule := &aclAPI.Rule{
		Owner:       user,
		Group:       group,
		Mask:        mask,
		Other:       other,
		NamedUsers:  users,
		NamedGroups: groups,
	}

	st := m.meta.SetFacl(m.ctx, inode, atype, rule)
	st2 := m.setfacl(inode, atype, rule)

	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) getfacl(inode Ino, atype uint8) (*aclAPI.Rule, syscall.Errno) {
	n := m.nodes[inode]
	if n == nil {
		return nil, syscall.ENOENT
	}
	switch atype {
	case aclAPI.TypeAccess:
		if n.accACL == nil {
			return nil, ENOATTR
		}
		return n.accACL, 0
	case aclAPI.TypeDefault:
		if n.defACL == nil {
			return nil, ENOATTR
		}
		return n.defACL, 0
	default:
		return nil, syscall.EINVAL
	}
}

func (m *fsMachine) GetACL(t *rapid.T) {
	inode := m.pickNode(t)
	atype := rapid.Uint8Range(1, 2).Draw(t, "atype")

	rule := &aclAPI.Rule{}
	st := m.meta.GetFacl(m.ctx, inode, atype, rule)
	rule2, st2 := m.getfacl(inode, atype)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && !rule.IsEqual(rule2) {
		t.Fatalf("expect %+v but got %+v, %t", rule2, rule, reflect.DeepEqual(rule, *rule2))
	}
}

func (m *fsMachine) RemoveACL(t *rapid.T) {
	inode := m.pickNode(t)
	atype := rapid.Uint8Range(1, 2).Draw(t, "atype")

	var rule *aclAPI.Rule
	if atype == aclAPI.TypeAccess {
		rule = &aclAPI.Rule{
			Mask: 0xFFFF,
		}
		rule.InheritPerms(m.nodes[inode].mode)
	} else {
		rule = aclAPI.EmptyRule()
	}

	st := m.meta.SetFacl(m.ctx, inode, atype, rule)
	st2 := m.setfacl(inode, atype, rule)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (n *tNode) stat(visited map[Ino]struct{}) (uint64, uint64) {
	if _, ok := visited[n.inode]; ok {
		return 0, 0
	}
	visited[n.inode] = struct{}{}

	var size uint64 = uint64(align4K(n.length))
	var inodes uint64 = 1
	if n._type == TypeDirectory {
		for _, c := range n.children {
			s, i := c.stat(visited)
			size += s
			inodes += i
		}
	}
	return size, inodes
}

func (m *fsMachine) statfs(format Format) (uint64, uint64, uint64, uint64) {
	n := m.nodes[RootInode]
	visited := make(map[Ino]struct{})
	used, iused := n.stat(visited)
	used -= uint64(align4K(0))
	iused -= 1
	var avail, iavail uint64
	avail = 1<<50 - used
	iavail = 10 << 20
	// if inodes is not limited in Format, iavail always keep the same number of inodes
	if format.Inodes > 0 {
		iavail -= iused
	}
	if n.quota != nil {
		if n.quota.size > 0 {
			if used > n.quota.size {
				avail = 0
			} else {
				avail = n.quota.size - used
			}
		}
		if n.quota.inodes > 0 {
			if iused > n.quota.inodes {
				iavail = 0
			} else {
				iavail = uint64(n.quota.inodes) - iused
			}
		}
	}
	return used + avail, avail, iused, iavail
}

func (m *fsMachine) StatFS(t *rapid.T) {
	var totalsize, availspace, iused, iavail uint64
	m.meta.StatFS(m.ctx, RootInode, &totalsize, &availspace, &iused, &iavail)
	total2, avail2, iused2, iavail2 := m.statfs(m.meta.GetFormat())
	if totalsize != total2 || availspace != avail2 || iused != iused2 || iavail != iavail2 {
		t.Fatalf("expect %d %d %d %d but got %d %d %d %d", total2, avail2, iused2, iavail2, totalsize, availspace, iused, iavail)
	}
}

func (m *fsMachine) amtime(inode Ino, flag uint16, atime, mtime int64, oattr *Attr) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}

	changed := false
	if flag&SetAttrAtime != 0 {
		n.atime = atime
		changed = changed || oattr.Atime != atime
	}
	if flag&SetAttrMtime != 0 {
		n.mtime = mtime
		changed = changed || oattr.Mtime != mtime
	}

	if changed {
		if n.uid == 0 && m.ctx.Uid() != 0 {
			return syscall.EPERM
		}
		if ok := n.access(m.ctx, MODE_MASK_W); !ok && n.uid != m.ctx.Uid() {
			return syscall.EACCES
		}
	}
	// TODO ctime
	return 0
}

func (m *fsMachine) SetAmtime(t *rapid.T) {
	inode := m.pickNode(t)

	oattr := &Attr{}
	if st := m.meta.GetAttr(m.ctx, inode, oattr); st != 0 {
		return
	}

	atime := rapid.Int64Range(0, 1e8).Draw(t, "atime")
	mtime := rapid.Int64Range(0, 1e8).Draw(t, "mtime")
	var flag uint16
	attr := &Attr{
		Atime: atime,
		Mtime: mtime,
	}

	if atime > 0 {
		flag |= SetAttrAtime
	}
	if mtime > 0 {
		flag |= SetAttrMtime
	}
	st2 := m.amtime(inode, flag, atime, mtime, oattr)
	st := m.meta.SetAttr(m.ctx, inode, flag, 0, attr)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}

	if st == 0 {
		// validate time only here
		node := m.nodes[inode]
		if flag&SetAttrAtime != 0 && attr.Atime != node.atime {
			t.Fatalf("expect %d but got %d", node.atime, attr.Atime)
		}
		if flag&SetAttrMtime != 0 && attr.Mtime != node.mtime {
			t.Fatalf("expect %d but got %d", node.mtime, attr.Mtime)
		}
	}
}

func (m *fsMachine) chmod(inode Ino, mode uint16) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if n.accACL != nil {
		n.accACL.SetMode(mode)
		n.mode = mode&07000 | n.accACL.GetMode()
	} else {
		if m.ctx.Uid() != 0 && m.ctx.Uid() != n.uid &&
			(n.mode&01777 != mode&01777 || mode&02000 > n.mode&02000 || mode&04000 > n.mode&04000) {
			return syscall.EPERM
		}
		n.mode = mode
	}
	// n.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) Chmod(t *rapid.T) {
	inode := m.pickNode(t)
	mode := rapid.Uint16Range(0, 01777).Draw(t, "mode")
	st := m.meta.SetAttr(m.ctx, inode, SetAttrMode, 0, &Attr{Mode: mode})
	st2 := m.chmod(inode, mode)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) chown(inode Ino, flag uint16, uid, gid uint32) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if flag&SetAttrUID != 0 && n.uid != uid {
		if m.ctx.Uid() != 0 {
			return syscall.EPERM
		}
		n.uid = uid
	}
	if flag&SetAttrGID != 0 {
		if m.ctx.Uid() != 0 && m.ctx.Uid() != n.uid {
			return syscall.EPERM
		}
		if n.gid != gid {
			if m.ctx.CheckPermission() && m.ctx.Uid() != 0 && !containsGid(m.ctx, gid) {
				return syscall.EPERM
			}
			n.gid = gid
		}
	}
	// n.ctime = m.ctx.ts
	return 0
}

func (m *fsMachine) Chown(t *rapid.T) {
	inode := m.pickNode(t)
	uid := rapid.Uint32Range(0, 10).Draw(t, "uid")
	gid := rapid.Uint32Range(0, 10).Draw(t, "gid")
	var flag uint16
	if uid < 10 {
		flag |= SetAttrUID
	}
	if gid < 10 {
		flag |= SetAttrGID
	}
	st := m.meta.SetAttr(m.ctx, inode, flag, 0, &Attr{Uid: uid, Gid: gid})
	st2 := m.chown(inode, flag, uid, gid)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

func (m *fsMachine) flock(inode Ino, owner uint64, typ uint32) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	// m.openfiles[inode] = true
	if n.flocks == nil {
		n.flocks = make(map[ownerKey]byte)
	}
	lowner := ownerKey{Sid: m.sid, Owner: owner}
	switch typ {
	case F_UNLCK:
		delete(n.flocks, lowner)
	case F_RDLCK:
		for o, l := range n.flocks {
			if l == 'W' && o != lowner {
				return syscall.EAGAIN
			}
		}
		n.flocks[lowner] = 'R'
	case F_WRLCK:
		for o := range n.flocks {
			if o == lowner {
				continue
			}
			return syscall.EAGAIN
		}
		n.flocks[lowner] = 'W'
	default:
		return syscall.EINVAL
	}
	return 0
}

func (m *fsMachine) Flock(t *rapid.T) {
	inode := m.pickNode(t)
	owner := rapid.Uint64().Draw(t, "owner")
	typ := rapid.SampledFrom([]uint32{F_WRLCK, F_RDLCK, F_UNLCK}).Draw(t, "typ")
	st := m.flock(inode, owner, typ)
	st2 := m.meta.Flock(m.ctx, inode, owner, typ, false)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st, st2)
	}

	if st == 0 {
		plocks1, flocks1, err1 := m.meta.ListLocks(m.ctx, inode)
		plocks2, flocks2, err2 := m.listLocks(inode)
		if err1 != nil && err2 == nil || err1 == nil && err2 != nil {
			t.Fatalf("expect %s but got %s", err2, err1)
		}
		if err1 == nil {
			sort.Slice(flocks1, func(i, j int) bool {
				return flocks1[i].Owner < flocks1[j].Owner
			})
			sort.Slice(flocks2, func(i, j int) bool {
				return flocks2[i].Owner < flocks2[j].Owner
			})
			if !compareLocks(flocks1, flocks2) {
				t.Fatalf("expect %+v but got %+v", flocks2, flocks1)
			}
			sort.Slice(plocks1, func(i, j int) bool {
				if plocks1[i].Owner != plocks1[j].Owner {
					return plocks1[i].Owner < plocks1[j].Owner
				}
				if plocks1[i].Start != plocks1[j].Start {
					return plocks1[i].Start < plocks1[j].Start
				}
				return plocks1[i].End < plocks1[j].End
			})
			sort.Slice(plocks2, func(i, j int) bool {
				if plocks2[i].Owner != plocks2[j].Owner {
					return plocks2[i].Owner < plocks2[j].Owner
				}
				if plocks2[i].Start != plocks2[j].Start {
					return plocks2[i].Start < plocks2[j].Start
				}
				return plocks2[i].End < plocks2[j].End
			})
			if !compareLocks(plocks1, plocks2) {
				t.Fatalf("expect %+v but got %+v", plocks2, plocks1)
			}
		}
	}
}

func (m *fsMachine) listLocks(inode Ino) ([]PLockItem, []FLockItem, error) {
	var flocks []FLockItem
	var plocks []PLockItem
	n := m.nodes[inode]
	if n == nil {
		return plocks, flocks, syscall.ENOENT
	}
	for o, l := range n.flocks {
		flocks = append(flocks, FLockItem{ownerKey: ownerKey{
			Sid:   o.Sid,
			Owner: o.Owner,
		}, Type: string(l)})
	}
	for o, ls := range n.plocks {
		for _, l := range ls {
			plocks = append(plocks, PLockItem{ownerKey: ownerKey{
				Sid:   o.Sid,
				Owner: o.Owner,
			}, plockRecord: l})
		}
	}
	return plocks, flocks, nil
}

func compareLocks[T any](l1, l2 []T) bool {
	if len(l1) == 0 && len(l2) == 0 {
		return true
	}
	return reflect.DeepEqual(l1, l2)
}

func (m *fsMachine) ListLocks(t *rapid.T) {
	inode := m.pickNode(t)
	plocks1, flocks1, err1 := m.meta.ListLocks(m.ctx, inode)
	plocks2, flocks2, err2 := m.listLocks(inode)
	if err1 != nil && err2 == nil || err1 == nil && err2 != nil {
		t.Fatalf("expect %s but got %s", err2, err1)
	}
	if err1 == nil {
		// sort flocks by owner
		sort.Slice(flocks1, func(i, j int) bool {
			return flocks1[i].Owner < flocks1[j].Owner
		})
		sort.Slice(flocks2, func(i, j int) bool {
			return flocks2[i].Owner < flocks2[j].Owner
		})
		if !compareLocks(flocks1, flocks2) {
			t.Fatalf("expect %+v but got %+v", flocks2, flocks1)
		}
		// sort plocks by owner
		sort.Slice(plocks1, func(i, j int) bool {
			return plocks1[i].Owner < plocks1[j].Owner
		})
		sort.Slice(plocks2, func(i, j int) bool {
			return plocks2[i].Owner < plocks2[j].Owner
		})
		if !compareLocks(plocks1, plocks2) {
			t.Fatalf("expect %+v but got %+v", plocks2, plocks1)
		}
	}
}

func (m *fsMachine) getlk(inode Ino, owner uint64, ltype *uint32, start *uint64, end *uint64, pid *uint32) syscall.Errno {
	if *ltype == F_UNLCK {
		*start = 0
		*end = 0
		*pid = 0
		return 0
	}
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	for o, ls := range n.plocks {
		for _, l := range ls {
			if o.Owner != owner && (*ltype == F_WRLCK || l.Type == F_WRLCK) && *end >= l.Start && *start <= l.End {
				*ltype = l.Type
				*start = l.Start
				*end = l.End
				if o.Sid == m.sid {
					*pid = l.Pid
				} else {
					*pid = 0
				}
				return 0
			}
		}
	}
	*ltype = F_UNLCK
	*start = 0
	*end = 0
	*pid = 0
	return 0
}

func (m *fsMachine) Getlk(t *rapid.T) {
	inode := m.pickNode(t)
	owner := rapid.Uint64().Draw(t, "owner")
	ltype := rapid.Uint32Range(0, 2).Draw(t, "ltype")
	start := rapid.Uint64Range(0, 500<<20).Draw(t, "start")
	length := rapid.Uint64Range(1, 500<<20).Draw(t, "len")
	end := start + length - 1

	var pid1, pid2 uint32
	ftype1, ftype2 := ltype, ltype
	fstart1, fstart2 := start, start
	fend1, fend2 := end, end
	st := m.getlk(inode, owner, &ftype1, &fstart1, &fend1, &pid1)
	st2 := m.meta.Getlk(m.ctx, inode, owner, &ftype2, &fstart2, &fend2, &pid2)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
	if st == 0 && ltype != F_UNLCK && (ftype1 == F_UNLCK && ftype2 != F_UNLCK || ftype1 != F_UNLCK && ftype2 == F_UNLCK) {
		t.Fatalf("status not right, %d %d", ftype1, ftype2)
	}
}

func (m *fsMachine) setlk(inode Ino, owner uint64, ltype uint32, start uint64, end uint64, pid uint32) syscall.Errno {
	n := m.nodes[inode]
	if n == nil {
		return syscall.ENOENT
	}
	if ltype != F_UNLCK {
		// m.openfiles[inode] = true
	}
	if n.plocks == nil {
		n.plocks = make(map[ownerKey][]plockRecord)
	}
	lowner := ownerKey{Sid: m.sid, Owner: owner}
	if ltype == F_UNLCK {
		if n.plocks[lowner] == nil {
			return 0
		}
	} else {
		for o, ls := range n.plocks {
			for _, l := range ls {
				if o != lowner && (ltype == F_WRLCK || l.Type == F_WRLCK) && end >= l.Start && start <= l.End {
					return syscall.EAGAIN
				}
			}
		}
	}
	ls := updateLocks(n.plocks[lowner], plockRecord{ltype, pid, start, end})
	if len(ls) == 0 {
		delete(n.plocks, lowner)
	} else {
		n.plocks[lowner] = ls
	}
	return 0
}

func (m *fsMachine) Setlk(t *rapid.T) {
	inode := m.pickNode(t)
	owner := rapid.Uint64().Draw(t, "owner")
	ltype := rapid.SampledFrom([]uint32{F_WRLCK, F_RDLCK, F_UNLCK}).Draw(t, "ltype")
	start := rapid.Uint64Range(0, 500<<20).Draw(t, "start")
	len := rapid.Uint64Range(1, 500<<20).Draw(t, "len")
	pid := rapid.Uint32Range(1, 10000).Draw(t, "pid")
	var end = start + len - 1
	st := m.meta.Setlk(m.ctx, inode, owner, false, ltype, start, end, pid)
	st2 := m.setlk(inode, owner, ltype, start, end, pid)
	if st != st2 {
		t.Fatalf("expect %s but got %s", st2, st)
	}
}

var metaURL string

func init() {
	flag.StringVar(&metaURL, "rapid.meta", "memkv://jfs-unit-test", "meta URL")
	// flag.StringVar(&metaURL, "rapid.meta", "sqlite3://test.db", "meta URL")
	// flag.StringVar(&metaURL, "rapid.meta", "redis://localhost:6379", "meta URL")
}

func defaultFlag(name string, value string) func() {
	if f := flag.Lookup(name); f.Value.String() == f.DefValue {
		flag.Set(name, value)
		return func() {
			flag.Set(name, f.DefValue)
		}
	}
	return func() {}
}

func TestFSOps(t *testing.T) {
	logger.SetLevel(logrus.ErrorLevel)
	defer logger.SetLevel(logrus.InfoLevel)
	defer defaultFlag("rapid.shrinktime", "1h")()
	defer defaultFlag("rapid.steps", "200")()
	defer defaultFlag("rapid.checks", "5000")()
	//defer defaultFlag("rapid.seed", "1")()
	//defer defaultFlag("rapid.v", "true")()
	//defer defaultFlag("rapid.debug", "true")()
	//defer defaultFlag("rapid.debugvis", "true")()
	//defer defaultFlag("rapid.failfile", "testdata/rapid/TestFSOps/TestFSOps-20250228114121-68323.fail")()

	rapid.Check(t, rapid.Run[*fsMachine]())
}


================================================
FILE: pkg/meta/redis.go
================================================
//go:build !noredis
// +build !noredis

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bufio"
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/binary"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"hash/fnv"
	"io"
	"math/rand"
	"net"
	"net/url"
	"os"
	"runtime"
	"runtime/debug"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"github.com/redis/go-redis/v9"
	"github.com/redis/go-redis/v9/maintnotifications"
	"golang.org/x/sync/errgroup"
)

/*
	Node:       i$inode -> Attribute{type,mode,uid,gid,atime,mtime,ctime,nlink,length,rdev}
	Dir:        d$inode -> {name -> {inode,type}}
	Parent:     p$inode -> {parent -> count} // for hard links
	File:       c$inode_$indx -> [Slice{pos,id,length,off,len}]
	Symlink:    s$inode -> target
	Xattr:      x$inode -> {name -> value}
	Flock:      lockf$inode -> { $sid_$owner -> ltype }
	POSIX lock: lockp$inode -> { $sid_$owner -> Plock(pid,ltype,start,end) }
	Sessions:   sessions -> [ $sid -> heartbeat ]
	sustained:  session$sid -> [$inode]
	locked:     locked$sid -> { lockf$inode or lockp$inode }

	Removed files: delfiles -> [$inode:$length -> seconds]
	detached nodes: detachedNodes -> [$inode -> seconds]
	Slices refs: sliceRef -> {k$sliceId_$size -> refcount}

	Dir data length:   dirDataLength -> { $inode -> length }
	Dir used space:    dirUsedSpace -> { $inode -> usedSpace }
	Dir used inodes:   dirUsedInodes -> { $inode -> usedInodes }
	Quota:             dirQuota -> { $inode -> {maxSpace, maxInodes} }
	Quota used space:  dirQuotaUsedSpace -> { $inode -> usedSpace }
	Quota used inodes: dirQuotaUsedInodes -> { $inode -> usedInodes }
	Acl: acl -> { $acl_id -> acl }
	KrbToken: krbToken -> { $token_id -> token }

	Redis features:
	  Sorted Set: 1.2+
	  Hash Set: 4.0+
	  Transaction: 2.2+
	  Scripting: 2.6+
	  Scan: 2.8+
*/

type redisMeta struct {
	*baseMeta
	rdb        redis.UniversalClient
	prefix     string
	shaLookup  string // The SHA returned by Redis for the loaded `scriptLookup`
	shaResolve string // The SHA returned by Redis for the loaded `scriptResolve`
	cache      *redisCache
}

var _ Meta = (*redisMeta)(nil)
var _ engine = (*redisMeta)(nil)

func init() {
	Register("redis", newRedisMeta)
	Register("rediss", newRedisMeta)
	Register("unix", newRedisMeta)
}

// newRedisMeta return a meta store using Redis.
func newRedisMeta(driver, addr string, conf *Config) (Meta, error) {
	uri := driver + "://" + addr
	u, err := url.Parse(uri)
	if err != nil {
		return nil, fmt.Errorf("url parse %s: %s", uri, err)
	}
	values := u.Query()
	query := queryMap{&values}
	minRetryBackoff := query.duration("min-retry-backoff", "min_retry_backoff", time.Millisecond*20)
	maxRetryBackoff := query.duration("max-retry-backoff", "max_retry_backoff", time.Second*10)
	readTimeout := query.duration("read-timeout", "read_timeout", time.Second*30)
	writeTimeout := query.duration("write-timeout", "write_timeout", time.Second*5)
	routeRead := query.pop("route-read")
	skipVerify := query.pop("insecure-skip-verify")
	certFile := query.pop("tls-cert-file")
	keyFile := query.pop("tls-key-file")
	caCertFile := query.pop("tls-ca-cert-file")
	tlsServerName := query.pop("tls-server-name")

	// Client-side caching options
	clientCacheStr := query.pop("client-cache")
	clientCache := clientCacheStr != "false" && clientCacheStr != ""
	clientCacheSize := query.getInt("client-cache-size", "client_cache_size", 12800)
	// Default TTL to prevent reading stale cache for a long time when the connection fails.
	clientCacheExpiry := query.duration("client-cache-expire", "client_cache_expire", time.Minute)
	clientCachePreload := query.getInt("client-cache-preload", "client_cache_preload", 0) // may cause conflict
	u.RawQuery = values.Encode()

	hosts := u.Host
	opt, err := redis.ParseURL(u.String())
	if err != nil {
		return nil, fmt.Errorf("redis parse %s: %s", uri, err)
	}
	if opt.TLSConfig != nil {
		opt.TLSConfig.ServerName = tlsServerName // use the host of each connection as ServerName
		opt.TLSConfig.InsecureSkipVerify = skipVerify != ""
		if certFile != "" {
			cert, err := tls.LoadX509KeyPair(certFile, keyFile)
			if err != nil {
				return nil, fmt.Errorf("get certificate error certFile:%s keyFile:%s error:%s", certFile, keyFile, err)
			}
			opt.TLSConfig.Certificates = []tls.Certificate{cert}
		}
		if caCertFile != "" {
			caCert, err := os.ReadFile(caCertFile)
			if err != nil {
				return nil, fmt.Errorf("read ca cert file error path:%s error:%s", caCertFile, err)
			}
			caCertPool := x509.NewCertPool()
			caCertPool.AppendCertsFromPEM(caCert)
			opt.TLSConfig.RootCAs = caCertPool
		}
	}
	if opt.Password == "" {
		opt.Password = os.Getenv("REDIS_PASSWORD")
	}
	if opt.Password == "" {
		opt.Password = os.Getenv("META_PASSWORD")
	}
	if opt.Password == "" {
		if passwordFile := os.Getenv("META_PASSWORD_FILE"); passwordFile != "" {
			password, err := readPasswordFromFile(passwordFile)
			if err != nil {
				logger.Errorf("%v", err)
			} else {
				opt.Password = password
			}
		}
	}
	opt.MaxRetries = conf.Retries
	if opt.MaxRetries == 0 {
		opt.MaxRetries = -1 // Redis use -1 to disable retries
	}
	opt.MinRetryBackoff = minRetryBackoff
	opt.MaxRetryBackoff = maxRetryBackoff
	opt.ReadTimeout = readTimeout
	opt.WriteTimeout = writeTimeout
	opt.MaintNotificationsConfig = &maintnotifications.Config{Mode: maintnotifications.ModeDisabled}
	var prefix string
	var rdb redis.UniversalClient

	if strings.Contains(hosts, ",") && strings.Index(hosts, ",") < strings.Index(hosts, ":") {
		var fopt redis.FailoverOptions
		ps := strings.Split(hosts, ",")
		fopt.MasterName = ps[0]
		fopt.SentinelAddrs = ps[1:]
		_, port, _ := net.SplitHostPort(fopt.SentinelAddrs[len(fopt.SentinelAddrs)-1])
		if port == "" {
			port = "26379"
		}
		for i, addr := range fopt.SentinelAddrs {
			h, p, e := net.SplitHostPort(addr)
			if e != nil {
				fopt.SentinelAddrs[i] = net.JoinHostPort(addr, port)
			} else if p == "" {
				fopt.SentinelAddrs[i] = net.JoinHostPort(h, port)
			}
		}
		fopt.SentinelPassword = os.Getenv("SENTINEL_PASSWORD")
		fopt.DB = opt.DB
		fopt.Username = opt.Username
		fopt.Password = opt.Password
		fopt.TLSConfig = opt.TLSConfig
		fopt.MaxRetries = opt.MaxRetries
		fopt.MinRetryBackoff = opt.MinRetryBackoff
		fopt.MaxRetryBackoff = opt.MaxRetryBackoff
		fopt.DialTimeout = opt.DialTimeout
		fopt.ReadTimeout = opt.ReadTimeout
		fopt.WriteTimeout = opt.WriteTimeout
		fopt.PoolFIFO = opt.PoolFIFO               // default: false
		fopt.PoolSize = opt.PoolSize               // default: GOMAXPROCS * 10
		fopt.PoolTimeout = opt.PoolTimeout         // default: ReadTimeout + 1 second.
		fopt.MinIdleConns = opt.MinIdleConns       // disable by default
		fopt.MaxIdleConns = opt.MaxIdleConns       // disable by default
		fopt.MaxActiveConns = opt.MaxActiveConns   // default: 0, no limit
		fopt.ConnMaxIdleTime = opt.ConnMaxIdleTime // default: 30 minutes
		fopt.ConnMaxLifetime = opt.ConnMaxLifetime // disable by default
		if conf.ReadOnly {
			// NOTE: RouteByLatency and RouteRandomly are not supported since they require cluster client
			fopt.ReplicaOnly = routeRead == "replica"
		}
		rdb = redis.NewFailoverClient(&fopt)
	} else {
		if !strings.Contains(hosts, ",") {
			c := redis.NewClient(opt)
			info, err := c.ClusterInfo(Background()).Result()
			if err != nil && strings.Contains(err.Error(), "cluster mode") || err == nil && strings.Contains(info, "cluster_state:") {
				logger.Infof("redis %s is in cluster mode", hosts)
			} else {
				rdb = c
			}
		}
		if rdb == nil {
			var copt redis.ClusterOptions
			copt.Addrs = strings.Split(hosts, ",")
			copt.MaxRedirects = 1
			copt.Username = opt.Username
			copt.Password = opt.Password
			copt.TLSConfig = opt.TLSConfig
			copt.MaxRetries = opt.MaxRetries
			copt.MinRetryBackoff = opt.MinRetryBackoff
			copt.MaxRetryBackoff = opt.MaxRetryBackoff
			copt.DialTimeout = opt.DialTimeout
			copt.ReadTimeout = opt.ReadTimeout
			copt.WriteTimeout = opt.WriteTimeout
			copt.PoolFIFO = opt.PoolFIFO               // default: false
			copt.PoolSize = opt.PoolSize               // default: GOMAXPROCS * 10
			copt.PoolTimeout = opt.PoolTimeout         // default: ReadTimeout + 1 second.
			copt.MinIdleConns = opt.MinIdleConns       // disable by default
			copt.MaxIdleConns = opt.MaxIdleConns       // disable by default
			copt.MaxActiveConns = opt.MaxActiveConns   // default: 0, no limit
			copt.ConnMaxIdleTime = opt.ConnMaxIdleTime // default: 30 minutes
			copt.ConnMaxLifetime = opt.ConnMaxLifetime // disable by default
			if conf.ReadOnly {
				switch routeRead {
				case "random":
					copt.RouteRandomly = true
				case "latency":
					copt.RouteByLatency = true
				case "replica":
					copt.ReadOnly = true
				default:
					// route to primary
				}
			}
			rdb = redis.NewClusterClient(&copt)
			prefix = fmt.Sprintf("{%d}", opt.DB)
		}
	}

	m := &redisMeta{
		baseMeta: newBaseMeta(addr, conf),
		rdb:      rdb,
		prefix:   prefix,
	}
	if clientCache {
		m.cache = newRedisCache(prefix, clientCacheSize, clientCacheExpiry, clientCachePreload)
		if err = m.cache.init(m.rdb); err != nil {
			logger.Warnf("Failed to setup client-side caching: %v", err)
			m.cache = nil
		}
	}
	m.en = m
	m.checkServerConfig()
	return m, nil
}

func (m *redisMeta) Shutdown() error {
	if m.cache != nil {
		m.cache.close()
		m.cache = nil
	}
	return m.rdb.Close()
}

// Override NewSession to initialize client-side cache after session is created
func (m *redisMeta) NewSession(record bool) error {
	// First, create the session normally
	err := m.baseMeta.NewSession(record)
	if err != nil {
		return err
	}
	go m.preloadCache()
	return nil
}

func (m *redisMeta) doDeleteSlice(id uint64, size uint32) error {
	return m.rdb.HDel(Background(), m.sliceRefs(), m.sliceKey(id, size)).Err()
}

func (m *redisMeta) Name() string {
	return "redis"
}

func (m *redisMeta) doInit(format *Format, force bool) error {
	ctx := Background()
	body, err := m.rdb.Get(ctx, m.setting()).Bytes()
	if err != nil && err != redis.Nil {
		return err
	}
	if err == nil {
		var old Format
		err = json.Unmarshal(body, &old)
		if err != nil {
			return fmt.Errorf("existing format is broken: %s", err)
		}
		if !old.DirStats && format.DirStats {
			// remove dir stats as they are outdated
			err := m.rdb.Del(ctx, m.dirUsedInodesKey(), m.dirUsedSpaceKey()).Err()
			if err != nil {
				return errors.Wrap(err, "remove dir stats")
			}
		}
		if !old.UserGroupQuota && format.UserGroupQuota {
			// remove user group quota as they are outdated
			err := m.rdb.Del(ctx, m.userQuotaKey(), m.userQuotaUsedSpaceKey(), m.userQuotaUsedInodesKey(),
				m.groupQuotaKey(), m.groupQuotaUsedSpaceKey(), m.groupQuotaUsedInodesKey()).Err()
			if err != nil {
				return errors.Wrap(err, "remove user group quota")
			}
		}
		if err = format.update(&old, force); err != nil {
			return errors.Wrap(err, "update format")
		}
	}

	data, err := json.MarshalIndent(format, "", "")
	if err != nil {
		return fmt.Errorf("json: %s", err)
	}
	ts := time.Now().Unix()
	attr := &Attr{
		Typ:    TypeDirectory,
		Atime:  ts,
		Mtime:  ts,
		Ctime:  ts,
		Nlink:  2,
		Length: 4 << 10,
		Parent: RootInode,
	}
	if format.TrashDays > 0 {
		attr.Mode = 0555
		if err = m.rdb.SetNX(ctx, m.inodeKey(TrashInode), m.marshal(attr), 0).Err(); err != nil {
			return err
		}
	}
	if err = m.rdb.Set(ctx, m.setting(), data, 0).Err(); err != nil {
		return err
	}
	m.fmt = format
	if body != nil {
		return nil
	}

	// root inode
	attr.Mode = 0777
	return m.rdb.Set(ctx, m.inodeKey(RootInode), m.marshal(attr), 0).Err()
}

func (m *redisMeta) cacheACLs(ctx Context) error {
	if !m.getFormat().EnableACL {
		return nil
	}

	vals, err := m.rdb.HGetAll(ctx, m.aclKey()).Result()
	if err != nil {
		return err
	}

	for k, v := range vals {
		id, _ := strconv.ParseUint(k, 10, 32)
		tmpRule := &aclAPI.Rule{}
		tmpRule.Decode([]byte(v))
		m.aclCache.Put(uint32(id), tmpRule)
	}
	return nil
}

func (m *redisMeta) Reset() error {
	if m.prefix != "" {
		return m.scan(Background(), "*", func(keys []string) error {
			return m.rdb.Del(Background(), keys...).Err()
		})
	}
	return m.rdb.FlushDB(Background()).Err()
}

func (m *redisMeta) doLoad() ([]byte, error) {
	body, err := m.rdb.Get(Background(), m.setting()).Bytes()
	if err == redis.Nil {
		return nil, nil
	}
	return body, err
}

func (m *redisMeta) doNewSession(sinfo []byte, update bool) error {
	err := m.rdb.ZAdd(Background(), m.allSessions(), redis.Z{
		Score:  float64(m.expireTime()),
		Member: strconv.FormatUint(m.sid, 10)}).Err()
	if err != nil {
		return fmt.Errorf("set session ID %d: %s", m.sid, err)
	}
	if err = m.rdb.HSet(Background(), m.sessionInfos(), m.sid, sinfo).Err(); err != nil {
		return fmt.Errorf("set session info: %s", err)
	}

	if m.shaLookup, err = m.rdb.ScriptLoad(Background(), scriptLookup).Result(); err != nil {
		logger.Warnf("load scriptLookup: %v", err)
		m.shaLookup = ""
	}
	if m.shaResolve, err = m.rdb.ScriptLoad(Background(), scriptResolve).Result(); err != nil {
		logger.Warnf("load scriptResolve: %v", err)
		m.shaResolve = ""
	}

	if !m.conf.NoBGJob {
		go m.cleanupLegacies()
	}
	return nil
}

func (m *redisMeta) getCounter(name string) (int64, error) {
	v, err := m.rdb.Get(Background(), m.counterKey(name)).Int64()
	if err == redis.Nil {
		err = nil
	}
	return v, err
}

func (m *redisMeta) incrCounter(name string, value int64) (int64, error) {
	if m.conf.ReadOnly {
		return 0, syscall.EROFS
	}
	key := m.counterKey(name)
	if name == "nextInode" || name == "nextChunk" {
		// for nextinode, nextchunk
		// the current one is already used
		v, err := m.rdb.IncrBy(Background(), key, value).Result()
		return v + 1, err
	}
	return m.rdb.IncrBy(Background(), key, value).Result()
}

func (m *redisMeta) setIfSmall(name string, value, diff int64) (bool, error) {
	var changed bool
	ctx := Background()
	name = m.prefix + name
	err := m.txn(ctx.WithValue(txMethodKey{}, "setIfSmall:"+name), func(tx *redis.Tx) error {
		changed = false
		old, err := tx.Get(ctx, name).Int64()
		if err != nil && err != redis.Nil {
			return err
		}
		if old > value-diff {
			return nil
		} else {
			changed = true
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.Set(ctx, name, value, 0)
				return nil
			})
			return err
		}
	}, name)

	return changed, err
}

func (m *redisMeta) getSession(sid string, detail bool) (*Session, error) {
	ctx := Background()
	info, err := m.rdb.HGet(ctx, m.sessionInfos(), sid).Bytes()
	if err == redis.Nil { // legacy client has no info
		info = []byte("{}")
	} else if err != nil {
		return nil, fmt.Errorf("HGet sessionInfos %s: %s", sid, err)
	}
	var s Session
	if err := json.Unmarshal(info, &s); err != nil {
		return nil, fmt.Errorf("corrupted session info; json error: %s", err)
	}
	s.Sid, _ = strconv.ParseUint(sid, 10, 64)
	if detail {
		inodes, err := m.rdb.SMembers(ctx, m.sustained(s.Sid)).Result()
		if err != nil {
			return nil, fmt.Errorf("SMembers %s: %s", sid, err)
		}
		s.Sustained = make([]Ino, 0, len(inodes))
		for _, sinode := range inodes {
			inode, _ := strconv.ParseUint(sinode, 10, 64)
			s.Sustained = append(s.Sustained, Ino(inode))
		}

		locks, err := m.rdb.SMembers(ctx, m.lockedKey(s.Sid)).Result()
		if err != nil {
			return nil, fmt.Errorf("SMembers %s: %s", sid, err)
		}
		s.Flocks = make([]Flock, 0, len(locks)) // greedy
		s.Plocks = make([]Plock, 0, len(locks))
		for _, lock := range locks {
			owners, err := m.rdb.HGetAll(ctx, lock).Result()
			if err != nil {
				return nil, fmt.Errorf("HGetAll %s: %s", lock, err)
			}
			isFlock := strings.HasPrefix(lock, m.prefix+"lockf")
			inode, _ := strconv.ParseUint(lock[len(m.prefix)+5:], 10, 64)
			for k, v := range owners {
				parts := strings.Split(k, "_")
				if parts[0] != sid {
					continue
				}
				owner, _ := strconv.ParseUint(parts[1], 16, 64)
				if isFlock {
					s.Flocks = append(s.Flocks, Flock{Ino(inode), owner, v})
				} else {
					s.Plocks = append(s.Plocks, Plock{Ino(inode), owner, loadLocks([]byte(v))})
				}
			}
		}
	}
	return &s, nil
}

func (m *redisMeta) GetSession(sid uint64, detail bool) (*Session, error) {
	var legacy bool
	key := strconv.FormatUint(sid, 10)
	score, err := m.rdb.ZScore(Background(), m.allSessions(), key).Result()
	if err == redis.Nil {
		legacy = true
		score, err = m.rdb.ZScore(Background(), legacySessions, key).Result()
	}
	if err == redis.Nil {
		err = fmt.Errorf("session not found: %d", sid)
	}
	if err != nil {
		return nil, err
	}
	s, err := m.getSession(key, detail)
	if err != nil {
		return nil, err
	}
	s.Expire = time.Unix(int64(score), 0)
	if legacy {
		s.Expire = s.Expire.Add(time.Minute * 5)
	}
	return s, nil
}

func (m *redisMeta) ListSessions() ([]*Session, error) {
	keys, err := m.rdb.ZRangeWithScores(Background(), m.allSessions(), 0, -1).Result()
	if err != nil {
		return nil, err
	}
	sessions := make([]*Session, 0, len(keys))
	for _, k := range keys {
		s, err := m.getSession(k.Member.(string), false)
		if err != nil {
			logger.Errorf("get session: %s", err)
			continue
		}
		s.Expire = time.Unix(int64(k.Score), 0)
		sessions = append(sessions, s)
	}

	// add clients with version before 1.0-beta3 as well
	keys, err = m.rdb.ZRangeWithScores(Background(), legacySessions, 0, -1).Result()
	if err != nil {
		logger.Errorf("Scan legacy sessions: %s", err)
		return sessions, nil
	}
	for _, k := range keys {
		s, err := m.getSession(k.Member.(string), false)
		if err != nil {
			logger.Errorf("Get legacy session: %s", err)
			continue
		}
		s.Expire = time.Unix(int64(k.Score), 0).Add(time.Minute * 5)
		sessions = append(sessions, s)
	}
	return sessions, nil
}

func (m *redisMeta) sustained(sid uint64) string {
	return m.prefix + "session" + strconv.FormatUint(sid, 10)
}

func (m *redisMeta) lockedKey(sid uint64) string {
	return m.prefix + "locked" + strconv.FormatUint(sid, 10)
}

func (m *redisMeta) symKey(inode Ino) string {
	return m.prefix + "s" + inode.String()
}

func (m *redisMeta) inodeKey(inode Ino) string {
	return m.prefix + "i" + inode.String()
}

func (m *redisMeta) entryKey(parent Ino) string {
	return m.prefix + "d" + parent.String()
}

func (m *redisMeta) parentKey(inode Ino) string {
	return m.prefix + "p" + inode.String()
}

func (m *redisMeta) chunkKey(inode Ino, indx uint32) string {
	return m.prefix + "c" + inode.String() + "_" + strconv.FormatInt(int64(indx), 10)
}

func (m *redisMeta) sliceKey(id uint64, size uint32) string {
	// inside hashset
	return "k" + strconv.FormatUint(id, 10) + "_" + strconv.FormatUint(uint64(size), 10)
}

func (m *redisMeta) xattrKey(inode Ino) string {
	return m.prefix + "x" + inode.String()
}

func (m *redisMeta) flockKey(inode Ino) string {
	return m.prefix + "lockf" + inode.String()
}

func (m *redisMeta) ownerKey(owner uint64) string {
	return fmt.Sprintf("%d_%016X", m.sid, owner)
}

func (m *redisMeta) plockKey(inode Ino) string {
	return m.prefix + "lockp" + inode.String()
}

func (m *redisMeta) setting() string {
	return m.prefix + "setting"
}

func (m *redisMeta) usedSpaceKey() string {
	return m.prefix + usedSpace
}

func (m *redisMeta) nextTrashKey() string {
	return m.prefix + "nextTrash"
}

func (m *redisMeta) counterKey(name string) string {
	if name == "nextInode" || name == "nextChunk" || name == "nextSession" {
		name = strings.ToLower(name)
	}
	return m.prefix + name
}

func (m *redisMeta) dirDataLengthKey() string {
	return m.prefix + "dirDataLength"
}

func (m *redisMeta) dirUsedSpaceKey() string {
	return m.prefix + "dirUsedSpace"
}

func (m *redisMeta) dirUsedInodesKey() string {
	return m.prefix + "dirUsedInodes"
}

func (m *redisMeta) dirQuotaUsedSpaceKey() string {
	return m.prefix + "dirQuotaUsedSpace"
}

func (m *redisMeta) dirQuotaUsedInodesKey() string {
	return m.prefix + "dirQuotaUsedInodes"
}

func (m *redisMeta) dirQuotaKey() string {
	return m.prefix + "dirQuota"
}

func (m *redisMeta) userQuotaUsedSpaceKey() string {
	return m.prefix + "userQuotaUsedSpace"
}

func (m *redisMeta) userQuotaUsedInodesKey() string {
	return m.prefix + "userQuotaUsedInodes"
}

func (m *redisMeta) userQuotaKey() string {
	return m.prefix + "userQuota"
}

func (m *redisMeta) groupQuotaUsedSpaceKey() string {
	return m.prefix + "groupQuotaUsedSpace"
}

func (m *redisMeta) groupQuotaUsedInodesKey() string {
	return m.prefix + "groupQuotaUsedInodes"
}

func (m *redisMeta) groupQuotaKey() string {
	return m.prefix + "groupQuota"
}

func (m *redisMeta) totalInodesKey() string {
	return m.prefix + totalInodes
}

func (m *redisMeta) aclKey() string {
	return m.prefix + "acl"
}

func (m *redisMeta) krbTokenKey() string {
	return m.prefix + "krbToken"
}

func (m *redisMeta) delfiles() string {
	return m.prefix + "delfiles"
}

func (m *redisMeta) detachedNodes() string {
	return m.prefix + "detachedNodes"
}

func (r *redisMeta) delSlices() string {
	return r.prefix + "delSlices"
}

func (r *redisMeta) allSessions() string {
	return r.prefix + "allSessions"
}

func (m *redisMeta) sessionInfos() string {
	return m.prefix + "sessionInfos"
}

func (m *redisMeta) sliceRefs() string {
	return m.prefix + "sliceRef"
}

func (m *redisMeta) packQuota(space, inodes int64) []byte {
	wb := utils.NewBuffer(16)
	wb.Put64(uint64(space))
	wb.Put64(uint64(inodes))
	return wb.Bytes()
}

func (m *redisMeta) parseQuota(buf []byte) (space, inodes int64) {
	if len(buf) == 0 {
		return 0, 0
	}
	if len(buf) != 16 {
		logger.Errorf("invalid quota value: %v", buf)
		return 0, 0
	}
	rb := utils.ReadBuffer(buf)
	return int64(rb.Get64()), int64(rb.Get64())
}

func (m *redisMeta) packEntry(_type uint8, inode Ino) []byte {
	wb := utils.NewBuffer(9)
	wb.Put8(_type)
	wb.Put64(uint64(inode))
	return wb.Bytes()
}

func (m *redisMeta) parseEntry(buf []byte) (uint8, Ino) {
	if len(buf) != 9 {
		panic("invalid entry")
	}
	return buf[0], Ino(binary.BigEndian.Uint64(buf[1:]))
}

func (m *redisMeta) updateStats(space int64, inodes int64) {
	atomic.AddInt64(&m.usedSpace, space)
	atomic.AddInt64(&m.usedInodes, inodes)
}

func (m *redisMeta) doSyncVolumeStat(ctx Context) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	var used, inodes int64
	if err := m.hscan(ctx, m.dirUsedSpaceKey(), func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			v, err := strconv.ParseInt(keys[i+1], 10, 64)
			if err != nil {
				logger.Warnf("invalid used space: %s->%s", keys[i], keys[i+1])
				continue
			}
			used += v
		}
		return nil
	}); err != nil {
		return err
	}
	if err := m.hscan(ctx, m.dirUsedInodesKey(), func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			v, err := strconv.ParseInt(keys[i+1], 10, 64)
			if err != nil {
				logger.Warnf("invalid used inode: %s->%s", keys[i], keys[i+1])
				continue
			}
			inodes += v
		}
		return nil
	}); err != nil {
		return err
	}

	var inoKeys []string
	if err := m.scan(ctx, m.prefix+"session*", func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			key := keys[i]
			if key == "sessions" {
				continue
			}

			inodes, err := m.rdb.SMembers(ctx, key).Result()
			if err != nil {
				logger.Warnf("SMembers %s: %s", key, err)
				continue
			}
			for _, sinode := range inodes {
				ino, err := strconv.ParseInt(sinode, 10, 64)
				if err != nil {
					logger.Warnf("invalid sustained: %s->%s", key, sinode)
					continue
				}
				inoKeys = append(inoKeys, m.inodeKey(Ino(ino)))
			}
		}
		return nil
	}); err != nil {
		return err
	}

	batch := 1000
	for i := 0; i < len(inoKeys); i += batch {
		end := i + batch
		if end > len(inoKeys) {
			end = len(inoKeys)
		}
		values, err := m.rdb.MGet(ctx, inoKeys[i:end]...).Result()
		if err != nil {
			return err
		}
		var attr Attr
		for _, v := range values {
			if v != nil {
				m.parseAttr([]byte(v.(string)), &attr)
				used += align4K(attr.Length)
				inodes += 1
			}
		}
	}
	if err := m.scanTrashEntry(ctx, func(_ Ino, length uint64) {
		used += align4K(length)
		inodes += 1
	}); err != nil {
		return err
	}
	logger.Debugf("Used space: %s, inodes: %d", humanize.IBytes(uint64(used)), inodes)
	if err := m.rdb.Set(ctx, m.totalInodesKey(), strconv.FormatInt(inodes, 10), 0).Err(); err != nil {
		return fmt.Errorf("set total inodes: %s", err)
	}
	return m.rdb.Set(ctx, m.usedSpaceKey(), strconv.FormatInt(used, 10), 0).Err()
}

// redisMeta updates the usage in each transaction
func (m *redisMeta) doFlushStats() {}

func (m *redisMeta) handleLuaResult(op string, res interface{}, err error, returnedIno *int64, returnedAttr *string) syscall.Errno {
	if err != nil {
		msg := err.Error()
		if strings.Contains(msg, "NOSCRIPT") {
			var err2 error
			switch op {
			case "lookup":
				m.shaLookup, err2 = m.rdb.ScriptLoad(Background(), scriptLookup).Result()
			case "resolve":
				m.shaResolve, err2 = m.rdb.ScriptLoad(Background(), scriptResolve).Result()
			default:
				return syscall.ENOTSUP
			}
			if err2 == nil {
				logger.Infof("loaded script succeed for %s", op)
				return syscall.EAGAIN
			} else {
				logger.Warnf("load script %s: %s", op, err2)
				return syscall.ENOTSUP
			}
		} else if strings.Contains(msg, "ENOENT") {
			return syscall.ENOENT
		} else if strings.Contains(msg, "EACCESS") {
			return syscall.EACCES
		} else if strings.Contains(msg, "ENOTDIR") {
			return syscall.ENOTDIR
		} else if strings.Contains(msg, "ENOTSUP") {
			return syscall.ENOTSUP
		} else {
			logger.Warnf("unexpected error for %s: %s", op, msg)
			switch op {
			case "lookup":
				m.shaLookup = ""
			case "resolve":
				m.shaResolve = ""
			}
			return syscall.ENOTSUP
		}
	}
	vals, ok := res.([]interface{})
	if !ok {
		logger.Errorf("invalid script result: %v", res)
		return syscall.ENOTSUP
	}
	*returnedIno, ok = vals[0].(int64)
	if !ok {
		logger.Errorf("invalid script result: %v", res)
		return syscall.ENOTSUP
	}
	if vals[1] == nil {
		return syscall.ENOTSUP
	}
	*returnedAttr, ok = vals[1].(string)
	if !ok {
		logger.Errorf("invalid script result: %v", res)
		return syscall.ENOTSUP
	}
	return 0
}

func (m *redisMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno {
	var foundIno Ino
	var foundType uint8
	var encodedAttr []byte
	var err error
	entryKey := m.entryKey(parent)
	if m.cache != nil {
		if entry, ok := m.cache.entryCache.Get(m.cache.entryName(parent, name)); ok {
			if !entry.isMark() {
				*inode = entry.ino
				if attr != nil {
					*attr = entry.Attr
				}
				return 0
			}
			m.cache.entryCache.AddIf(m.cache.entryName(parent, name), &entryMark, func(oldEntry *cachedEntry, exists bool) bool {
				return exists
			})
		}
	}
	if len(m.shaLookup) > 0 && attr != nil && !m.conf.CaseInsensi && m.prefix == "" {
		var res interface{}
		var returnedIno int64
		var returnedAttr string
		res, err = m.rdb.EvalSha(ctx, m.shaLookup, []string{entryKey, name}).Result()
		if st := m.handleLuaResult("lookup", res, err, &returnedIno, &returnedAttr); st == 0 {
			foundIno = Ino(returnedIno)
			encodedAttr = []byte(returnedAttr)
		} else if st == syscall.EAGAIN {
			return m.doLookup(ctx, parent, name, inode, attr)
		} else if st != syscall.ENOTSUP {
			return st
		}
	}
	if foundIno == 0 || len(encodedAttr) == 0 {
		var buf []byte
		buf, err = m.rdb.HGet(ctx, entryKey, name).Bytes()
		if err != nil {
			return errno(err)
		}
		foundType, foundIno = m.parseEntry(buf)
		encodedAttr, err = m.rdb.Get(ctx, m.inodeKey(foundIno)).Bytes()
	}

	if err == nil {
		m.parseAttr(encodedAttr, attr)
		m.of.Update(foundIno, attr)
		if m.cache != nil {
			ce := &cachedEntry{ino: foundIno}
			m.parseAttr(encodedAttr, &ce.Attr)
			_, _ = m.cache.entryCache.AddIf(m.cache.entryName(parent, name), ce, func(oldEntry *cachedEntry, exists bool) bool {
				return exists && oldEntry.isMark()
			})
		}
	} else if err == redis.Nil { // corrupt entry
		logger.Warnf("no attribute for inode %d (%d, %s)", foundIno, parent, name)
		*attr = Attr{Typ: foundType}
		err = nil
	}
	*inode = foundIno
	return errno(err)
}

func (m *redisMeta) Resolve(ctx Context, parent Ino, path string, inode *Ino, attr *Attr) syscall.Errno {
	if len(m.shaResolve) == 0 || m.conf.CaseInsensi || m.prefix != "" {
		return syscall.ENOTSUP
	}
	defer m.timeit("Resolve", time.Now())
	parent = m.checkRoot(parent)
	keys := []string{parent.String(), path,
		strconv.FormatUint(uint64(ctx.Uid()), 10)}
	var gids []interface{}
	for _, gid := range ctx.Gids() {
		gids = append(gids, strconv.FormatUint(uint64(gid), 10))
	}
	res, err := m.rdb.EvalSha(ctx, m.shaResolve, keys, gids...).Result()
	var returnedIno int64
	var returnedAttr string
	st := m.handleLuaResult("resolve", res, err, &returnedIno, &returnedAttr)
	if st == 0 {
		if inode != nil {
			*inode = Ino(returnedIno)
		}
		m.parseAttr([]byte(returnedAttr), attr)
	} else if st == syscall.EAGAIN {
		return m.Resolve(ctx, parent, path, inode, attr)
	}
	return st
}

func (m *redisMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	a, err := m.rdb.Get(ctx, m.inodeKey(inode)).Bytes()
	if err == nil {
		m.parseAttr(a, attr)
	}
	return errno(err)
}

type timeoutError interface {
	Timeout() bool
}

func (m *redisMeta) shouldRetry(err error, retryOnFailure bool) bool {
	switch err {
	case redis.TxFailedErr:
		return true
	case io.EOF, io.ErrUnexpectedEOF:
		return retryOnFailure
	case nil, context.Canceled, context.DeadlineExceeded:
		return false
	}

	if v, ok := err.(timeoutError); ok && v.Timeout() {
		return retryOnFailure
	}

	s := err.Error()
	if s == "ERR max number of clients reached" ||
		strings.Contains(s, "Conn is in a bad state") ||
		strings.Contains(s, "EXECABORT") {
		return true
	}
	ps := strings.SplitN(s, " ", 3)
	switch ps[0] {
	case "LOADING":
	case "READONLY":
	case "CLUSTERDOWN":
	case "TRYAGAIN":
	case "MOVED":
	case "ASK":
	case "ERR":
		if len(ps) > 1 {
			switch ps[1] {
			case "DISABLE":
				fallthrough
			case "NOWRITE":
				fallthrough
			case "NOREAD":
				return true
			}
		}
		return false
	default:
		return false
	}
	return true
}

// errNo is an alias to syscall.Errno to disable retry in Redis Cluster
type errNo uintptr

func (e errNo) Error() string {
	return syscall.Errno(e).Error()
}

// replaceErrno replace returned syscall.Errno as errNo
func replaceErrno(txf func(tx *redis.Tx) error) func(tx *redis.Tx) error {
	return func(tx *redis.Tx) error {
		err := txf(tx)
		if eno, ok := err.(syscall.Errno); ok {
			err = errNo(eno)
		}
		return err
	}
}

func (m *redisMeta) txn(ctx Context, txf func(tx *redis.Tx) error, keys ...string) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	for _, k := range keys {
		if !strings.HasPrefix(k, m.prefix) {
			panic(fmt.Sprintf("Invalid key %s not starts with prefix %s", k, m.prefix))
		}
	}
	var khash = fnv.New32()
	_, _ = khash.Write([]byte(keys[0]))
	h := uint(khash.Sum32())

	start := time.Now()
	defer func() { m.txDist.Observe(time.Since(start).Seconds()) }()

	m.txLock(h)
	defer m.txUnlock(h)
	// TODO: enable retry for some of idempotent transactions
	var (
		retryOnFailure = false
		lastErr        error
		method         txMethod
	)
	for i := 0; i < 50; i++ {
		if ctx.Canceled() {
			logger.Warnf("Transaction %s interrupted after %s, tried %d, keys: %v", method.name(ctx), time.Since(start), i+1, keys)
			return syscall.EINTR
		}
		err := m.rdb.Watch(ctx, replaceErrno(txf), keys...)
		if eno, ok := err.(errNo); ok {
			if eno == 0 {
				err = nil
			} else {
				err = syscall.Errno(eno)
			}
		}
		if err != nil && m.shouldRetry(err, retryOnFailure) {
			m.txRestart.WithLabelValues(method.name(ctx)).Add(1)
			logger.Debugf("Transaction failed, restart it (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(rand.Int()%((i+1)*(i+1))))
			continue
		} else if err == nil && i > 1 {
			logger.Warnf("Transaction succeeded after %d tries (%s), keys: %v, method: %s, last error: %s", i+1, time.Since(start), keys, method.name(ctx), lastErr)
		}
		return err
	}
	logger.Warnf("Already tried 50 times, returning: %s", lastErr)
	return lastErr
}

func (m *redisMeta) doTruncate(ctx Context, inode Ino, flags uint8, length uint64, delta *dirStat, attr *Attr, skipPermCheck bool) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		*delta = dirStat{}
		var t Attr
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, &t)
		if t.Typ != TypeFile || t.Flags&(FlagImmutable|FlagAppend) != 0 || (flags == 0 && t.Parent > TrashInode) {
			return syscall.EPERM
		}
		if !skipPermCheck {
			if st := m.Access(ctx, inode, MODE_MASK_W, &t); st != 0 {
				return st
			}
		}
		if length == t.Length {
			*attr = t
			return nil
		}
		delta.length = int64(length) - int64(t.Length)
		delta.space = align4K(length) - align4K(t.Length)
		if err := m.checkQuota(ctx, delta.space, 0, t.Uid, t.Gid, m.getParents(ctx, tx, inode, t.Parent)...); err != 0 {
			return err
		}
		var zeroChunks []uint32
		var left, right = t.Length, length
		if left > right {
			right, left = left, right
		}
		if (right-left)/ChunkSize >= 10000 {
			// super large
			var cursor uint64
			var keys []string
			for {
				keys, cursor, err = tx.Scan(ctx, cursor, m.prefix+fmt.Sprintf("c%d_*", inode), 10000).Result()
				if err != nil {
					return err
				}
				for _, key := range keys {
					indx, err := strconv.Atoi(strings.Split(key[len(m.prefix):], "_")[1])
					if err != nil {
						logger.Errorf("parse %s: %s", key, err)
						continue
					}
					if uint64(indx) > left/ChunkSize && uint64(indx) < right/ChunkSize {
						zeroChunks = append(zeroChunks, uint32(indx))
					}
				}
				if cursor <= 0 {
					break
				}
			}
		} else {
			for i := left/ChunkSize + 1; i < right/ChunkSize; i++ {
				zeroChunks = append(zeroChunks, uint32(i))
			}
		}
		t.Length = length
		now := time.Now()
		t.Mtime = now.Unix()
		t.Mtimensec = uint32(now.Nanosecond())
		t.Ctime = now.Unix()
		t.Ctimensec = uint32(now.Nanosecond())
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(&t), 0)
			// zero out from left to right
			var l = uint32(right - left)
			if right > (left/ChunkSize+1)*ChunkSize {
				l = ChunkSize - uint32(left%ChunkSize)
			}
			pipe.RPush(ctx, m.chunkKey(inode, uint32(left/ChunkSize)), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l))
			buf := marshalSlice(0, 0, 0, 0, ChunkSize)
			for _, indx := range zeroChunks {
				pipe.RPushX(ctx, m.chunkKey(inode, indx), buf)
			}
			if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 {
				pipe.RPush(ctx, m.chunkKey(inode, uint32(right/ChunkSize)), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize)))
			}
			pipe.IncrBy(ctx, m.usedSpaceKey(), delta.space)
			return nil
		})
		if err == nil {
			*attr = t
		}
		return err
	}, m.inodeKey(inode)))
}

func (m *redisMeta) doFallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		*delta = dirStat{}
		t := Attr{}
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, &t)
		if t.Typ == TypeFIFO {
			return syscall.EPIPE
		}
		if t.Typ != TypeFile || (t.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if st := m.Access(ctx, inode, MODE_MASK_W, &t); st != 0 {
			return st
		}
		if (t.Flags&FlagAppend) != 0 && (mode&^fallocKeepSize) != 0 {
			return syscall.EPERM
		}
		length := t.Length
		if off+size > t.Length {
			if mode&fallocKeepSize == 0 {
				length = off + size
			}
		}

		old := t.Length
		delta.length = int64(length) - int64(old)
		delta.space = align4K(length) - align4K(old)
		if err := m.checkQuota(ctx, delta.space, 0, t.Uid, t.Gid, m.getParents(ctx, tx, inode, t.Parent)...); err != 0 {
			return err
		}
		t.Length = length
		now := time.Now()
		t.Mtime = now.Unix()
		t.Mtimensec = uint32(now.Nanosecond())
		t.Ctime = now.Unix()
		t.Ctimensec = uint32(now.Nanosecond())
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(&t), 0)
			if mode&(fallocZeroRange|fallocPunchHole) != 0 && off < old {
				off, size := off, size
				if off+size > old {
					size = old - off
				}
				for size > 0 {
					indx := uint32(off / ChunkSize)
					coff := off % ChunkSize
					l := size
					if coff+size > ChunkSize {
						l = ChunkSize - coff
					}
					pipe.RPush(ctx, m.chunkKey(inode, indx), marshalSlice(uint32(coff), 0, 0, 0, uint32(l)))
					off += l
					size -= l
				}
			}
			pipe.IncrBy(ctx, m.usedSpaceKey(), align4K(length)-align4K(old))
			return nil
		})
		if err == nil {
			*attr = t
		}
		return err
	}, m.inodeKey(inode)))
}

func (m *redisMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr, oldAttr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		var cur Attr
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, &cur)
		if oldAttr != nil {
			*oldAttr = cur
		}
		if cur.Parent > TrashInode {
			return syscall.EPERM
		}
		now := time.Now()

		rule, err := m.getACL(ctx, tx, cur.AccessACL)
		if err != nil {
			return err
		}

		rule = rule.Dup()
		dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now, rule)
		if st != 0 {
			return st
		}
		if dirtyAttr == nil {
			return nil
		}

		dirtyAttr.AccessACL, err = m.insertACL(ctx, tx, rule)
		if err != nil {
			return err
		}

		dirtyAttr.Ctime = now.Unix()
		dirtyAttr.Ctimensec = uint32(now.Nanosecond())
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(dirtyAttr), 0)
			return nil
		})
		if err == nil {
			*attr = *dirtyAttr
		}
		return err
	}, m.inodeKey(inode)))
}

func (m *redisMeta) doReadlink(ctx Context, inode Ino, noatime bool) (atime int64, target []byte, err error) {
	if noatime {
		target, err = m.rdb.Get(ctx, m.symKey(inode)).Bytes()
		if err == redis.Nil {
			err = nil
		}
		return
	}

	attr := &Attr{}
	now := time.Now()
	err = m.txn(ctx, func(tx *redis.Tx) error {
		rs, e := tx.MGet(ctx, m.inodeKey(inode), m.symKey(inode)).Result()
		if e != nil {
			return e
		}
		if rs[0] == nil {
			return syscall.ENOENT
		}
		m.parseAttr([]byte(rs[0].(string)), attr)
		if attr.Typ != TypeSymlink {
			return syscall.EINVAL
		}
		if rs[1] == nil {
			return syscall.EIO
		}
		target = []byte(rs[1].(string))
		if !m.atimeNeedsUpdate(attr, now) {
			atime = attr.Atime*int64(time.Second) + int64(attr.Atimensec)
			return nil
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		atime = now.UnixNano()
		_, e = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
			return nil
		})
		return e
	}, m.inodeKey(inode))
	return
}

func (m *redisMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, path string, inode *Ino, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		var pattr Attr
		a, err := tx.Get(ctx, m.inodeKey(parent)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if (pattr.Flags & FlagSkipTrash) != 0 {
			attr.Flags |= FlagSkipTrash
		}

		buf, err := tx.HGet(ctx, m.entryKey(parent), name).Bytes()
		if err != nil && err != redis.Nil {
			return err
		}
		var foundIno Ino
		var foundType uint8
		if err == nil {
			foundType, foundIno = m.parseEntry(buf)
		} else if m.conf.CaseInsensi { // err == redis.Nil
			if entry := m.resolveCase(ctx, parent, name); entry != nil {
				foundType, foundIno = entry.Attr.Typ, entry.Inode
			}
		}
		if foundIno != 0 {
			if _type == TypeFile || _type == TypeDirectory { // file for create, directory for subTrash
				a, err = tx.Get(ctx, m.inodeKey(foundIno)).Bytes()
				if err == nil {
					m.parseAttr(a, attr)
				} else if err == redis.Nil {
					*attr = Attr{Typ: foundType, Parent: parent} // corrupt entry
				} else {
					return err
				}
				*inode = foundIno
			}
			return syscall.EEXIST
		} else if parent == TrashInode {
			if next, err := tx.Incr(ctx, m.nextTrashKey()).Result(); err != nil { // Some inode will be wasted if conflict happens
				return err
			} else {
				*inode = TrashInode + Ino(next)
			}
		}

		mode &= 07777
		if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink {
			// inherit default acl
			if _type == TypeDirectory {
				attr.DefaultACL = pattr.DefaultACL
			}

			// set access acl by parent's default acl
			rule, err := m.getACL(ctx, tx, pattr.DefaultACL)
			if err != nil {
				return err
			}

			if rule.IsMinimal() {
				// simple acl as default
				attr.Mode = mode & (0xFE00 | rule.GetMode())
			} else {
				cRule := rule.ChildAccessACL(mode)
				id, err := m.insertACL(ctx, tx, cRule)
				if err != nil {
					return err
				}

				attr.AccessACL = id
				attr.Mode = (mode & 0xFE00) | cRule.GetMode()
			}
		} else {
			attr.Mode = mode & ^cumask
		}

		var updateParent bool
		now := time.Now()
		if parent != TrashInode {
			if _type == TypeDirectory {
				pattr.Nlink++
				updateParent = true
			}
			if updateParent || now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime {
				pattr.Mtime = now.Unix()
				pattr.Mtimensec = uint32(now.Nanosecond())
				pattr.Ctime = now.Unix()
				pattr.Ctimensec = uint32(now.Nanosecond())
				updateParent = true
			}
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		attr.Mtime = now.Unix()
		attr.Mtimensec = uint32(now.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())
		if ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" {
			attr.Gid = pattr.Gid
		} else if runtime.GOOS == "linux" && pattr.Mode&02000 != 0 {
			attr.Gid = pattr.Gid
			if _type == TypeDirectory {
				attr.Mode |= 02000
			} else if attr.Mode&02010 == 02010 && ctx.Uid() != 0 {
				var found bool
				for _, gid := range ctx.Gids() {
					if gid == pattr.Gid {
						found = true
					}
				}
				if !found {
					attr.Mode &= ^uint16(02000)
				}
			}
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(*inode), m.marshal(attr), 0)
			if updateParent {
				pipe.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
			}
			if _type == TypeSymlink {
				pipe.Set(ctx, m.symKey(*inode), path, 0)
			}
			pipe.HSet(ctx, m.entryKey(parent), name, m.packEntry(_type, *inode))
			if _type == TypeDirectory {
				field := (*inode).String()
				pipe.HSet(ctx, m.dirUsedInodesKey(), field, "0")
				pipe.HSet(ctx, m.dirDataLengthKey(), field, "0")
				pipe.HSet(ctx, m.dirUsedSpaceKey(), field, "0")
			}
			pipe.IncrBy(ctx, m.usedSpaceKey(), align4K(0))
			pipe.Incr(ctx, m.totalInodesKey())
			return nil
		})
		return err
	}, m.inodeKey(parent), m.entryKey(parent)))
}

func (m *redisMeta) doUnlink(ctx Context, parent Ino, name string, attr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash, inode Ino
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}
	if trash == 0 {
		defer func() { m.of.InvalidateChunk(inode, invalidateAttrOnly) }()
	}
	if attr == nil {
		attr = &Attr{}
	}
	var _type uint8
	var opened bool
	var newSpace, newInode int64
	err := m.txn(ctx, func(tx *redis.Tx) error {
		opened = false
		*attr = Attr{}
		newSpace, newInode = 0, 0
		buf, err := tx.HGet(ctx, m.entryKey(parent), name).Bytes()
		if err == redis.Nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parent, name); e != nil {
				name = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
				err = nil
			}
		}
		if err != nil {
			return err
		}
		_type, inode = m.parseEntry(buf)
		if _type == TypeDirectory {
			return syscall.EPERM
		}
		if err := tx.Watch(ctx, m.inodeKey(inode)).Err(); err != nil {
			return err
		}
		rs, err := tx.MGet(ctx, m.inodeKey(parent), m.inodeKey(inode)).Result()
		if err != nil {
			return err
		}
		if rs[0] == nil {
			return redis.Nil
		}
		var pattr Attr
		m.parseAttr([]byte(rs[0].(string)), &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		var updateParent bool
		now := time.Now()
		if !parent.IsTrash() && now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime {
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			updateParent = true
		}
		if rs[1] != nil {
			m.parseAttr([]byte(rs[1].(string)), attr)
			if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
				return syscall.EACCES
			}
			if (attr.Flags&FlagAppend) != 0 || (attr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (attr.Flags&FlagSkipTrash) != 0 && trash > 0 {
				trash = 0
				defer func() { m.of.InvalidateChunk(inode, invalidateAttrOnly) }()
			}
			if trash > 0 && attr.Nlink > 1 && tx.HExists(ctx, m.entryKey(trash), m.trashEntry(parent, inode, name)).Val() {
				trash = 0
				defer func() { m.of.InvalidateChunk(inode, invalidateAttrOnly) }()
			}
			attr.Ctime = now.Unix()
			attr.Ctimensec = uint32(now.Nanosecond())
			if trash == 0 {
				attr.Nlink--
				if _type == TypeFile && attr.Nlink == 0 && m.sid > 0 {
					opened = m.of.IsOpen(inode)
				}
			} else if attr.Parent > 0 {
				attr.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name)
			trash = 0
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.HDel(ctx, m.entryKey(parent), name)
			if updateParent {
				pipe.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
			}
			if attr.Nlink > 0 {
				pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
				if trash > 0 {
					pipe.HSet(ctx, m.entryKey(trash), m.trashEntry(parent, inode, name), buf)
					if attr.Parent == 0 {
						pipe.HIncrBy(ctx, m.parentKey(inode), trash.String(), 1)
					}
				}
				if attr.Parent == 0 {
					pipe.HIncrBy(ctx, m.parentKey(inode), parent.String(), -1)
				}
			} else {
				switch _type {
				case TypeFile:
					if opened {
						pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
						pipe.SAdd(ctx, m.sustained(m.sid), strconv.Itoa(int(inode)))
					} else {
						pipe.ZAdd(ctx, m.delfiles(), redis.Z{Score: float64(now.Unix()), Member: m.toDelete(inode, attr.Length)})
						pipe.Del(ctx, m.inodeKey(inode))
						newSpace, newInode = -align4K(attr.Length), -1
						pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
						pipe.Decr(ctx, m.totalInodesKey())
					}
				case TypeSymlink:
					pipe.Del(ctx, m.symKey(inode))
					fallthrough
				default:
					pipe.Del(ctx, m.inodeKey(inode))
					newSpace, newInode = -align4K(0), -1
					pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
					pipe.Decr(ctx, m.totalInodesKey())
				}
				pipe.Del(ctx, m.xattrKey(inode))
				if attr.Parent == 0 {
					pipe.Del(ctx, m.parentKey(inode))
				}
			}
			return nil
		})

		return err
	}, m.inodeKey(parent), m.entryKey(parent))
	if err == nil && trash == 0 {
		if _type == TypeFile && attr.Nlink == 0 {
			m.fileDeleted(opened, parent.IsTrash(), inode, attr.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, newSpace, newInode)
	}
	return errno(err)
}

func (m *redisMeta) doBatchUnlink(ctx Context, parent Ino, entries []*Entry, delta *dirStat, skipCheckTrash ...bool) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}
	var trash Ino
	if len(skipCheckTrash) == 0 || !skipCheckTrash[0] {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}

	type entryInfo struct {
		name      string
		inode     Ino
		typ       uint8
		trash     Ino
		attr      *Attr
		trashName string
		buf       []byte
	}
	type dNode struct {
		opened bool
		length uint64
	}

	// Each entry averages ~4 tx operations, so batch size should be 1000/4
	batchSize := 1000 / 4
	for len(entries) > 0 {
		if batchSize > len(entries) {
			batchSize = len(entries)
		}
		batch := entries[:batchSize]
		entries = entries[batchSize:]

		var entryInfos []*entryInfo
		var batchDirLength, batchDirSpace, batchDirInodes int64
		var batchFsSpace, batchFsInodes int64
		var deltas ugQuotaDeltas
		var delNodes map[Ino]*dNode
		watchKeys := []string{m.inodeKey(parent), m.entryKey(parent)}

		err := m.txn(ctx, func(tx *redis.Tx) error {
			batchDirLength, batchDirSpace, batchDirInodes = 0, 0, 0
			batchFsSpace, batchFsInodes = 0, 0
			deltas = make(ugQuotaDeltas)
			delNodes = make(map[Ino]*dNode)

			rs, err := tx.Get(ctx, m.inodeKey(parent)).Result()
			if err != nil {
				return err
			}
			var pattr Attr
			m.parseAttr([]byte(rs), &pattr)
			if pattr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
				return st
			}
			if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}

			entryKey := m.entryKey(parent)
			entryInfos = make([]*entryInfo, 0, len(batch))
			now := time.Now()
			enames := make([]string, 0, len(batch))
			for _, entry := range batch {
				enames = append(enames, string(entry.Name))
			}
			vals, err := tx.HMGet(ctx, entryKey, enames...).Result()
			if err != nil {
				return err
			}
			for idx, entry := range batch {
				val := vals[idx]
				if val == nil {
					continue
				}
				buf := []byte(val.(string))
				typ, ino := m.parseEntry(buf)
				if entry.Inode != ino || typ == TypeDirectory || (entry.Attr != nil && entry.Attr.Typ != typ) {
					continue
				}
				entryInfos = append(entryInfos, &entryInfo{
					name:  string(entry.Name),
					inode: ino,
					typ:   typ,
					trash: trash,
					buf:   buf,
				})
			}

			inodesSet := make(map[Ino]struct{}, len(entryInfos))
			for _, info := range entryInfos {
				if _, ok := inodesSet[info.inode]; !ok {
					inodesSet[info.inode] = struct{}{}
				}
			}

			// load inode attrs for all distinct inodes
			if len(inodesSet) > 0 {
				inodesList := make([]Ino, 0, len(inodesSet))
				keys := make([]string, 0, len(inodesSet))
				for ino := range inodesSet {
					inodesList = append(inodesList, ino)
					keys = append(keys, m.inodeKey(ino))
				}
				if err := tx.Watch(ctx, keys...).Err(); err != nil {
					return err
				}
				rs, err := tx.MGet(ctx, keys...).Result()
				if err != nil {
					return err
				}
				nodeMap := make(map[Ino]*Attr, len(inodesList))
				for i, v := range rs {
					if v == nil {
						continue
					}
					var a Attr
					m.parseAttr([]byte(v.(string)), &a)
					nodeMap[inodesList[i]] = &a
				}

				// iterate all target entries, apply basic checks and build info
				for _, info := range entryInfos {
					attr, ok := nodeMap[info.inode]
					if !ok {
						info.trash = 0
						info.attr = nil
						continue
					}
					if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
						return syscall.EACCES
					}
					if (attr.Flags&FlagAppend) != 0 || (attr.Flags&FlagImmutable) != 0 {
						return syscall.EPERM
					}
					if (attr.Flags & FlagSkipTrash) != 0 {
						info.trash = 0
					}
					info.attr = attr
				}
			}

			// check trash entries for hard links
			for _, info := range entryInfos {
				if info.attr == nil {
					continue
				}
				if info.trash > 0 && info.attr.Nlink > 1 {
					info.trashName = m.trashEntry(parent, info.inode, info.name)
					exists, err := tx.HExists(ctx, m.entryKey(info.trash), info.trashName).Result()
					if err != nil {
						return err
					}
					if exists {
						info.trash = 0
					}
				}
				// update ctime
				info.attr.Ctime = now.Unix()
				info.attr.Ctimensec = uint32(now.Nanosecond())
				if info.trash > 0 && info.attr.Parent > 0 {
					info.attr.Parent = info.trash
				}
				if info.trash == 0 && info.attr.Nlink > 0 {
					info.attr.Nlink--
				}
			}

			// check opened status for all inodes with Nlink == 0 after all decrements
			for _, info := range entryInfos {
				if info.attr != nil && info.trash == 0 && info.attr.Nlink == 0 && info.typ == TypeFile {
					opened := false
					if m.sid > 0 {
						opened = m.of.IsOpen(info.inode)
					}
					delNodes[info.inode] = &dNode{opened, info.attr.Length}
				}
			}

			var updateParent bool
			if !parent.IsTrash() && now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime {
				pattr.Mtime = now.Unix()
				pattr.Mtimensec = uint32(now.Nanosecond())
				pattr.Ctime = now.Unix()
				pattr.Ctimensec = uint32(now.Nanosecond())
				updateParent = true
			}

			nowUnix := now.Unix()
			visited := make(map[Ino]bool)
			visited[0] = true // skip dummyNode

			// collect data for batch operations
			var names []string
			var keys []string
			var sustained []interface{}
			var delfiles []redis.Z
			var inodes map[Ino]*Attr
			parentOps := make(map[string]map[string]int64)      // key -> field -> incr
			trashOps := make(map[string]map[string]interface{}) // key -> field -> value
			stats := make(map[string]int64)                     // key -> delta

			for _, info := range entryInfos {
				names = append(names, info.name)
				if info.attr == nil {
					continue
				}
				if info.typ == TypeFile {
					batchDirLength -= int64(info.attr.Length)
					batchDirSpace -= align4K(info.attr.Length)
				} else {
					batchDirSpace -= align4K(0)
				}
				batchDirInodes--

				if !visited[info.inode] {
					if info.attr.Nlink > 0 {
						if inodes == nil {
							inodes = make(map[Ino]*Attr)
						}
						inodes[info.inode] = info.attr
					} else {
						switch info.typ {
						case TypeFile:
							if dnode, ok := delNodes[info.inode]; ok && dnode.opened {
								if inodes == nil {
									inodes = make(map[Ino]*Attr)
								}
								inodes[info.inode] = info.attr
								sustained = append(sustained, strconv.Itoa(int(info.inode)))
							} else {
								delfiles = append(delfiles, redis.Z{
									Score:  float64(nowUnix),
									Member: m.toDelete(info.inode, info.attr.Length),
								})
								keys = append(keys, m.inodeKey(info.inode))
								batchFsSpace -= align4K(info.attr.Length)
								batchFsInodes--
								stats[m.usedSpaceKey()] -= align4K(info.attr.Length)
								stats[m.totalInodesKey()]--
								deltas.add(&ugQuotaDelta{
									Uid:    info.attr.Uid,
									Gid:    info.attr.Gid,
									Space:  -align4K(info.attr.Length),
									Inodes: -1,
								})
							}
						case TypeSymlink:
							keys = append(keys, m.symKey(info.inode))
							fallthrough
						default:
							keys = append(keys, m.inodeKey(info.inode))
							batchFsSpace -= align4K(0)
							batchFsInodes--
							stats[m.usedSpaceKey()] -= align4K(0)
							stats[m.totalInodesKey()]--
							deltas.add(&ugQuotaDelta{
								Uid:    info.attr.Uid,
								Gid:    info.attr.Gid,
								Space:  -align4K(0),
								Inodes: -1,
							})
						}
						keys = append(keys, m.xattrKey(info.inode))
						if info.attr.Parent == 0 {
							keys = append(keys, m.parentKey(info.inode))
						}
					}
					m.of.InvalidateChunk(info.inode, invalidateAttrOnly)
				}
				if info.attr.Nlink > 0 && info.attr.Parent == 0 {
					key := m.parentKey(info.inode)
					if parentOps[key] == nil {
						parentOps[key] = make(map[string]int64)
					}
					parentOps[key][parent.String()]--
				}

				if info.attr.Nlink > 0 && info.trash > 0 {
					if info.trashName == "" {
						info.trashName = m.trashEntry(parent, info.inode, info.name)
					}
					key := m.entryKey(info.trash)
					if trashOps[key] == nil {
						trashOps[key] = make(map[string]interface{})
					}
					trashOps[key][info.trashName] = info.buf
					if info.attr.Parent == 0 {
						key := m.parentKey(info.inode)
						if parentOps[key] == nil {
							parentOps[key] = make(map[string]int64)
						}
						parentOps[key][info.trash.String()]++
					}
				}
				visited[info.inode] = true
			}

			// execute batched operations using pipeline
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				if len(names) > 0 {
					pipe.HDel(ctx, m.entryKey(parent), names...)
				}
				for inode, attr := range inodes {
					pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
				}
				if len(sustained) > 0 {
					pipe.SAdd(ctx, m.sustained(m.sid), sustained...)
				}
				if len(delfiles) > 0 {
					pipe.ZAdd(ctx, m.delfiles(), delfiles...)
				}
				if len(keys) > 0 {
					pipe.Del(ctx, keys...)
				}
				for key, delta := range stats {
					if delta != 0 {
						pipe.IncrBy(ctx, key, delta)
					}
				}
				for key, fields := range parentOps {
					for field, incr := range fields {
						if incr != 0 {
							pipe.HIncrBy(ctx, key, field, incr)
						}
					}
				}
				for key, fields := range trashOps {
					for field, value := range fields {
						pipe.HSet(ctx, key, field, value)
					}
				}
				if updateParent {
					pipe.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
				}
				return nil
			})
			return err
		}, watchKeys...)

		if err != nil {
			return errno(err)
		}

		// outside of transaction: trigger data deletion callbacks
		for inode, info := range delNodes {
			m.fileDeleted(info.opened, parent.IsTrash(), inode, info.length)
		}

		delta.length += batchDirLength
		delta.space += batchDirSpace
		delta.inodes += batchDirInodes
		m.updateStats(batchFsSpace, batchFsInodes)
		for _, q := range deltas {
			m.updateUserGroupStat(ctx, q.Uid, q.Gid, q.Space, q.Inodes)
		}
	}
	return 0
}

func (m *redisMeta) doRmdir(ctx Context, parent Ino, name string, pinode *Ino, oldAttr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash Ino
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}
	var attr Attr
	err := m.txn(ctx, func(tx *redis.Tx) error {
		buf, err := tx.HGet(ctx, m.entryKey(parent), name).Bytes()
		if err == redis.Nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parent, name); e != nil {
				name = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
				err = nil
			}
		}
		if err != nil {
			return err
		}
		typ, inode := m.parseEntry(buf)
		if typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pinode != nil {
			*pinode = inode
		}
		if err = tx.Watch(ctx, m.inodeKey(inode), m.entryKey(inode)).Err(); err != nil {
			return err
		}

		rs, err := tx.MGet(ctx, m.inodeKey(parent), m.inodeKey(inode)).Result()
		if err != nil {
			return err
		}
		if rs[0] == nil {
			return redis.Nil
		}
		var pattr Attr
		m.parseAttr([]byte(rs[0].(string)), &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		now := time.Now()
		pattr.Nlink--
		pattr.Mtime = now.Unix()
		pattr.Mtimensec = uint32(now.Nanosecond())
		pattr.Ctime = now.Unix()
		pattr.Ctimensec = uint32(now.Nanosecond())

		cnt, err := tx.HLen(ctx, m.entryKey(inode)).Result()
		if err != nil {
			return err
		}
		if cnt > 0 {
			return syscall.ENOTEMPTY
		}
		if rs[1] != nil {
			m.parseAttr([]byte(rs[1].(string)), &attr)
			if oldAttr != nil {
				*oldAttr = attr
			}
			if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
				return syscall.EACCES
			}
			if (attr.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			if trash > 0 {
				attr.Ctime = now.Unix()
				attr.Ctimensec = uint32(now.Nanosecond())
				attr.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name)
			trash = 0
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.HDel(ctx, m.entryKey(parent), name)
			if !parent.IsTrash() {
				pipe.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
			}
			if trash > 0 {
				pipe.Set(ctx, m.inodeKey(inode), m.marshal(&attr), 0)
				pipe.HSet(ctx, m.entryKey(trash), m.trashEntry(parent, inode, name), buf)
			} else {
				pipe.Del(ctx, m.inodeKey(inode))
				pipe.Del(ctx, m.xattrKey(inode))
				pipe.IncrBy(ctx, m.usedSpaceKey(), -align4K(0))
				pipe.Decr(ctx, m.totalInodesKey())
			}

			field := inode.String()
			pipe.HDel(ctx, m.dirDataLengthKey(), field)
			pipe.HDel(ctx, m.dirUsedSpaceKey(), field)
			pipe.HDel(ctx, m.dirUsedInodesKey(), field)
			pipe.HDel(ctx, m.dirQuotaKey(), field)
			pipe.HDel(ctx, m.dirQuotaUsedSpaceKey(), field)
			pipe.HDel(ctx, m.dirQuotaUsedInodesKey(), field)
			return nil
		})
		return err
	}, m.inodeKey(parent), m.entryKey(parent))
	if err == nil && trash == 0 {
		m.updateStats(-align4K(0), -1)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, -align4K(0), -1)
	}
	return errno(err)
}

func (m *redisMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode, tInode *Ino, attr, tAttr *Attr) syscall.Errno {
	exchange := flags == RenameExchange
	var opened bool
	var trash, dino Ino
	var dtyp uint8
	var tattr Attr
	var newSpace, newInode int64
	keys := []string{m.inodeKey(parentSrc), m.entryKey(parentSrc), m.inodeKey(parentDst), m.entryKey(parentDst)}
	if parentSrc.IsTrash() {
		// lock the parentDst
		keys[0], keys[2] = keys[2], keys[0]
	}
	if !exchange {
		if st := m.checkTrash(parentDst, &trash); st != 0 {
			return st
		}
	}
	err := m.txn(ctx, func(tx *redis.Tx) error {
		opened = false
		dino, dtyp = 0, 0
		tattr = Attr{}
		newSpace, newInode = 0, 0
		buf, err := tx.HGet(ctx, m.entryKey(parentSrc), nameSrc).Bytes()
		if err == redis.Nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentSrc, nameSrc); e != nil {
				nameSrc = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
				err = nil
			}
		}
		if err != nil {
			return err
		}
		typ, ino := m.parseEntry(buf)
		if parentSrc == parentDst && nameSrc == nameDst {
			if inode != nil {
				*inode = ino
			}
			return nil
		}
		keys := []string{m.inodeKey(ino)}

		dbuf, err := tx.HGet(ctx, m.entryKey(parentDst), nameDst).Bytes()
		if err == redis.Nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentDst, nameDst); e != nil {
				if (nameSrc != string(e.Name)) || parentDst != parentSrc {
					nameDst = string(e.Name)
					dbuf = m.packEntry(e.Attr.Typ, e.Inode)
					err = nil
				}
			}
		}
		if err != nil && err != redis.Nil {
			return err
		}
		if err == nil {
			if flags&RenameNoReplace != 0 {
				return syscall.EEXIST
			}
			dtyp, dino = m.parseEntry(dbuf)
			keys = append(keys, m.inodeKey(dino))
			if dtyp == TypeDirectory {
				keys = append(keys, m.entryKey(dino))
			}
			if !exchange {
				if st := m.checkTrash(parentDst, &trash); st != 0 {
					return st
				}
			}
		}
		if err := tx.Watch(ctx, keys...).Err(); err != nil {
			return err
		}
		if dino > 0 {
			if ino == dino {
				return errno(nil)
			}
			if exchange {
			} else if typ == TypeDirectory && dtyp != TypeDirectory {
				return syscall.ENOTDIR
			} else if typ != TypeDirectory && dtyp == TypeDirectory {
				return syscall.EISDIR
			}
		}

		keys = []string{m.inodeKey(parentSrc), m.inodeKey(parentDst), m.inodeKey(ino)}
		if dino > 0 {
			keys = append(keys, m.inodeKey(dino))
		}
		rs, err := tx.MGet(ctx, keys...).Result()
		if err != nil {
			return err
		}
		if rs[0] == nil || rs[1] == nil || rs[2] == nil {
			return redis.Nil
		}
		var sattr, dattr, iattr Attr
		m.parseAttr([]byte(rs[0].(string)), &sattr)
		if sattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parentSrc, MODE_MASK_W|MODE_MASK_X, &sattr); st != 0 {
			return st
		}
		m.parseAttr([]byte(rs[1].(string)), &dattr)
		if dattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if flags&RenameRestore == 0 && dattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parentDst, MODE_MASK_W|MODE_MASK_X, &dattr); st != 0 {
			return st
		}
		// TODO: check parentDst is a subdir of source node
		if ino == parentDst || ino == dattr.Parent {
			return syscall.EPERM
		}
		m.parseAttr([]byte(rs[2].(string)), &iattr)
		if (sattr.Flags&FlagAppend) != 0 || (sattr.Flags&FlagImmutable) != 0 || (dattr.Flags&FlagImmutable) != 0 || (iattr.Flags&FlagAppend) != 0 || (iattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if parentSrc != parentDst && sattr.Mode&0o1000 != 0 && ctx.Uid() != 0 &&
			ctx.Uid() != iattr.Uid && (ctx.Uid() != sattr.Uid || iattr.Typ == TypeDirectory) {
			return syscall.EACCES
		}

		var supdate, dupdate bool
		now := time.Now()
		if dino > 0 {
			if rs[3] == nil {
				logger.Warnf("no attribute for inode %d (%d, %s)", dino, parentDst, nameDst)
				trash = 0
			} else {
				m.parseAttr([]byte(rs[3].(string)), &tattr)
			}
			if (tattr.Flags&FlagAppend) != 0 || (tattr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (tattr.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			tattr.Ctime = now.Unix()
			tattr.Ctimensec = uint32(now.Nanosecond())
			if exchange {
				if parentSrc != parentDst {
					if dtyp == TypeDirectory {
						tattr.Parent = parentSrc
						dattr.Nlink--
						sattr.Nlink++
						supdate, dupdate = true, true
					} else if tattr.Parent > 0 {
						tattr.Parent = parentSrc
					}
				}
			} else {
				if dtyp == TypeDirectory {
					cnt, err := tx.HLen(ctx, m.entryKey(dino)).Result()
					if err != nil {
						return err
					}
					if cnt != 0 {
						return syscall.ENOTEMPTY
					}
					dattr.Nlink--
					dupdate = true
					if trash > 0 {
						tattr.Parent = trash
					}
				} else {
					if trash == 0 {
						tattr.Nlink--
						if dtyp == TypeFile && tattr.Nlink == 0 && m.sid > 0 {
							opened = m.of.IsOpen(dino)
						}
						defer func() { m.of.InvalidateChunk(dino, invalidateAttrOnly) }()
					} else if tattr.Parent > 0 {
						tattr.Parent = trash
					}
				}
			}
			if ctx.Uid() != 0 && dattr.Mode&01000 != 0 && ctx.Uid() != dattr.Uid && ctx.Uid() != tattr.Uid {
				return syscall.EACCES
			}
		} else {
			if exchange {
				return syscall.ENOENT
			}
		}
		if ctx.Uid() != 0 && sattr.Mode&01000 != 0 && ctx.Uid() != sattr.Uid && ctx.Uid() != iattr.Uid {
			return syscall.EACCES
		}

		if parentSrc != parentDst {
			if typ == TypeDirectory {
				iattr.Parent = parentDst
				sattr.Nlink--
				dattr.Nlink++
				supdate, dupdate = true, true
			} else if iattr.Parent > 0 {
				iattr.Parent = parentDst
			}
		}
		if supdate || now.Sub(time.Unix(sattr.Mtime, int64(sattr.Mtimensec))) >= m.conf.SkipDirMtime {
			sattr.Mtime = now.Unix()
			sattr.Mtimensec = uint32(now.Nanosecond())
			sattr.Ctime = now.Unix()
			sattr.Ctimensec = uint32(now.Nanosecond())
			supdate = true
		}
		if dupdate || now.Sub(time.Unix(dattr.Mtime, int64(dattr.Mtimensec))) >= m.conf.SkipDirMtime {
			dattr.Mtime = now.Unix()
			dattr.Mtimensec = uint32(now.Nanosecond())
			dattr.Ctime = now.Unix()
			dattr.Ctimensec = uint32(now.Nanosecond())
			dupdate = true
		}
		iattr.Ctime = now.Unix()
		iattr.Ctimensec = uint32(now.Nanosecond())
		if inode != nil {
			*inode = ino
		}
		if attr != nil {
			*attr = iattr
		}
		if dino > 0 {
			*tInode = dino
			*tAttr = tattr
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			if exchange { // dbuf, tattr are valid
				pipe.Set(ctx, m.inodeKey(dino), m.marshal(&tattr), 0)
				pipe.HSet(ctx, m.entryKey(parentSrc), nameSrc, dbuf)
				if parentSrc != parentDst && tattr.Parent == 0 {
					pipe.HIncrBy(ctx, m.parentKey(dino), parentSrc.String(), 1)
					pipe.HIncrBy(ctx, m.parentKey(dino), parentDst.String(), -1)
				}
			} else {
				pipe.HDel(ctx, m.entryKey(parentSrc), nameSrc)
				if dino > 0 {
					if trash > 0 {
						pipe.Set(ctx, m.inodeKey(dino), m.marshal(&tattr), 0)
						pipe.HSet(ctx, m.entryKey(trash), m.trashEntry(parentDst, dino, nameDst), dbuf)
						if tattr.Parent == 0 {
							pipe.HIncrBy(ctx, m.parentKey(dino), trash.String(), 1)
							pipe.HIncrBy(ctx, m.parentKey(dino), parentDst.String(), -1)
						}
					} else if dtyp != TypeDirectory && tattr.Nlink > 0 {
						pipe.Set(ctx, m.inodeKey(dino), m.marshal(&tattr), 0)
						if tattr.Parent == 0 {
							pipe.HIncrBy(ctx, m.parentKey(dino), parentDst.String(), -1)
						}
					} else {
						if dtyp == TypeFile {
							if opened {
								pipe.Set(ctx, m.inodeKey(dino), m.marshal(&tattr), 0)
								pipe.SAdd(ctx, m.sustained(m.sid), strconv.Itoa(int(dino)))
							} else {
								pipe.ZAdd(ctx, m.delfiles(), redis.Z{Score: float64(now.Unix()), Member: m.toDelete(dino, tattr.Length)})
								pipe.Del(ctx, m.inodeKey(dino))
								newSpace, newInode = -align4K(tattr.Length), -1
								pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
								pipe.Decr(ctx, m.totalInodesKey())
							}
						} else {
							if dtyp == TypeSymlink {
								pipe.Del(ctx, m.symKey(dino))
							}
							pipe.Del(ctx, m.inodeKey(dino))
							newSpace, newInode = -align4K(0), -1
							pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
							pipe.Decr(ctx, m.totalInodesKey())
						}
						pipe.Del(ctx, m.xattrKey(dino))
						if tattr.Parent == 0 {
							pipe.Del(ctx, m.parentKey(dino))
						}
					}
					if dtyp == TypeDirectory {
						field := dino.String()
						pipe.HDel(ctx, m.dirQuotaKey(), field)
						pipe.HDel(ctx, m.dirQuotaUsedSpaceKey(), field)
						pipe.HDel(ctx, m.dirQuotaUsedInodesKey(), field)
					}
				}
			}
			if parentDst != parentSrc {
				if !parentSrc.IsTrash() && supdate {
					pipe.Set(ctx, m.inodeKey(parentSrc), m.marshal(&sattr), 0)
				}
				if iattr.Parent == 0 {
					pipe.HIncrBy(ctx, m.parentKey(ino), parentDst.String(), 1)
					pipe.HIncrBy(ctx, m.parentKey(ino), parentSrc.String(), -1)
				}
			}
			pipe.Set(ctx, m.inodeKey(ino), m.marshal(&iattr), 0)
			pipe.HSet(ctx, m.entryKey(parentDst), nameDst, buf)
			if dupdate {
				pipe.Set(ctx, m.inodeKey(parentDst), m.marshal(&dattr), 0)
			}
			return nil
		})
		return err
	}, keys...)
	if err == nil && !exchange && trash == 0 {
		if dino > 0 && dtyp == TypeFile && tattr.Nlink == 0 {
			m.fileDeleted(opened, false, dino, tattr.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, tattr.Uid, tattr.Gid, newSpace, newInode)
	}
	return errno(err)
}

func (m *redisMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		rs, err := tx.MGet(ctx, m.inodeKey(parent), m.inodeKey(inode)).Result()
		if err != nil {
			return err
		}
		if rs[0] == nil || rs[1] == nil {
			return redis.Nil
		}
		var pattr, iattr Attr
		m.parseAttr([]byte(rs[0].(string)), &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if pattr.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}
		var updateParent bool
		now := time.Now()
		if now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime {
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			updateParent = true
		}
		m.parseAttr([]byte(rs[1].(string)), &iattr)
		if iattr.Typ == TypeDirectory {
			return syscall.EPERM
		}
		if (iattr.Flags&FlagAppend) != 0 || (iattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		oldParent := iattr.Parent
		iattr.Parent = 0
		iattr.Ctime = now.Unix()
		iattr.Ctimensec = uint32(now.Nanosecond())
		iattr.Nlink++

		err = tx.HGet(ctx, m.entryKey(parent), name).Err()
		if err != nil && err != redis.Nil {
			return err
		} else if err == nil {
			return syscall.EEXIST
		} else if err == redis.Nil && m.conf.CaseInsensi && m.resolveCase(ctx, parent, name) != nil {
			return syscall.EEXIST
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.HSet(ctx, m.entryKey(parent), name, m.packEntry(iattr.Typ, inode))
			if updateParent {
				pipe.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
			}
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(&iattr), 0)
			if oldParent > 0 {
				pipe.HIncrBy(ctx, m.parentKey(inode), oldParent.String(), 1)
			}
			pipe.HIncrBy(ctx, m.parentKey(inode), parent.String(), 1)
			return nil
		})
		if err == nil && attr != nil {
			*attr = iattr
		}
		return err
	}, m.inodeKey(parent), m.entryKey(parent), m.inodeKey(inode)))
}

func (m *redisMeta) fillAttr(ctx Context, es []*Entry) error {
	if len(es) == 0 {
		return nil
	}
	var keys = make([]string, len(es))
	for i, e := range es {
		keys[i] = m.inodeKey(e.Inode)
	}
	rs, err := m.rdb.MGet(ctx, keys...).Result()
	if err != nil {
		return err
	}
	for j, re := range rs {
		if re != nil {
			if a, ok := re.(string); ok {
				m.parseAttr([]byte(a), es[j].Attr)
				m.of.Update(es[j].Inode, es[j].Attr)
			}
		}
	}
	return nil
}

func (m *redisMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry, limit int) syscall.Errno {
	var stop = errors.New("stop")
	err := m.hscan(ctx, m.entryKey(inode), func(keys []string) error {
		newEntries := make([]Entry, len(keys)/2)
		newAttrs := make([]Attr, len(keys)/2)
		for i := 0; i < len(keys); i += 2 {
			typ, ino := m.parseEntry([]byte(keys[i+1]))
			if keys[i] == "" {
				logger.Errorf("Corrupt entry with empty name: inode %d parent %d", ino, inode)
				continue
			}
			ent := &newEntries[i/2]
			ent.Inode = ino
			ent.Name = []byte(keys[i])
			ent.Attr = &newAttrs[i/2]
			ent.Attr.Typ = typ
			*entries = append(*entries, ent)
			if limit > 0 && len(*entries) >= limit {
				return stop
			}
		}
		return nil
	})
	if errors.Is(err, stop) {
		err = nil
	}
	if err != nil {
		return errno(err)
	}

	if plus != 0 && len(*entries) != 0 {
		batchSize := 4096
		nEntries := len(*entries)
		if nEntries <= batchSize {
			err = m.fillAttr(ctx, *entries)
		} else {
			indexCh := make(chan []*Entry, 10)
			var wg sync.WaitGroup
			for i := 0; i < 2; i++ {
				wg.Add(1)
				go func() {
					defer wg.Done()
					for es := range indexCh {
						e := m.fillAttr(ctx, es)
						if e != nil {
							err = e
							break
						}
					}
				}()
			}
			for i := 0; i < nEntries; i += batchSize {
				if i+batchSize > nEntries {
					indexCh <- (*entries)[i:]
				} else {
					indexCh <- (*entries)[i : i+batchSize]
				}
			}
			close(indexCh)
			wg.Wait()
		}
		if err != nil {
			return errno(err)
		}
	}
	return 0
}

func (m *redisMeta) doCleanStaleSession(sid uint64) error {
	var fail bool
	// release locks
	var ctx = Background()
	ssid := strconv.FormatInt(int64(sid), 10)
	key := m.lockedKey(sid)
	if inodes, err := m.rdb.SMembers(ctx, key).Result(); err == nil {
		for _, k := range inodes {
			owners, err := m.rdb.HKeys(ctx, k).Result()
			if err != nil {
				logger.Warnf("HKeys %s: %s", k, err)
				fail = true
				continue
			}
			var fields []string
			for _, o := range owners {
				if strings.Split(o, "_")[0] == ssid {
					fields = append(fields, o)
				}
			}
			if len(fields) > 0 {
				if err = m.rdb.HDel(ctx, k, fields...).Err(); err != nil {
					logger.Warnf("HDel %s %s: %s", k, fields, err)
					fail = true
					continue
				}
			}
			if err = m.rdb.SRem(ctx, key, k).Err(); err != nil {
				logger.Warnf("SRem %s %s: %s", key, k, err)
				fail = true
			}
		}
	} else {
		logger.Warnf("SMembers %s: %s", key, err)
		fail = true
	}

	key = m.sustained(sid)
	if inodes, err := m.rdb.SMembers(ctx, key).Result(); err == nil {
		for _, sinode := range inodes {
			inode, _ := strconv.ParseUint(sinode, 10, 64)
			if err = m.doDeleteSustainedInode(sid, Ino(inode)); err != nil {
				logger.Warnf("Delete sustained inode %d of sid %d: %s", inode, sid, err)
				fail = true
			}
		}
	} else {
		logger.Warnf("SMembers %s: %s", key, err)
		fail = true
	}

	if !fail {
		if err := m.rdb.HDel(ctx, m.sessionInfos(), ssid).Err(); err != nil {
			logger.Warnf("HDel sessionInfos %s: %s", ssid, err)
			fail = true
		}
	}
	if fail {
		return fmt.Errorf("failed to clean up sid %d", sid)
	} else {
		if n, err := m.rdb.ZRem(ctx, m.allSessions(), ssid).Result(); err != nil {
			return err
		} else if n == 1 {
			return nil
		}
		return m.rdb.ZRem(ctx, legacySessions, ssid).Err()
	}
}

func (m *redisMeta) doFindStaleSessions(limit int) ([]uint64, error) {
	vals, err := m.rdb.ZRangeByScore(Background(), m.allSessions(), &redis.ZRangeBy{
		Min:   "-inf",
		Max:   strconv.FormatInt(time.Now().Unix(), 10),
		Count: int64(limit)}).Result()
	if err != nil {
		return nil, err
	}
	sids := make([]uint64, len(vals))
	for i, v := range vals {
		sids[i], _ = strconv.ParseUint(v, 10, 64)
	}
	limit -= len(sids)
	if limit <= 0 {
		return sids, nil
	}

	// check clients with version before 1.0-beta3 as well
	vals, err = m.rdb.ZRangeByScore(Background(), legacySessions, &redis.ZRangeBy{
		Min:   "-inf",
		Max:   strconv.FormatInt(time.Now().Add(time.Minute*-5).Unix(), 10),
		Count: int64(limit)}).Result()
	if err != nil {
		logger.Errorf("Scan stale legacy sessions: %s", err)
		return sids, nil
	}
	for _, v := range vals {
		sid, _ := strconv.ParseUint(v, 10, 64)
		sids = append(sids, sid)
	}
	return sids, nil
}

func (m *redisMeta) doRefreshSession() error {
	ctx := Background()
	ssid := strconv.FormatUint(m.sid, 10)
	// we have to check sessionInfo here because the operations are not within a transaction
	ok, err := m.rdb.HExists(ctx, m.sessionInfos(), ssid).Result()
	if err == nil && !ok {
		logger.Warnf("Session %d was stale and cleaned up, but now it comes back again", m.sid)
		err = m.rdb.HSet(ctx, m.sessionInfos(), m.sid, m.newSessionInfo()).Err()
	}
	if err != nil {
		return err
	}
	return m.rdb.ZAdd(ctx, m.allSessions(), redis.Z{
		Score:  float64(m.expireTime()),
		Member: ssid}).Err()
}

func (m *redisMeta) doDeleteSustainedInode(sid uint64, inode Ino) error {
	var attr Attr
	var ctx = Background()
	var newSpace int64
	err := m.txn(ctx, func(tx *redis.Tx) error {
		newSpace = 0
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err == redis.Nil {
			return nil
		}
		if err != nil {
			return err
		}
		m.parseAttr(a, &attr)
		newSpace = -align4K(attr.Length)
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.ZAdd(ctx, m.delfiles(), redis.Z{Score: float64(time.Now().Unix()), Member: m.toDelete(inode, attr.Length)})
			pipe.Del(ctx, m.inodeKey(inode))
			pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
			pipe.Decr(ctx, m.totalInodesKey())
			pipe.SRem(ctx, m.sustained(sid), strconv.Itoa(int(inode)))
			return nil
		})
		return err
	}, m.inodeKey(inode))
	if err == nil && newSpace < 0 {
		m.updateStats(newSpace, -1)
		m.tryDeleteFileData(inode, attr.Length, false)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, newSpace, 0)
	}
	return err
}

func (m *redisMeta) doRead(ctx Context, inode Ino, indx uint32) ([]*slice, syscall.Errno) {
	vals, err := m.rdb.LRange(ctx, m.chunkKey(inode, indx), 0, -1).Result()
	if err != nil {
		return nil, errno(err)
	}
	return readSlices(vals), 0
}

func (m *redisMeta) doList(ctx Context, inode Ino) ([]*slice, syscall.Errno) {
	var attr Attr
	err := m.doGetAttr(ctx, inode, &attr)
	if err != 0 {
		return nil, err
	}
	p := m.rdb.Pipeline()
	var slices []*slice
	var indx uint32
	for uint64(indx)*ChunkSize < attr.Length {
		for i := 0; uint64(indx)*ChunkSize < attr.Length && i < 1000; i++ {
			_ = p.LRange(ctx, m.chunkKey(inode, indx), 0, -1)
			indx++
		}
		cmds, err := p.Exec(ctx)
		if err != nil {
			logger.Warnf("list of inode %d: %s", inode, err)
			return nil, errno(err)
		}
		for _, cmd := range cmds {
			val := cmd.(*redis.StringSliceCmd).Val()
			if len(val) == 0 {
				continue
			}
			ss := readSlices(val)
			if ss == nil {
				continue
			}
			slices = append(slices, ss...)
		}
	}

	return slices, 0
}

func (m *redisMeta) doWrite(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time, numSlices *int, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		*delta = dirStat{}
		*attr = Attr{}
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, attr)
		if attr.Typ != TypeFile {
			return syscall.EPERM
		}
		newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len)
		if newleng > attr.Length {
			delta.length = int64(newleng - attr.Length)
			delta.space = align4K(newleng) - align4K(attr.Length)
			attr.Length = newleng
		}
		if err := m.checkQuota(ctx, delta.space, 0, attr.Uid, attr.Gid, m.getParents(ctx, tx, inode, attr.Parent)...); err != 0 {
			return err
		}
		now := time.Now()
		attr.Mtime = mtime.Unix()
		attr.Mtimensec = uint32(mtime.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())

		var rpush *redis.IntCmd
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			rpush = pipe.RPush(ctx, m.chunkKey(inode, indx), marshalSlice(off, slice.Id, slice.Size, slice.Off, slice.Len))
			// most of chunk are used by single inode, so use that as the default (1 == not exists)
			// pipe.Incr(ctx, r.sliceKey(slice.ID, slice.Size))
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
			if delta.space > 0 {
				pipe.IncrBy(ctx, m.usedSpaceKey(), delta.space)
			}
			return nil
		})
		if err == nil {
			*numSlices = int(rpush.Val())
		}
		return err
	}, m.inodeKey(inode)))
}

func (m *redisMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied, outLength *uint64) syscall.Errno {
	defer m.timeit("CopyFileRange", time.Now())
	f := m.of.find(fout)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	var newLength, newSpace int64
	var sattr, attr Attr
	defer func() { m.of.InvalidateChunk(fout, invalidateAllChunks) }()
	err := m.txn(ctx, func(tx *redis.Tx) error {
		newLength, newSpace = 0, 0
		rs, err := tx.MGet(ctx, m.inodeKey(fin), m.inodeKey(fout)).Result()
		if err != nil {
			return err
		}
		if rs[0] == nil || rs[1] == nil {
			return redis.Nil
		}
		sattr = Attr{}
		m.parseAttr([]byte(rs[0].(string)), &sattr)
		if sattr.Typ != TypeFile {
			return syscall.EINVAL
		}
		if offIn >= sattr.Length {
			if copied != nil {
				*copied = 0
			}
			return nil
		}
		size := size
		if offIn+size > sattr.Length {
			size = sattr.Length - offIn
		}
		attr = Attr{}
		m.parseAttr([]byte(rs[1].(string)), &attr)
		if attr.Typ != TypeFile {
			return syscall.EINVAL
		}
		if (attr.Flags&FlagImmutable) != 0 || (attr.Flags&FlagAppend) != 0 {
			return syscall.EPERM
		}

		newleng := offOut + size
		if newleng > attr.Length {
			newLength = int64(newleng - attr.Length)
			newSpace = align4K(newleng) - align4K(attr.Length)
			attr.Length = newleng
		}
		if err := m.checkQuota(ctx, newSpace, 0, attr.Uid, attr.Gid, m.getParents(ctx, tx, fout, attr.Parent)...); err != 0 {
			return err
		}
		now := time.Now()
		attr.Mtime = now.Unix()
		attr.Mtimensec = uint32(now.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())
		if outLength != nil {
			*outLength = attr.Length
		}

		var vals [][]string
		for i := offIn / ChunkSize; i <= (offIn+size)/ChunkSize; i++ {
			val, err := tx.LRange(ctx, m.chunkKey(fin, uint32(i)), 0, -1).Result()
			if err != nil {
				return err
			}
			vals = append(vals, val)
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			coff := offIn / ChunkSize * ChunkSize
			for _, sv := range vals {
				// Add a zero chunk for hole
				ss := readSlices(sv)
				if ss == nil {
					return syscall.EIO
				}
				ss = append([]*slice{{len: ChunkSize}}, ss...)
				cs := buildSlice(ss)
				tpos := coff
				for _, s := range cs {
					pos := tpos
					tpos += uint64(s.Len)
					if pos < offIn+size && pos+uint64(s.Len) > offIn {
						if pos < offIn {
							dec := offIn - pos
							s.Off += uint32(dec)
							pos += dec
							s.Len -= uint32(dec)
						}
						if pos+uint64(s.Len) > offIn+size {
							dec := pos + uint64(s.Len) - (offIn + size)
							s.Len -= uint32(dec)
						}
						doff := pos - offIn + offOut
						indx := uint32(doff / ChunkSize)
						dpos := uint32(doff % ChunkSize)
						if dpos+s.Len > ChunkSize {
							pipe.RPush(ctx, m.chunkKey(fout, indx), marshalSlice(dpos, s.Id, s.Size, s.Off, ChunkSize-dpos))
							if s.Id > 0 {
								pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.Id, s.Size), 1)
							}

							skip := ChunkSize - dpos
							pipe.RPush(ctx, m.chunkKey(fout, indx+1), marshalSlice(0, s.Id, s.Size, s.Off+skip, s.Len-skip))
							if s.Id > 0 {
								pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.Id, s.Size), 1)
							}
						} else {
							pipe.RPush(ctx, m.chunkKey(fout, indx), marshalSlice(dpos, s.Id, s.Size, s.Off, s.Len))
							if s.Id > 0 {
								pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.Id, s.Size), 1)
							}
						}
					}
				}
				coff += ChunkSize
			}
			pipe.Set(ctx, m.inodeKey(fout), m.marshal(&attr), 0)
			if newSpace > 0 {
				pipe.IncrBy(ctx, m.usedSpaceKey(), newSpace)
			}
			return nil
		})
		if err == nil {
			if copied != nil {
				*copied = size
			}
		}
		return err
	}, m.inodeKey(fout), m.inodeKey(fin))
	if err == nil {
		m.updateParentStat(ctx, fout, attr.Parent, newLength, newSpace)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, newSpace, 0)
	}
	return errno(err)
}

func (m *redisMeta) getParents(ctx Context, tx *redis.Tx, inode, parent Ino) []Ino {
	if parent > 0 {
		return []Ino{parent}
	}
	vals, err := tx.HGetAll(ctx, m.parentKey(inode)).Result()
	if err != nil {
		logger.Warnf("Scan parent key of inode %d: %s", inode, err)
		return nil
	}
	ps := make([]Ino, 0, len(vals))
	for k, v := range vals {
		if n, _ := strconv.Atoi(v); n > 0 {
			ino, _ := strconv.ParseUint(k, 10, 64)
			ps = append(ps, Ino(ino))
		}
	}
	return ps
}

func (m *redisMeta) doGetParents(ctx Context, inode Ino) map[Ino]int {
	vals, err := m.rdb.HGetAll(ctx, m.parentKey(inode)).Result()
	if err != nil {
		logger.Warnf("Scan parent key of inode %d: %s", inode, err)
		return nil
	}
	ps := make(map[Ino]int)
	for k, v := range vals {
		if n, _ := strconv.Atoi(v); n > 0 {
			ino, _ := strconv.ParseUint(k, 10, 64)
			ps[Ino(ino)] = n
		}
	}
	return ps
}

func (m *redisMeta) doSyncDirStat(ctx Context, ino Ino) (*dirStat, syscall.Errno) {
	if m.conf.ReadOnly {
		return nil, syscall.EROFS
	}
	field := ino.String()
	stat, st := m.calcDirStat(ctx, ino)
	if st != 0 {
		return nil, st
	}
	err := m.txn(ctx, func(tx *redis.Tx) error {
		n, err := tx.Exists(ctx, m.inodeKey(ino)).Result()
		if err != nil {
			return err
		}
		if n <= 0 {
			return syscall.ENOENT
		}
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.HSet(ctx, m.dirDataLengthKey(), field, stat.length)
			pipe.HSet(ctx, m.dirUsedSpaceKey(), field, stat.space)
			pipe.HSet(ctx, m.dirUsedInodesKey(), field, stat.inodes)
			return nil
		})
		return err
	}, m.inodeKey(ino))
	return stat, errno(err)
}

func (m *redisMeta) doUpdateDirStat(ctx Context, batch map[Ino]dirStat) error {
	spaceKey := m.dirUsedSpaceKey()
	lengthKey := m.dirDataLengthKey()
	inodesKey := m.dirUsedInodesKey()
	nonexist := make(map[Ino]bool, 0)
	statList := make([]Ino, 0, len(batch))
	pipeline := m.rdb.Pipeline()
	for ino := range batch {
		pipeline.HExists(ctx, spaceKey, ino.String())
		statList = append(statList, ino)
	}
	rets, err := pipeline.Exec(ctx)
	if err != nil {
		return err
	}
	for i, ret := range rets {
		if ret.Err() != nil {
			return ret.Err()
		}
		if exist, _ := ret.(*redis.BoolCmd).Result(); !exist {
			nonexist[statList[i]] = true
		}
	}
	if len(nonexist) > 0 {
		wg := m.parallelSyncDirStat(ctx, nonexist)
		defer wg.Wait()
	}

	for _, group := range m.groupBatch(batch, 1000) {
		_, err := m.rdb.Pipelined(ctx, func(pipe redis.Pipeliner) error {
			for _, ino := range group {
				field := ino.String()
				if nonexist[ino] {
					continue
				}
				stat := batch[ino]
				if stat.length != 0 {
					pipe.HIncrBy(ctx, lengthKey, field, stat.length)
				}
				if stat.space != 0 {
					pipe.HIncrBy(ctx, spaceKey, field, stat.space)
				}
				if stat.inodes != 0 {
					pipe.HIncrBy(ctx, inodesKey, field, stat.inodes)
				}
			}
			return nil
		})
		if err != nil {
			return err
		}
	}
	return nil
}

func (m *redisMeta) doGetDirStat(ctx Context, ino Ino, trySync bool) (*dirStat, syscall.Errno) {
	field := ino.String()
	dataLength, errLength := m.rdb.HGet(ctx, m.dirDataLengthKey(), field).Int64()
	if errLength != nil && errLength != redis.Nil {
		return nil, errno(errLength)
	}
	usedSpace, errSpace := m.rdb.HGet(ctx, m.dirUsedSpaceKey(), field).Int64()
	if errSpace != nil && errSpace != redis.Nil {
		return nil, errno(errSpace)
	}
	usedInodes, errInodes := m.rdb.HGet(ctx, m.dirUsedInodesKey(), field).Int64()
	if errInodes != nil && errSpace != redis.Nil {
		return nil, errno(errInodes)
	}
	if errLength != redis.Nil && errSpace != redis.Nil && errInodes != redis.Nil {
		if trySync && (dataLength < 0 || usedSpace < 0 || usedInodes < 0) {
			return m.doSyncDirStat(ctx, ino)
		}
		return &dirStat{dataLength, usedSpace, usedInodes}, 0
	}

	if trySync {
		return m.doSyncDirStat(ctx, ino)
	}
	return nil, 0
}

// For now only deleted files
func (m *redisMeta) cleanupLegacies() {
	for {
		utils.SleepWithJitter(time.Minute)
		rng := &redis.ZRangeBy{Min: "-inf", Max: strconv.FormatInt(time.Now().Add(-time.Hour).Unix(), 10), Count: 1000}
		vals, err := m.rdb.ZRangeByScore(Background(), m.delfiles(), rng).Result()
		if err != nil {
			continue
		}
		var count int
		for _, v := range vals {
			ps := strings.Split(v, ":")
			if len(ps) != 2 {
				inode, _ := strconv.ParseUint(ps[0], 10, 64)
				var length uint64 = 1 << 30
				if len(ps) > 2 {
					length, _ = strconv.ParseUint(ps[2], 10, 64)
				}
				logger.Infof("cleanup legacy delfile inode %d with %d bytes (%s)", inode, length, v)
				m.doDeleteFileData_(Ino(inode), length, v)
				count++
			}
		}
		if count == 0 {
			return
		}
	}
}

func (m *redisMeta) doFindDeletedFiles(ts int64, limit int) (map[Ino]uint64, error) {
	rng := &redis.ZRangeBy{Min: "-inf", Max: strconv.FormatInt(ts, 10), Count: int64(limit)}
	vals, err := m.rdb.ZRangeByScore(Background(), m.delfiles(), rng).Result()
	if err != nil {
		return nil, err
	}
	files := make(map[Ino]uint64, len(vals))
	for _, v := range vals {
		ps := strings.Split(v, ":")
		if len(ps) != 2 { // will be cleaned up as legacy
			continue
		}
		inode, _ := strconv.ParseUint(ps[0], 10, 64)
		files[Ino(inode)], _ = strconv.ParseUint(ps[1], 10, 64)
	}
	return files, nil
}

func (m *redisMeta) doCleanupSlices(ctx Context, count *uint64) error {
	return m.hscan(ctx, m.sliceRefs(), func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			key, val := keys[i], keys[i+1]
			if strings.HasPrefix(val, "-") { // < 0
				ps := strings.Split(key, "_")
				if len(ps) == 2 {
					id, _ := strconv.ParseUint(ps[0][1:], 10, 64)
					size, _ := strconv.ParseUint(ps[1], 10, 32)
					if id > 0 && size > 0 {
						m.deleteSlice(id, uint32(size))
						if count != nil {
							*count++
						}
					}
				}
			} else if val == "0" {
				m.cleanupZeroRef(key)
			}
			if ctx.Canceled() {
				return ctx.Err()
			}
		}
		return nil
	})
}

func (m *redisMeta) cleanupZeroRef(key string) {
	var ctx = Background()
	_ = m.txn(ctx, func(tx *redis.Tx) error {
		v, err := tx.HGet(ctx, m.sliceRefs(), key).Int()
		if err != nil && err != redis.Nil {
			return err
		}
		if v != 0 {
			return syscall.EINVAL
		}
		_, err = tx.TxPipelined(ctx, func(p redis.Pipeliner) error {
			p.HDel(ctx, m.sliceRefs(), key)
			return nil
		})
		return err
	}, m.sliceRefs())
}

func (m *redisMeta) cleanupLeakedChunks(delete bool) {
	var ctx = Background()
	prefix := len(m.prefix)
	_ = m.scan(ctx, "c*", func(ckeys []string) error {
		var ikeys []string
		var rs []*redis.IntCmd
		p := m.rdb.Pipeline()
		for _, k := range ckeys {
			ps := strings.Split(k, "_")
			if len(ps) != 2 {
				continue
			}
			ino, _ := strconv.ParseUint(ps[0][prefix+1:], 10, 64)
			ikeys = append(ikeys, k)
			rs = append(rs, p.Exists(ctx, m.inodeKey(Ino(ino))))
		}
		if len(rs) > 0 {
			cmds, err := p.Exec(ctx)
			if err != nil {
				for _, c := range cmds {
					if c.Err() != nil {
						logger.Errorf("Check inodes with command %s: %s", c.String(), c.Err())
					}
				}
				return err
			}
			for i, rr := range rs {
				if rr.Val() == 0 {
					key := ikeys[i]
					logger.Infof("found leaked chunk %s", key)
					if delete {
						ps := strings.Split(key, "_")
						ino, _ := strconv.ParseUint(ps[0][prefix+1:], 10, 64)
						indx, _ := strconv.Atoi(ps[1])
						_ = m.deleteChunk(Ino(ino), uint32(indx))
					}
				}
			}
		}
		return nil
	})
}

func (m *redisMeta) cleanupOldSliceRefs(delete bool) {
	var ctx = Background()
	_ = m.scan(ctx, "k*", func(ckeys []string) error {
		values, err := m.rdb.MGet(ctx, ckeys...).Result()
		if err != nil {
			logger.Warnf("mget slices: %s", err)
			return err
		}
		var todel []string
		for i, v := range values {
			if v == nil {
				continue
			}
			if strings.HasPrefix(v.(string), m.prefix+"-") || v == "0" { // < 0
				// the objects will be deleted by gc
				todel = append(todel, ckeys[i])
			} else {
				vv, _ := strconv.Atoi(v.(string))
				m.rdb.HIncrBy(ctx, m.sliceRefs(), ckeys[i], int64(vv))
				m.rdb.DecrBy(ctx, ckeys[i], int64(vv))
				logger.Infof("move refs %d for slice %s", vv, ckeys[i])
			}
		}
		if delete && len(todel) > 0 {
			m.rdb.Del(ctx, todel...)
		}
		return nil
	})
}

func (m *redisMeta) toDelete(inode Ino, length uint64) string {
	return inode.String() + ":" + strconv.Itoa(int(length))
}

func (m *redisMeta) deleteChunk(inode Ino, indx uint32) error {
	var ctx = Background()
	key := m.chunkKey(inode, indx)
	var todel []*slice
	var rs []*redis.IntCmd
	err := m.txn(ctx, func(tx *redis.Tx) error {
		todel = todel[:0]
		rs = rs[:0]
		vals, err := tx.LRange(ctx, key, 0, -1).Result()
		if err != nil || len(vals) == 0 {
			return err
		}
		slices := readSlices(vals)
		if slices == nil {
			logger.Errorf("Corrupt value for inode %d chunk index %d, use `gc` to clean up leaked slices", inode, indx)
		}
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Del(ctx, key)
			for _, s := range slices {
				if s.id > 0 {
					todel = append(todel, s)
					rs = append(rs, pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.id, s.size), -1))
				}
			}
			return nil
		})
		return err
	}, key)
	if err != nil {
		return fmt.Errorf("delete slice from chunk %s fail: %s, retry later", key, err)
	}
	for i, s := range todel {
		if rs[i].Val() < 0 {
			m.deleteSlice(s.id, s.size)
		}
	}
	return nil
}

func (m *redisMeta) doDeleteFileData(inode Ino, length uint64) {
	m.doDeleteFileData_(inode, length, "")
}

func (m *redisMeta) doDeleteFileData_(inode Ino, length uint64, tracking string) {
	var ctx = Background()
	var indx uint32
	p := m.rdb.Pipeline()
	for uint64(indx)*ChunkSize < length {
		var keys []string
		for i := 0; uint64(indx)*ChunkSize < length && i < 1000; i++ {
			key := m.chunkKey(inode, indx)
			keys = append(keys, key)
			_ = p.LLen(ctx, key)
			indx++
		}
		cmds, err := p.Exec(ctx)
		if err != nil {
			logger.Warnf("delete chunks of inode %d: %s", inode, err)
			return
		}
		for i, cmd := range cmds {
			val, err := cmd.(*redis.IntCmd).Result()
			if err == redis.Nil || val == 0 {
				continue
			}
			idx, _ := strconv.Atoi(strings.Split(keys[i][len(m.prefix):], "_")[1])
			err = m.deleteChunk(inode, uint32(idx))
			if err != nil {
				logger.Warnf("delete chunk %s: %s", keys[i], err)
				return
			}
		}
	}
	if tracking == "" {
		tracking = inode.String() + ":" + strconv.FormatInt(int64(length), 10)
	}
	_ = m.rdb.ZRem(ctx, m.delfiles(), tracking)
}

func (r *redisMeta) doCleanupDelayedSlices(ctx Context, edge int64) (int, error) {
	var count int
	var ss []Slice
	var rs []*redis.IntCmd
	err := r.hscan(ctx, r.delSlices(), func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			if ctx.Canceled() {
				return ctx.Err()
			}
			key := keys[i]
			ps := strings.Split(key, "_")
			if len(ps) != 2 {
				logger.Warnf("Invalid key %s", key)
				continue
			}
			if ts, e := strconv.ParseUint(ps[1], 10, 64); e != nil {
				logger.Warnf("Invalid key %s", key)
				continue
			} else if ts >= uint64(edge) {
				continue
			}

			if err := r.txn(ctx, func(tx *redis.Tx) error {
				ss, rs = ss[:0], rs[:0]
				val, e := tx.HGet(ctx, r.delSlices(), key).Result()
				if e == redis.Nil {
					return nil
				} else if e != nil {
					return e
				}
				buf := []byte(val)
				r.decodeDelayedSlices(buf, &ss)
				if len(ss) == 0 {
					return fmt.Errorf("invalid value for delSlices %s: %v", key, buf)
				}
				_, e = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
					for _, s := range ss {
						rs = append(rs, pipe.HIncrBy(ctx, r.sliceRefs(), r.sliceKey(s.Id, s.Size), -1))
					}
					pipe.HDel(ctx, r.delSlices(), key)
					return nil
				})
				return e
			}, r.delSlices()); err != nil {
				logger.Warnf("Cleanup delSlices %s: %s", key, err)
				continue
			}
			for i, s := range ss {
				if rs[i].Err() == nil && rs[i].Val() < 0 {
					r.deleteSlice(s.Id, s.Size)
					count++
				}
				if ctx.Canceled() {
					return ctx.Err()
				}
			}
		}
		return nil
	})
	return count, err
}

func (m *redisMeta) doCompactChunk(inode Ino, indx uint32, origin []byte, ss []*slice, skipped int, pos uint32, id uint64, size uint32, delayed []byte) syscall.Errno {
	var rs []*redis.IntCmd // trash disabled: check reference of slices
	if delayed == nil {
		rs = make([]*redis.IntCmd, len(ss))
	}
	key := m.chunkKey(inode, indx)
	ctx := Background()
	st := errno(m.txn(ctx, func(tx *redis.Tx) error {
		n := len(origin) / sliceBytes
		vals2, err := tx.LRange(ctx, key, 0, int64(n-1)).Result()
		if err != nil {
			return err
		}
		if len(vals2) != n {
			return syscall.EINVAL
		}
		for i, val := range vals2 {
			if val != string(origin[i*sliceBytes:(i+1)*sliceBytes]) {
				return syscall.EINVAL
			}
		}

		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.LTrim(ctx, key, int64(n), -1)
			pipe.LPush(ctx, key, marshalSlice(pos, id, size, 0, size))
			for i := skipped; i > 0; i-- {
				pipe.LPush(ctx, key, origin[(i-1)*sliceBytes:i*sliceBytes])
			}
			pipe.HSet(ctx, m.sliceRefs(), m.sliceKey(id, size), "0") // create the key to tracking it
			if delayed != nil {
				if len(delayed) > 0 {
					pipe.HSet(ctx, m.delSlices(), fmt.Sprintf("%d_%d", id, time.Now().Unix()), delayed)
				}
			} else {
				for i, s := range ss {
					if s.id > 0 {
						rs[i] = pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.id, s.size), -1)
					}
				}
			}
			return nil
		})
		return err
	}, key))
	// there could be false-negative that the compaction is successful, double-check
	if st != 0 && st != syscall.EINVAL {
		if e := m.rdb.HGet(ctx, m.sliceRefs(), m.sliceKey(id, size)).Err(); e == nil {
			st = 0 // successful
		} else if e == redis.Nil {
			logger.Infof("compacted chunk %d was not used", id)
			st = syscall.EINVAL // failed
		}
	}

	if st == syscall.EINVAL {
		m.rdb.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(id, size), -1)
	} else if st == 0 {
		m.cleanupZeroRef(m.sliceKey(id, size))
		if delayed == nil {
			for i, s := range ss {
				if s.id > 0 && rs[i].Err() == nil && rs[i].Val() < 0 {
					m.deleteSlice(s.id, s.size)
				}
			}
		}
	}
	return st
}

func (m *redisMeta) scanAllChunks(ctx Context, ch chan<- cchunk, bar *utils.Bar) error {
	p := m.rdb.Pipeline()
	return m.scan(ctx, "c*_*", func(keys []string) error {
		for _, key := range keys {
			_ = p.LLen(ctx, key)
		}
		cmds, err := p.Exec(ctx)
		if err != nil {
			for _, c := range cmds {
				if c.Err() != nil {
					logger.Warnf("Scan chunks with command %s: %s", c.String(), c.Err())
				}
			}
			return err
		}
		for i, cmd := range cmds {
			cnt := cmd.(*redis.IntCmd).Val()
			if cnt > 1 {
				var inode uint64
				var indx uint32
				n, err := fmt.Sscanf(keys[i], m.prefix+"c%d_%d", &inode, &indx)
				if err == nil && n == 2 {
					bar.IncrTotal(1)
					ch <- cchunk{Ino(inode), indx, int(cnt)}
				}
			}
		}
		return nil
	})
}

func (m *redisMeta) cleanupLeakedInodes(delete bool) {
	var ctx = Background()
	var foundInodes = make(map[Ino]struct{})
	foundInodes[RootInode] = struct{}{}
	foundInodes[TrashInode] = struct{}{}
	cutoff := time.Now().Add(time.Hour * -1)
	prefix := len(m.prefix)

	_ = m.scan(ctx, "d[0-9]*", func(keys []string) error {
		for _, key := range keys {
			ino, _ := strconv.Atoi(key[prefix+1:])
			var entries []*Entry
			eno := m.doReaddir(ctx, Ino(ino), 0, &entries, 0)
			if eno != syscall.ENOENT && eno != 0 {
				logger.Errorf("readdir %d: %s", ino, eno)
				return eno
			}
			for _, e := range entries {
				foundInodes[e.Inode] = struct{}{}
			}
		}
		return nil
	})
	_ = m.scan(ctx, "i*", func(keys []string) error {
		values, err := m.rdb.MGet(ctx, keys...).Result()
		if err != nil {
			logger.Warnf("mget inodes: %s", err)
			return nil
		}
		for i, v := range values {
			if v == nil {
				continue
			}
			var attr Attr
			m.parseAttr([]byte(v.(string)), &attr)
			ino, _ := strconv.Atoi(keys[i][prefix+1:])
			if _, ok := foundInodes[Ino(ino)]; !ok && time.Unix(attr.Ctime, 0).Before(cutoff) {
				logger.Infof("found dangling inode: %s %+v", keys[i], attr)
				if delete {
					err = m.doDeleteSustainedInode(0, Ino(ino))
					if err != nil {
						logger.Errorf("delete leaked inode %d : %s", ino, err)
					}
				}
			}
		}
		return nil
	})
}

func (m *redisMeta) scan(ctx context.Context, pattern string, f func([]string) error) error {
	var rdb *redis.Client
	if c, ok := m.rdb.(*redis.ClusterClient); ok {
		var err error
		rdb, err = c.MasterForKey(ctx, m.prefix)
		if err != nil {
			return err
		}
	} else {
		rdb = m.rdb.(*redis.Client)
	}
	var cursor uint64
	for {
		keys, c, err := rdb.Scan(ctx, cursor, m.prefix+pattern, 10000).Result()
		if err != nil {
			logger.Warnf("scan %s: %s", pattern, err)
			return err
		}
		if len(keys) > 0 {
			err = f(keys)
			if err != nil {
				return err
			}
		}
		if c == 0 {
			break
		}
		cursor = c
	}
	return nil
}

func (m *redisMeta) hscan(ctx context.Context, key string, f func([]string) error) error {
	var cursor uint64
	for {
		keys, c, err := m.rdb.HScan(ctx, key, cursor, "*", 10000).Result()
		if err != nil {
			logger.Warnf("HSCAN %s: %s", key, err)
			return err
		}
		if len(keys) > 0 {
			if err = f(keys); err != nil {
				return err
			}
		}
		if c == 0 {
			break
		}
		cursor = c
	}
	return nil
}

func (m *redisMeta) ListSlices(ctx Context, slices map[Ino][]Slice, scanPending, delete bool, showProgress func()) syscall.Errno {
	logger.Debugf("start cleanup...")
	m.cleanupLeakedInodes(delete)
	m.cleanupLeakedChunks(delete)
	m.cleanupOldSliceRefs(delete)
	if delete {
		_ = m.doCleanupSlices(ctx, nil)
	}
	logger.Debugf("start listing slices...")

	p := m.rdb.Pipeline()
	err := m.scan(ctx, "c*_*", func(keys []string) error {
		for _, key := range keys {
			_ = p.LRange(ctx, key, 0, -1)
		}
		cmds, err := p.Exec(ctx)
		if err != nil {
			for _, c := range cmds {
				if c.Err() != nil {
					logger.Warnf("List slices with command %s: %s", c.String(), c.Err())
				}
			}
			return err
		}
		for _, cmd := range cmds {
			key := cmd.(*redis.StringSliceCmd).Args()[1].(string)
			inode, _ := strconv.Atoi(strings.Split(key[len(m.prefix)+1:], "_")[0])
			vals := cmd.(*redis.StringSliceCmd).Val()
			ss := readSlices(vals)
			if ss == nil {
				logger.Errorf("Corrupt value for inode %d chunk key %s", inode, key)
				continue
			}
			for _, s := range ss {
				if s.id > 0 {
					slices[Ino(inode)] = append(slices[Ino(inode)], Slice{Id: s.id, Size: s.size})
					if showProgress != nil {
						showProgress()
					}
				}
			}
		}
		return nil
	})
	if err != nil {
		logger.Warnf("scan chunks: %s", err)
		return errno(err)
	}

	if scanPending {
		_ = m.hscan(Background(), m.sliceRefs(), func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				key, val := keys[i], keys[i+1]
				if strings.HasPrefix(val, "-") { // < 0
					ps := strings.Split(key, "_")
					if len(ps) == 2 {
						id, _ := strconv.ParseUint(ps[0][1:], 10, 64)
						size, _ := strconv.ParseUint(ps[1], 10, 32)
						if id > 0 && size > 0 {
							slices[0] = append(slices[0], Slice{Id: id, Size: uint32(size)})
						}
					}
				}
			}
			return nil
		})
	}

	if m.getFormat().TrashDays == 0 {
		return 0
	}
	return errno(m.scanTrashSlices(ctx, func(ss []Slice, _ int64) (bool, error) {
		slices[1] = append(slices[1], ss...)
		if showProgress != nil {
			for range ss {
				showProgress()
			}
		}
		return false, nil
	}))
}

func (m *redisMeta) scanTrashSlices(ctx Context, scan trashSliceScan) error {
	if scan == nil {
		return nil
	}

	delKeys := make(chan string, 1000)
	c, cancel := context.WithCancel(ctx)
	defer cancel()
	go func() {
		_ = m.hscan(c, m.delSlices(), func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				delKeys <- keys[i]
			}
			return nil
		})
		close(delKeys)
	}()

	var ss []Slice
	var rs []*redis.IntCmd
	for key := range delKeys {
		var clean bool
		task := func(tx *redis.Tx) error {
			ss = ss[:0]
			rs = rs[:0]
			val, err := tx.HGet(ctx, m.delSlices(), key).Result()
			if err == redis.Nil {
				return nil
			} else if err != nil {
				return err
			}
			ps := strings.Split(key, "_")
			if len(ps) != 2 {
				return fmt.Errorf("invalid key %s", key)
			}
			ts, err := strconv.ParseInt(ps[1], 10, 64)
			if err != nil {
				return fmt.Errorf("invalid key %s, fail to parse timestamp", key)
			}

			m.decodeDelayedSlices([]byte(val), &ss)
			clean, err = scan(ss, ts)
			if err != nil {
				return err
			}
			if clean {
				_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
					for _, s := range ss {
						rs = append(rs, pipe.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.Id, s.Size), -1))
					}
					pipe.HDel(ctx, m.delSlices(), key)
					return nil
				})
			}
			return err
		}
		err := m.txn(ctx, task, m.delSlices())
		if err != nil {
			return err
		}
		if clean && len(rs) == len(ss) {
			for i, s := range ss {
				if rs[i].Err() == nil && rs[i].Val() < 0 {
					m.deleteSlice(s.Id, s.Size)
				}
			}
		}
	}

	return nil
}

func (m *redisMeta) scanPendingSlices(ctx Context, scan pendingSliceScan) error {
	if scan == nil {
		return nil
	}

	pendingKeys := make(chan string, 1000)
	c, cancel := context.WithCancel(ctx)
	defer cancel()
	go func() {
		_ = m.hscan(c, m.sliceRefs(), func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				val := keys[i+1]
				refs, err := strconv.ParseInt(val, 10, 64)
				if err != nil {
					// ignored
					logger.Warn(errors.Wrapf(err, "parse slice ref: %s", val))
					return nil
				}
				if refs < 0 {
					pendingKeys <- keys[i]
				}
			}
			return nil
		})
		close(pendingKeys)
	}()

	for key := range pendingKeys {
		ps := strings.Split(key[1:], "_")
		if len(ps) != 2 {
			return fmt.Errorf("invalid key %s", key)
		}
		id, err := strconv.ParseUint(ps[0], 10, 64)
		if err != nil {
			return errors.Wrapf(err, "invalid key %s, fail to parse id", key)
		}
		size, err := strconv.ParseUint(ps[1], 10, 64)
		if err != nil {
			return errors.Wrapf(err, "invalid key %s, fail to parse size", key)
		}
		clean, err := scan(id, uint32(size))
		if err != nil {
			return errors.Wrap(err, "scan pending slices")
		}
		if clean {
			// TODO: m.deleteSlice(id, uint32(size))
			// avoid lint warning
			_ = clean
		}
	}
	return nil
}

func (m *redisMeta) scanPendingFiles(ctx Context, scan pendingFileScan) error {
	if scan == nil {
		return nil
	}

	visited := make(map[Ino]bool)
	start := int64(0)
	const batchSize = 1000

	for {
		pairs, err := m.rdb.ZRangeWithScores(Background(), m.delfiles(), start, start+batchSize).Result()
		if err != nil {
			return err
		}

		for _, p := range pairs {
			v := p.Member.(string)
			ps := strings.Split(v, ":")
			if len(ps) != 2 { // will be cleaned up as legacy
				continue
			}
			inode, _ := strconv.ParseUint(ps[0], 10, 64)
			if visited[Ino(inode)] {
				continue
			}
			visited[Ino(inode)] = true
			size, _ := strconv.ParseUint(ps[1], 10, 64)
			if _, err := scan(Ino(inode), size, int64(p.Score)); err != nil {
				return err
			}
		}

		start += batchSize
		if len(pairs) < batchSize {
			break
		}
	}

	return nil
}

func (m *redisMeta) doRepair(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		attr.Nlink = 2
		vals, err := tx.HGetAll(ctx, m.entryKey(inode)).Result()
		if err != nil {
			return err
		}
		for _, v := range vals {
			typ, _ := m.parseEntry([]byte(v))
			if typ == TypeDirectory {
				attr.Nlink++
			}
		}
		_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
			return nil
		})
		return err
	}, m.inodeKey(inode), m.entryKey(inode)))
}

func (m *redisMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno {
	defer m.timeit("GetXattr", time.Now())
	inode = m.checkRoot(inode)
	var err error
	*vbuff, err = m.rdb.HGet(ctx, m.xattrKey(inode), name).Bytes()
	if err == redis.Nil {
		err = ENOATTR
	}
	return errno(err)
}

func (m *redisMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno {
	defer m.timeit("ListXattr", time.Now())
	inode = m.checkRoot(inode)
	vals, err := m.rdb.HKeys(ctx, m.xattrKey(inode)).Result()
	if err != nil {
		return errno(err)
	}
	*names = nil
	for _, name := range vals {
		*names = append(*names, []byte(name)...)
		*names = append(*names, 0)
	}

	val, err := m.rdb.Get(ctx, m.inodeKey(inode)).Bytes()
	if err != nil {
		return errno(err)
	}
	attr := &Attr{}
	m.parseAttr(val, attr)
	setXAttrACL(names, attr.AccessACL, attr.DefaultACL)
	return 0
}

func (m *redisMeta) doSetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno {
	key := m.xattrKey(inode)
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		switch flags {
		case XattrCreate:
			ok, err := tx.HSetNX(ctx, key, name, value).Result()
			if err != nil {
				return err
			}
			if !ok {
				return syscall.EEXIST
			}
			return nil
		case XattrReplace:
			if ok, err := tx.HExists(ctx, key, name).Result(); err != nil {
				return err
			} else if !ok {
				return ENOATTR
			}
			_, err := tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.HSet(ctx, key, name, value)
				return nil
			})
			return err
		default: // XattrCreateOrReplace
			_, err := tx.HSet(ctx, key, name, value).Result()
			return err
		}
	}, key))
}

func (m *redisMeta) doRemoveXattr(ctx Context, inode Ino, name string) syscall.Errno {
	n, err := m.rdb.HDel(ctx, m.xattrKey(inode), name).Result()
	if err != nil {
		return errno(err)
	} else if n == 0 {
		return ENOATTR
	} else {
		return 0
	}
}

type quotaKeys struct {
	quotaKey      string
	usedSpaceKey  string
	usedInodesKey string
}

func (m *redisMeta) getQuotaKeys(qtype uint32) (*quotaKeys, error) {
	switch qtype {
	case DirQuotaType:
		return &quotaKeys{
			quotaKey:      m.dirQuotaKey(),
			usedSpaceKey:  m.dirQuotaUsedSpaceKey(),
			usedInodesKey: m.dirQuotaUsedInodesKey(),
		}, nil
	case UserQuotaType:
		return &quotaKeys{
			quotaKey:      m.userQuotaKey(),
			usedSpaceKey:  m.userQuotaUsedSpaceKey(),
			usedInodesKey: m.userQuotaUsedInodesKey(),
		}, nil
	case GroupQuotaType:
		return &quotaKeys{
			quotaKey:      m.groupQuotaKey(),
			usedSpaceKey:  m.groupQuotaUsedSpaceKey(),
			usedInodesKey: m.groupQuotaUsedInodesKey(),
		}, nil
	default:
		return nil, fmt.Errorf("unknown quota type: %d", qtype)
	}
}

func (m *redisMeta) doGetQuota(ctx Context, qtype uint32, key uint64) (*Quota, error) {
	config, err := m.getQuotaKeys(qtype)
	if err != nil {
		return nil, err
	}

	field := strconv.FormatUint(key, 10)
	cmds, err := m.rdb.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
		pipe.HGet(ctx, config.quotaKey, field)
		pipe.HGet(ctx, config.usedSpaceKey, field)
		pipe.HGet(ctx, config.usedInodesKey, field)
		return nil
	})
	if err == redis.Nil {
		return nil, nil
	} else if err != nil {
		return nil, err
	}

	buf, _ := cmds[0].(*redis.StringCmd).Bytes()
	if len(buf) != 16 {
		return nil, fmt.Errorf("invalid quota value: %v", buf)
	}

	var quota Quota
	quota.MaxSpace, quota.MaxInodes = m.parseQuota(buf)
	if quota.UsedSpace, err = cmds[1].(*redis.StringCmd).Int64(); err != nil {
		return nil, err
	}
	if quota.UsedInodes, err = cmds[2].(*redis.StringCmd).Int64(); err != nil {
		return nil, err
	}
	return &quota, nil
}

func (m *redisMeta) doSetQuota(ctx Context, qtype uint32, key uint64, quota *Quota) (bool, error) {
	config, err := m.getQuotaKeys(qtype)
	if err != nil {
		return false, err
	}

	var created bool
	err = m.txn(ctx, func(tx *redis.Tx) error {
		origin := &Quota{MaxSpace: -1, MaxInodes: -1}
		field := strconv.FormatUint(key, 10)

		buf, e := tx.HGet(ctx, config.quotaKey, field).Bytes()
		if e == nil {
			created = false
			origin.MaxSpace, origin.MaxInodes = m.parseQuota(buf)
		} else if e == redis.Nil {
			created = true
		} else {
			return e
		}

		if quota.MaxSpace >= 0 {
			origin.MaxSpace = quota.MaxSpace
		}
		if quota.MaxInodes >= 0 {
			origin.MaxInodes = quota.MaxInodes
		}

		_, e = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.HSet(ctx, config.quotaKey, field, m.packQuota(origin.MaxSpace, origin.MaxInodes))
			if quota.UsedSpace >= 0 {
				pipe.HSet(ctx, config.usedSpaceKey, field, quota.UsedSpace)
			} else if created {
				pipe.HSet(ctx, config.usedSpaceKey, field, 0)
			}
			if quota.UsedInodes >= 0 {
				pipe.HSet(ctx, config.usedInodesKey, field, quota.UsedInodes)
			} else if created {
				pipe.HSet(ctx, config.usedInodesKey, field, 0)
			}
			return nil
		})
		return e
	}, m.inodeKey(Ino(key)))
	return created, err
}

func (m *redisMeta) doDelQuota(ctx Context, qtype uint32, key uint64) error {
	config, err := m.getQuotaKeys(qtype)
	if err != nil {
		return err
	}

	field := strconv.FormatUint(key, 10)
	_, err = m.rdb.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
		if qtype == UserQuotaType || qtype == GroupQuotaType {
			quotaData := m.packQuota(-1, -1) // -1 means unlimited
			pipe.HSet(ctx, config.quotaKey, field, quotaData)
		} else {
			pipe.HDel(ctx, config.quotaKey, field)
			pipe.HDel(ctx, config.usedSpaceKey, field)
			pipe.HDel(ctx, config.usedInodesKey, field)
		}
		return nil
	})
	return err
}

func (m *redisMeta) doLoadQuotas(ctx Context) (map[uint64]*Quota, map[uint64]*Quota, map[uint64]*Quota, error) {
	quotaTypes := []struct {
		qtype uint32
		name  string
	}{
		{DirQuotaType, "dir"},
		{UserQuotaType, "user"},
		{GroupQuotaType, "group"},
	}

	quotaMaps := make([]map[uint64]*Quota, 3)
	for i, qt := range quotaTypes {
		config, err := m.getQuotaKeys(qt.qtype)
		if err != nil {
			return nil, nil, nil, fmt.Errorf("failed to load %s quotas: %w", qt.name, err)
		}

		quotas := make(map[uint64]*Quota)
		if err := m.hscan(ctx, config.usedInodesKey, func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				key := keys[i]
				id, err := strconv.ParseUint(key, 10, 64)
				if err != nil {
					logger.Errorf("invalid key in %s: %s", qt.name, key)
					continue
				}
				usedInodes, err := strconv.ParseInt(keys[i+1], 10, 64)
				if err != nil {
					logger.Errorf("invalid usedInodes for %s %s: %s", qt.name, key, keys[i+1])
					continue
				}

				usedSpace, err := m.rdb.HGet(ctx, config.usedSpaceKey, key).Int64()
				if err != nil && err != redis.Nil {
					return err
				}

				var maxSpace, maxInodes int64 = -1, -1
				if buf, err := m.rdb.HGet(ctx, config.quotaKey, key).Bytes(); err == nil {
					if len(buf) != 16 {
						logger.Errorf("invalid quota value for %s %s: len=%d", qt.name, key, len(buf))
						continue
					}
					maxSpace, maxInodes = m.parseQuota(buf)
				} else if err != redis.Nil {
					return err
				}

				quotas[id] = &Quota{
					MaxSpace:   maxSpace,
					MaxInodes:  maxInodes,
					UsedSpace:  usedSpace,
					UsedInodes: usedInodes,
				}
			}
			return nil
		}); err != nil {
			return nil, nil, nil, err
		}
		quotaMaps[i] = quotas
	}

	return quotaMaps[0], quotaMaps[1], quotaMaps[2], nil
}

func (m *redisMeta) doFlushQuotas(ctx Context, quotas []*iQuota) error {
	_, err := m.rdb.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
		for _, q := range quotas {
			config, err := m.getQuotaKeys(q.qtype)
			if err != nil {
				return err
			}

			key := strconv.FormatUint(q.qkey, 10)
			pipe.HSetNX(ctx, config.quotaKey, key, m.packQuota(-1, -1))
			pipe.HIncrBy(ctx, config.usedSpaceKey, key, q.quota.newSpace)
			pipe.HIncrBy(ctx, config.usedInodesKey, key, q.quota.newInodes)
		}
		return nil
	})
	return err
}

func (m *redisMeta) checkServerConfig() {
	rawInfo, err := m.rdb.Info(Background()).Result()
	if err != nil {
		logger.Warnf("parse info: %s", err)
		return
	}
	rInfo, err := checkRedisInfo(rawInfo)
	if err != nil {
		logger.Warnf("parse info: %s", err)
	}
	if rInfo.storageProvider == "" && rInfo.maxMemoryPolicy != "" && rInfo.maxMemoryPolicy != "noeviction" {
		logger.Warnf("maxmemory_policy is %q,  we will try to reconfigure it to 'noeviction'.", rInfo.maxMemoryPolicy)
		if _, err := m.rdb.ConfigSet(Background(), "maxmemory-policy", "noeviction").Result(); err != nil {
			logger.Errorf("try to reconfigure maxmemory-policy to 'noeviction' failed: %s", err)
		} else if result, err := m.rdb.ConfigGet(Background(), "maxmemory-policy").Result(); err != nil {
			logger.Warnf("get config maxmemory-policy failed: %s", err)
		} else if len(result) == 1 && result["maxmemory-policy"] != "noeviction" {
			logger.Warnf("reconfigured maxmemory-policy to 'noeviction', but it's still %s", result["maxmemory-policy"])
		} else {
			logger.Infof("set maxmemory-policy to 'noeviction' successfully")
		}
	}
	start := time.Now()
	_, err = m.rdb.Ping(Background()).Result()
	if err != nil {
		logger.Errorf("Ping redis: %s", err.Error())
		return
	}
	logger.Infof("Ping redis latency: %s", time.Since(start))
}

func (m *redisMeta) dumpEntries(es ...*DumpedEntry) error {
	ctx := Background()
	var keys []string
	for _, e := range es {
		keys = append(keys, m.inodeKey(e.Attr.Inode))
	}
	return m.txn(ctx, func(tx *redis.Tx) error {
		p := tx.Pipeline()
		var ar = make([]*redis.StringCmd, len(es))
		var xr = make([]*redis.MapStringStringCmd, len(es))
		var sr = make([]*redis.StringCmd, len(es))
		var cr = make([]*redis.StringSliceCmd, len(es))
		var dr = make([]*redis.ScanCmd, len(es))
		for i, e := range es {
			inode := e.Attr.Inode
			ar[i] = p.Get(ctx, m.inodeKey(inode))
			xr[i] = p.HGetAll(ctx, m.xattrKey(inode))
			switch e.Attr.Type {
			case "regular":
				cr[i] = p.LRange(ctx, m.chunkKey(inode, 0), 0, -1)
			case "directory":
				dr[i] = p.HScan(ctx, m.entryKey(inode), 0, "*", 1000)
			case "symlink":
				sr[i] = p.Get(ctx, m.symKey(inode))
			}
		}
		if _, err := p.Exec(ctx); err != nil && err != redis.Nil {
			return err
		}

		type lchunk struct {
			inode Ino
			indx  uint32
			i     uint32
		}
		var lcs []*lchunk
		for i, e := range es {
			inode := e.Attr.Inode
			typ := typeFromString(e.Attr.Type)
			a, err := ar[i].Bytes()
			if err != nil {
				if err != redis.Nil {
					return err
				}
				if inode != TrashInode {
					logger.Warnf("Corrupt inode: %d, missing attribute", inode)
				}
			}

			var attr Attr
			attr.Typ = typ
			attr.Nlink = 1
			m.parseAttr(a, &attr)
			if attr.Typ != typ {
				e.Attr.Type = typeToString(attr.Typ)
				return redis.TxFailedErr // retry
			}
			if err == redis.Nil && attr.Typ == TypeDirectory {
				attr.Nlink = 2
			}
			dumpAttr(&attr, e.Attr)

			keys, err := xr[i].Result()
			if err != nil {
				return err
			}
			if len(keys) > 0 {
				xattrs := make([]*DumpedXattr, 0, len(keys))
				for k, v := range keys {
					xattrs = append(xattrs, &DumpedXattr{k, v})
				}
				sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name })
				e.Xattrs = xattrs
			}

			accessACl, err := m.getACL(ctx, tx, attr.AccessACL)
			if err != nil {
				return err
			}
			e.AccessACL = dumpACL(accessACl)
			defaultACL, err := m.getACL(ctx, tx, attr.DefaultACL)
			if err != nil {
				return err
			}
			e.DefaultACL = dumpACL(defaultACL)

			switch typ {
			case TypeFile:
				e.Chunks = e.Chunks[:0]
				if attr.Length > 0 {
					vals, err := cr[i].Result()
					if err != nil {
						return err
					}
					if len(vals) > 0 {
						ss := readSlices(vals)
						if ss == nil {
							logger.Errorf("Corrupt value for inode %d chunk index %d", inode, 0)
						}
						slices := make([]*DumpedSlice, 0, len(ss))
						for _, s := range ss {
							slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
						}
						e.Chunks = append(e.Chunks, &DumpedChunk{0, slices})
					}
				}
				if attr.Length > ChunkSize {
					for indx := uint32(1); uint64(indx)*ChunkSize < attr.Length; indx++ {
						lcs = append(lcs, &lchunk{inode, indx, uint32(i)})
					}
				}
			case TypeDirectory:
				keys, cursor, err := dr[i].Result()
				if err != nil {
					return err
				}
				if cursor == 0 {
					e.Entries = make(map[string]*DumpedEntry)
					for i := 0; i < len(keys); i += 2 {
						name := keys[i]
						t, inode := m.parseEntry([]byte(keys[i+1]))
						ce := entryPool.Get()
						ce.Name = name
						ce.Attr.Inode = inode
						ce.Attr.Type = typeToString(t)
						e.Entries[name] = ce
					}
				}
			case TypeSymlink:
				if e.Symlink, err = sr[i].Result(); err != nil {
					if err != redis.Nil {
						return err
					}
					logger.Warnf("The symlink of inode %d is not found", inode)
				}
			}
		}

		cr = make([]*redis.StringSliceCmd, len(es)*3)
		for len(lcs) > 0 {
			if len(cr) > len(lcs) {
				cr = cr[:len(lcs)]
			}
			for i := range cr {
				c := lcs[i]
				cr[i] = p.LRange(ctx, m.chunkKey(c.inode, c.indx), 0, -1)
			}
			if _, err := p.Exec(ctx); err != nil {
				return err
			}
			for i := range cr {
				vals, err := cr[i].Result()
				if err != nil {
					return err
				}
				if len(vals) > 0 {
					e := es[lcs[i].i]
					ss := readSlices(vals)
					if ss == nil {
						logger.Errorf("Corrupt value for inode %d chunk index %d", e.Attr.Inode, lcs[i].indx)
					}
					slices := make([]*DumpedSlice, 0, len(ss))
					for _, s := range ss {
						slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
					}
					e.Chunks = append(e.Chunks, &DumpedChunk{lcs[i].indx, slices})
				}
			}
			lcs = lcs[len(cr):]
		}
		return nil
	}, keys...)
}

func (m *redisMeta) dumpDir(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth, threads int, showProgress func(totalIncr, currentIncr int64)) error {
	bwWrite := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}

	if tree.Entries == nil {
		tree.Entries = make(map[string]*DumpedEntry)
		err := m.hscan(Background(), m.entryKey(inode), func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				name := keys[i]
				t, inode := m.parseEntry([]byte(keys[i+1]))
				e := entryPool.Get()
				e.Name = name
				e.Attr.Inode = inode
				e.Attr.Type = typeToString(t)
				tree.Entries[name] = e
			}
			return nil
		})
		if err != nil {
			return err
		}
	}

	var err error
	if err = tree.writeJsonWithOutEntry(bw, depth); err != nil {
		return err
	}
	entries := make([]*DumpedEntry, 0, len(tree.Entries))
	for _, e := range tree.Entries {
		entries = append(entries, e)
	}
	sort.Slice(entries, func(i, j int) bool { return entries[i].Name < entries[j].Name })
	if showProgress != nil {
		showProgress(int64(len(entries)), 0)
	}

	var batch = 100
	ms := make([]sync.Mutex, threads)
	conds := make([]*sync.Cond, threads)
	ready := make([]int, threads)
	for c := 0; c < threads; c++ {
		conds[c] = sync.NewCond(&ms[c])
		if c*batch < len(entries) {
			go func(c int) {
				for i := c * batch; i < len(entries) && err == nil; i += threads * batch {
					es := entries[i:]
					if len(es) > batch {
						es = es[:batch]
					}
					e := m.dumpEntries(es...)
					ms[c].Lock()
					ready[c] = len(es)
					if e != nil {
						err = e
					}
					conds[c].Signal()
					for ready[c] > 0 && err == nil {
						conds[c].Wait()
					}
					ms[c].Unlock()
				}
			}(c)
		}
	}
	for i, e := range entries {
		b := i / batch
		c := b % threads
		ms[c].Lock()
		for ready[c] == 0 && err == nil {
			conds[c].Wait()
		}
		ready[c]--
		if ready[c] == 0 {
			conds[c].Signal()
		}
		ms[c].Unlock()
		if err != nil {
			return err
		}
		if e.Attr.Type == "directory" {
			err = m.dumpDir(e.Attr.Inode, e, bw, depth+2, threads, showProgress)
		} else {
			err = e.writeJSON(bw, depth+2)
		}
		entries[i] = nil
		entryPool.Put(e)
		if err != nil {
			return err
		}
		if i != len(entries)-1 {
			bwWrite(",")
		}
		if showProgress != nil {
			showProgress(0, 1)
		}
	}
	bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth)))
	return nil
}

func (m *redisMeta) DumpMeta(w io.Writer, root Ino, threads int, keepSecret, fast, skipTrash bool) (err error) {
	defer func() {
		if p := recover(); p != nil {
			debug.PrintStack()
			if e, ok := p.(error); ok {
				err = e
			} else {
				err = errors.Errorf("DumpMeta error: %v", p)
			}
		}
	}()
	ctx := Background()
	zs, err := m.rdb.ZRangeWithScores(ctx, m.delfiles(), 0, -1).Result()
	if err != nil {
		return err
	}
	dels := make([]*DumpedDelFile, 0, len(zs))
	for _, z := range zs {
		parts := strings.Split(z.Member.(string), ":")
		if len(parts) != 2 {
			logger.Warnf("invalid delfile string: %s", z.Member.(string))
			continue
		}
		inode, _ := strconv.ParseUint(parts[0], 10, 64)
		length, _ := strconv.ParseUint(parts[1], 10, 64)
		dels = append(dels, &DumpedDelFile{Ino(inode), length, int64(z.Score)})
	}

	names := []string{usedSpace, totalInodes, "nextinode", "nextchunk", "nextsession", "nextTrash"}
	for i := range names {
		names[i] = m.prefix + names[i]
	}
	rs, _ := m.rdb.MGet(ctx, names...).Result()
	cs := make([]int64, len(rs))
	for i, r := range rs {
		if r != nil {
			cs[i], _ = strconv.ParseInt(r.(string), 10, 64)
		}
	}

	keys, err := m.rdb.ZRange(ctx, m.allSessions(), 0, -1).Result()
	if err != nil {
		return err
	}
	sessions := make([]*DumpedSustained, 0, len(keys))
	for _, k := range keys {
		sid, _ := strconv.ParseUint(k, 10, 64)
		var ss []string
		ss, err = m.rdb.SMembers(ctx, m.sustained(sid)).Result()
		if err != nil {
			return err
		}
		if len(ss) > 0 {
			inodes := make([]Ino, 0, len(ss))
			for _, s := range ss {
				inode, _ := strconv.ParseUint(s, 10, 64)
				inodes = append(inodes, Ino(inode))
			}
			sessions = append(sessions, &DumpedSustained{sid, inodes})
		}
	}
	quotas := make(map[Ino]*DumpedQuota)
	for k, v := range m.rdb.HGetAll(ctx, m.dirQuotaKey()).Val() {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse inode: %s: %v", k, err)
			continue
		}
		if len(v) != 16 {
			logger.Warnf("invalid quota string: %s", hex.EncodeToString([]byte(v)))
			continue
		}
		var quota DumpedQuota
		quota.MaxSpace, quota.MaxInodes = m.parseQuota([]byte(v))
		quotas[Ino(inode)] = &quota
	}

	dm := &DumpedMeta{
		Setting: *m.getFormat(),
		Counters: &DumpedCounters{
			UsedSpace:   cs[0],
			UsedInodes:  cs[1],
			NextInode:   cs[2] + 1, // Redis nextInode/nextChunk is 1 smaller than sql/tkv
			NextChunk:   cs[3] + 1,
			NextSession: cs[4],
			NextTrash:   cs[5],
		},
		Sustained: sessions,
		DelFiles:  dels,
		Quotas:    quotas,
	}
	if !keepSecret && dm.Setting.SecretKey != "" {
		dm.Setting.SecretKey = "removed"
		logger.Warnf("Secret key is removed for the sake of safety")
	}
	if !keepSecret && dm.Setting.SessionToken != "" {
		dm.Setting.SessionToken = "removed"
		logger.Warnf("Session token is removed for the sake of safety")
	}
	bw, err := dm.writeJsonWithOutTree(w)
	if err != nil {
		return err
	}
	root = m.checkRoot(root)
	progress := utils.NewProgress(false)
	bar := progress.AddCountBar("Dumped entries", 1) // with root
	useTotal := root == RootInode && !skipTrash
	if useTotal {
		bar.SetTotal(dm.Counters.UsedInodes)
	}

	showProgress := func(totalIncr, currentIncr int64) {
		if !useTotal {
			bar.IncrTotal(totalIncr)
		}
		bar.IncrInt64(currentIncr)
	}

	var tree = &DumpedEntry{
		Name: "FSTree",
		Attr: &DumpedAttr{
			Inode: root,
			Type:  typeToString(TypeDirectory),
		},
	}
	if err = m.dumpEntries(tree); err != nil {
		return err
	}
	bar.Increment()
	if err = m.dumpDir(root, tree, bw, 1, threads, showProgress); err != nil {
		return err
	}
	if root == RootInode && !skipTrash {
		trash := &DumpedEntry{
			Name: "Trash",
			Attr: &DumpedAttr{
				Inode: TrashInode,
				Type:  typeToString(TypeDirectory),
			},
		}
		if err = m.dumpEntries(trash); err != nil {
			return err
		}
		if _, err = bw.WriteString(","); err != nil {
			return err
		}
		if err = m.dumpDir(TrashInode, trash, bw, 1, threads, showProgress); err != nil {
			return err
		}
	}
	if _, err = bw.WriteString("\n}\n"); err != nil {
		return err
	}
	progress.Done()

	return bw.Flush()
}

func (m *redisMeta) loadEntry(e *DumpedEntry, p redis.Pipeliner, tryExec func(), aclMaxId *uint32) {
	ctx := Background()
	inode := e.Attr.Inode
	attr := loadAttr(e.Attr)
	attr.Parent = e.Parents[0]
	batch := 100
	if attr.Typ == TypeFile {
		attr.Length = e.Attr.Length
		for _, c := range e.Chunks {
			if len(c.Slices) == 0 {
				continue
			}
			slices := make([]string, 0, len(c.Slices))
			for _, s := range c.Slices {
				slices = append(slices, string(marshalSlice(s.Pos, s.Id, s.Size, s.Off, s.Len)))
				if len(slices) > batch {
					p.RPush(ctx, m.chunkKey(inode, c.Index), slices)
					tryExec()
					slices = slices[:0]
				}
			}
			if len(slices) > 0 {
				p.RPush(ctx, m.chunkKey(inode, c.Index), slices)
			}
		}
	} else if attr.Typ == TypeDirectory {
		attr.Length = 4 << 10
		dentries := make(map[string]interface{}, batch)
		var stat dirStat
		for name, c := range e.Entries {
			length := uint64(0)
			if typeFromString(c.Attr.Type) == TypeFile {
				length = c.Attr.Length
			}
			stat.length += int64(length)
			stat.space += align4K(length)
			stat.inodes++

			dentries[string(unescape(name))] = m.packEntry(typeFromString(c.Attr.Type), c.Attr.Inode)
			if len(dentries) >= batch {
				p.HSet(ctx, m.entryKey(inode), dentries)
				tryExec()
				dentries = make(map[string]interface{}, batch)
			}
		}
		if len(dentries) > 0 {
			p.HSet(ctx, m.entryKey(inode), dentries)
		}
		field := inode.String()
		p.HSet(ctx, m.dirDataLengthKey(), field, stat.length)
		p.HSet(ctx, m.dirUsedSpaceKey(), field, stat.space)
		p.HSet(ctx, m.dirUsedInodesKey(), field, stat.inodes)
	} else if attr.Typ == TypeSymlink {
		symL := unescape(e.Symlink)
		attr.Length = uint64(len(symL))
		p.Set(ctx, m.symKey(inode), symL, 0)
	}

	if len(e.Xattrs) > 0 {
		xattrs := make(map[string]interface{})
		for _, x := range e.Xattrs {
			xattrs[x.Name] = unescape(x.Value)
		}
		p.HSet(ctx, m.xattrKey(inode), xattrs)
	}

	attr.AccessACL = m.saveACL(loadACL(e.AccessACL), aclMaxId)
	attr.DefaultACL = m.saveACL(loadACL(e.DefaultACL), aclMaxId)

	p.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
	tryExec()
}

func (m *redisMeta) LoadMeta(r io.Reader) (err error) {
	ctx := Background()
	if _, ok := m.rdb.(*redis.ClusterClient); ok {
		err = m.scan(ctx, "*", func(keys []string) error {
			return fmt.Errorf("found key with same prefix: %s", keys[0])
		})
		if err != nil {
			return err
		}
	} else {
		dbsize, err := m.rdb.DBSize(ctx).Result()
		if err != nil {
			return err
		}
		if dbsize > 0 {
			return fmt.Errorf("Database redis://%s is not empty", m.addr)
		}
	}

	p := m.rdb.TxPipeline()
	tryExec := func() {
		if p.Len() > 1000 {
			if rs, err := p.Exec(ctx); err != nil {
				for i, r := range rs {
					if r.Err() != nil {
						logger.Errorf("failed command %d %+v: %s", i, r, r.Err())
						break
					}
				}
				panic(err)
			}
		}
	}
	defer func() {
		if e := recover(); e != nil {
			if ee, ok := e.(error); ok {
				err = ee
			} else {
				panic(e)
			}
		}
	}()

	var aclMaxId uint32
	dm, counters, parents, refs, err := loadEntries(r, func(e *DumpedEntry) { m.loadEntry(e, p, tryExec, &aclMaxId) }, nil)
	if err != nil {
		return err
	}
	m.loadDumpedQuotas(ctx, dm.Quotas)
	if err = m.loadDumpedACLs(ctx); err != nil {
		return err
	}
	format, _ := json.MarshalIndent(dm.Setting, "", "")
	p.Set(ctx, m.setting(), format, 0)
	cs := make(map[string]interface{})
	cs[m.prefix+usedSpace] = counters.UsedSpace
	cs[m.prefix+totalInodes] = counters.UsedInodes
	cs[m.prefix+"nextinode"] = counters.NextInode - 1
	cs[m.prefix+"nextchunk"] = counters.NextChunk - 1
	cs[m.prefix+"nextsession"] = counters.NextSession
	cs[m.prefix+"nextTrash"] = counters.NextTrash
	p.MSet(ctx, cs)
	if l := len(dm.DelFiles); l > 0 {
		if l > 100 {
			l = 100
		}
		zs := make([]redis.Z, 0, l)
		for _, d := range dm.DelFiles {
			if len(zs) >= 100 {
				p.ZAdd(ctx, m.delfiles(), zs...)
				tryExec()
				zs = zs[:0]
			}
			zs = append(zs, redis.Z{
				Score:  float64(d.Expire),
				Member: m.toDelete(d.Inode, d.Length),
			})
		}
		p.ZAdd(ctx, m.delfiles(), zs...)
	}
	slices := make(map[string]interface{})
	for k, v := range refs {
		if v > 1 {
			if len(slices) > 100 {
				p.HSet(ctx, m.sliceRefs(), slices)
				tryExec()
				slices = make(map[string]interface{})
			}
			slices[m.sliceKey(k.id, k.size)] = v - 1
		}
	}
	if len(slices) > 0 {
		p.HSet(ctx, m.sliceRefs(), slices)
	}
	if _, err = p.Exec(ctx); err != nil {
		return err
	}

	// update nlinks and parents for hardlinks
	st := make(map[Ino]int64)
	for i, ps := range parents {
		if len(ps) > 1 {
			a, _ := m.rdb.Get(ctx, m.inodeKey(i)).Bytes()
			// reset nlink and parent
			binary.BigEndian.PutUint32(a[47:51], uint32(len(ps))) // nlink
			binary.BigEndian.PutUint64(a[63:71], 0)
			p.Set(ctx, m.inodeKey(i), a, 0)
			for k := range st {
				delete(st, k)
			}
			for _, p := range ps {
				st[p] = st[p] + 1
			}
			for parent, c := range st {
				p.HIncrBy(ctx, m.parentKey(i), parent.String(), c)
			}
		}
	}
	_, err = p.Exec(ctx)
	return err
}

func (m *redisMeta) doCloneEntry(ctx Context, srcIno Ino, parent Ino, name string, ino Ino, originAttr *Attr, cmode uint8, cumask uint16, top bool) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		a, err := tx.Get(ctx, m.inodeKey(srcIno)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, originAttr)
		attr := *originAttr
		if eno := m.Access(ctx, srcIno, MODE_MASK_R, &attr); eno != 0 {
			return eno
		}
		attr.Parent = parent
		now := time.Now()
		if cmode&CLONE_MODE_PRESERVE_ATTR == 0 {
			attr.Uid = ctx.Uid()
			attr.Gid = ctx.Gid()
			attr.Mode &= ^cumask
			attr.Atime = now.Unix()
			attr.Mtime = now.Unix()
			attr.Ctime = now.Unix()
			attr.Atimensec = uint32(now.Nanosecond())
			attr.Mtimensec = uint32(now.Nanosecond())
			attr.Ctimensec = uint32(now.Nanosecond())
		}
		// TODO: preserve hardlink
		if attr.Typ == TypeFile && attr.Nlink > 1 {
			attr.Nlink = 1
		}
		srcXattr, err := tx.HGetAll(ctx, m.xattrKey(srcIno)).Result()
		if err != nil {
			return err
		}

		var pattr Attr
		if top {
			if a, err := tx.Get(ctx, m.inodeKey(parent)).Bytes(); err != nil {
				return err
			} else {
				m.parseAttr(a, &pattr)
			}
			if pattr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			if (pattr.Flags & FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if exist, err := tx.HExists(ctx, m.entryKey(parent), name).Result(); err != nil {
				return err
			} else if exist {
				return syscall.EEXIST
			}
			if eno := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); eno != 0 {
				return eno
			}
		}

		_, err = tx.TxPipelined(ctx, func(p redis.Pipeliner) error {
			p.Set(ctx, m.inodeKey(ino), m.marshal(&attr), 0)
			p.IncrBy(ctx, m.usedSpaceKey(), align4K(attr.Length))
			p.Incr(ctx, m.totalInodesKey())
			if len(srcXattr) > 0 {
				p.HMSet(ctx, m.xattrKey(ino), srcXattr)
			}
			if top && attr.Typ == TypeDirectory {
				p.ZAdd(ctx, m.detachedNodes(), redis.Z{Member: ino.String(), Score: float64(time.Now().Unix())})
			} else {
				p.HSet(ctx, m.entryKey(parent), name, m.packEntry(attr.Typ, ino))
				if top {
					now := time.Now()
					pattr.Mtime = now.Unix()
					pattr.Mtimensec = uint32(now.Nanosecond())
					pattr.Ctime = now.Unix()
					pattr.Ctimensec = uint32(now.Nanosecond())
					p.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
				}
			}

			switch attr.Typ {
			case TypeDirectory:
				sfield := srcIno.String()
				field := ino.String()
				if v, err := tx.HGet(ctx, m.dirUsedInodesKey(), sfield).Result(); err == nil {
					p.HSet(ctx, m.dirUsedInodesKey(), field, v)
					p.HSet(ctx, m.dirDataLengthKey(), field, tx.HGet(ctx, m.dirDataLengthKey(), sfield).Val())
					p.HSet(ctx, m.dirUsedSpaceKey(), field, tx.HGet(ctx, m.dirUsedSpaceKey(), sfield).Val())
				}
			case TypeFile:
				// copy chunks
				if attr.Length != 0 {
					var vals [][]string
					for i := 0; i <= int(attr.Length/ChunkSize); i++ {
						val, err := tx.LRange(ctx, m.chunkKey(srcIno, uint32(i)), 0, -1).Result()
						if err != nil {
							return err
						}
						vals = append(vals, val)
					}

					for i, sv := range vals {
						if len(sv) == 0 {
							continue
						}
						ss := readSlices(sv)
						if ss == nil {
							return syscall.EIO
						}
						p.RPush(ctx, m.chunkKey(ino, uint32(i)), sv)
						for _, s := range ss {
							if s.id > 0 {
								p.HIncrBy(ctx, m.sliceRefs(), m.sliceKey(s.id, s.size), 1)
							}
						}
					}
				}
			case TypeSymlink:
				path, err := tx.Get(ctx, m.symKey(srcIno)).Result()
				if err != nil {
					return err
				}
				p.Set(ctx, m.symKey(ino), path, 0)
			}
			return nil
		})
		return err
	}, m.inodeKey(srcIno), m.xattrKey(srcIno)))
}

func (m *redisMeta) doBatchClone(ctx Context, srcParent Ino, dstParent Ino, entries []*Entry, cmode uint8, cumask uint16, result *batchCloneResult) syscall.Errno {
	// TODO: Implement batch clone for Redis backend
	return syscall.ENOTSUP
}

func (m *redisMeta) doCleanupDetachedNode(ctx Context, ino Ino) syscall.Errno {
	exists, err := m.rdb.Exists(ctx, m.inodeKey(ino)).Result()
	if err != nil || exists == 0 {
		return errno(err)
	}
	rmConcurrent := make(chan int, 10)
	if eno := m.emptyDir(ctx, ino, true, nil, rmConcurrent); eno != 0 {
		return eno
	}
	m.updateStats(-align4K(0), -1)
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		_, err := tx.TxPipelined(ctx, func(p redis.Pipeliner) error {
			p.Del(ctx, m.inodeKey(ino))
			p.Del(ctx, m.xattrKey(ino))
			p.DecrBy(ctx, m.usedSpaceKey(), align4K(0))
			p.Decr(ctx, m.totalInodesKey())
			field := ino.String()
			p.HDel(ctx, m.dirUsedInodesKey(), field)
			p.HDel(ctx, m.dirDataLengthKey(), field)
			p.HDel(ctx, m.dirUsedSpaceKey(), field)
			p.ZRem(ctx, m.detachedNodes(), field)
			return nil
		})
		return err
	}, m.inodeKey(ino), m.xattrKey(ino)))
}

func (m *redisMeta) doFindDetachedNodes(t time.Time) []Ino {
	var inodes []Ino
	vals, err := m.rdb.ZRangeByScore(Background(), m.detachedNodes(), &redis.ZRangeBy{Min: "-inf", Max: strconv.FormatInt(t.Unix(), 10)}).Result()
	if err != nil {
		logger.Errorf("Scan detached nodes error: %s", err)
		return nil
	}
	for _, node := range vals {
		inode, _ := strconv.ParseUint(node, 10, 64)
		inodes = append(inodes, Ino(inode))
	}
	return inodes
}

func (m *redisMeta) doAttachDirNode(ctx Context, parent Ino, dstIno Ino, name string) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		var pattr Attr
		a, err := tx.Get(ctx, m.inodeKey(parent)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if (pattr.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if tx.HExists(ctx, m.entryKey(parent), name).Val() {
			return syscall.EEXIST
		}

		_, err = tx.TxPipelined(ctx, func(p redis.Pipeliner) error {
			p.HSet(ctx, m.entryKey(parent), name, m.packEntry(TypeDirectory, dstIno))
			pattr.Nlink++
			now := time.Now()
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			p.Set(ctx, m.inodeKey(parent), m.marshal(&pattr), 0)
			p.ZRem(ctx, m.detachedNodes(), dstIno.String())
			return nil
		})
		return err
	}, m.inodeKey(parent), m.entryKey(parent)))
}

func (m *redisMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Time) (bool, error) {
	var updated bool
	err := m.txn(ctx, func(tx *redis.Tx) error {
		a, err := tx.Get(ctx, m.inodeKey(inode)).Bytes()
		if err != nil {
			return err
		}
		m.parseAttr(a, attr)
		if !m.atimeNeedsUpdate(attr, now) {
			return nil
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		if _, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
			pipe.Set(ctx, m.inodeKey(inode), m.marshal(attr), 0)
			return nil
		}); err == nil {
			updated = true
		}
		return err
	}, m.inodeKey(inode))
	return updated, err
}

func (m *redisMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		val, err := tx.Get(ctx, m.inodeKey(ino)).Bytes()
		if err != nil {
			return err
		}
		attr := &Attr{}
		m.parseAttr(val, attr)

		if ctx.Uid() != 0 && ctx.Uid() != attr.Uid {
			return syscall.EPERM
		}

		if attr.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}

		oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode

		// https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/fs/fuse/acl.c#L143-L151
		// TODO: check linux capabilities
		if ctx.Uid() != 0 && !inGroup(ctx, attr.Gid) {
			// clear sgid
			attr.Mode &= 05777
		}

		if rule.IsEmpty() {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
		} else if rule.IsMinimal() && aclType == aclAPI.TypeAccess {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
			// set mode
			attr.Mode &= 07000
			attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7)
		} else {
			rule.InheritPerms(attr.Mode)
			aclId, err := m.insertACL(ctx, tx, rule)
			if err != nil {
				return err
			}
			setAttrACLId(attr, aclType, aclId)

			// set mode
			if aclType == aclAPI.TypeAccess {
				attr.Mode &= 07000
				attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7)
			}
		}

		// update attr
		if oriACL != getAttrACLId(attr, aclType) || oriMode != attr.Mode {
			now := time.Now()
			attr.Ctime = now.Unix()
			attr.Ctimensec = uint32(now.Nanosecond())
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.Set(ctx, m.inodeKey(ino), m.marshal(attr), 0)
				return nil
			})
			return err
		}
		return nil
	}, m.inodeKey(ino)))
}

func (m *redisMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, aclId uint32, rule *aclAPI.Rule) syscall.Errno {
	if aclId == aclAPI.None {
		val, err := m.rdb.Get(ctx, m.inodeKey(ino)).Bytes()
		if err != nil {
			return errno(err)
		}
		attr := &Attr{}
		m.parseAttr(val, attr)
		m.of.Update(ino, attr)

		aclId = getAttrACLId(attr, aclType)
	}

	a, err := m.getACL(ctx, nil, aclId)
	if err != nil {
		return errno(err)
	}
	if a == nil {
		return ENOATTR
	}
	*rule = *a
	return 0
}

func (m *redisMeta) getACL(ctx Context, tx *redis.Tx, id uint32) (*aclAPI.Rule, error) {
	if id == aclAPI.None {
		return nil, nil
	}
	if cRule := m.aclCache.Get(id); cRule != nil {
		return cRule, nil
	}

	var val []byte
	var err error
	if tx != nil {
		val, err = tx.HGet(ctx, m.aclKey(), strconv.FormatUint(uint64(id), 10)).Bytes()
	} else {
		val, err = m.rdb.HGet(ctx, m.aclKey(), strconv.FormatUint(uint64(id), 10)).Bytes()
	}
	if err != nil {
		return nil, err
	}
	if val == nil {
		return nil, syscall.EIO
	}

	rule := &aclAPI.Rule{}
	rule.Decode(val)
	m.aclCache.Put(id, rule)
	return rule, nil
}

func (m *redisMeta) insertACL(ctx Context, tx *redis.Tx, rule *aclAPI.Rule) (uint32, error) {
	if rule == nil || rule.IsEmpty() {
		return aclAPI.None, nil
	}

	if err := m.tryLoadMissACLs(ctx, tx); err != nil {
		logger.Warnf("SetFacl: load miss acls error: %s", err)
	}

	// set acl
	var aclId uint32
	if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None {
		// TODO failures may result in some id wastage.
		newId, err := m.incrCounter(aclCounter, 1)
		if err != nil {
			return aclAPI.None, err
		}
		aclId = uint32(newId)

		if err = tx.HSetNX(ctx, m.aclKey(), strconv.FormatUint(uint64(aclId), 10), rule.Encode()).Err(); err != nil {
			return aclAPI.None, err
		}
		m.aclCache.Put(aclId, rule)
	}
	return aclId, nil
}

func (m *redisMeta) tryLoadMissACLs(ctx Context, tx *redis.Tx) error {
	missIds := m.aclCache.GetMissIds()
	if len(missIds) > 0 {
		missKeys := make([]string, len(missIds))
		for i, id := range missIds {
			missKeys[i] = strconv.FormatUint(uint64(id), 10)
		}

		vals, err := tx.HMGet(ctx, m.aclKey(), missKeys...).Result()
		if err != nil {
			return err
		}
		for i, data := range vals {
			var rule aclAPI.Rule
			if data != nil {
				rule.Decode([]byte(data.(string)))
			}
			m.aclCache.Put(missIds[i], &rule)
		}
	}
	return nil
}

func (m *redisMeta) loadDumpedACLs(ctx Context) error {
	id2Rule := m.aclCache.GetAll()
	if len(id2Rule) == 0 {
		return nil
	}

	return m.txn(ctx, func(tx *redis.Tx) error {
		maxId := uint32(0)
		acls := make(map[string]interface{}, len(id2Rule))
		for id, rule := range id2Rule {
			if id > maxId {
				maxId = id
			}
			acls[strconv.FormatUint(uint64(id), 10)] = rule.Encode()
		}
		if err := tx.HSet(ctx, m.aclKey(), acls).Err(); err != nil {
			return err
		}
		return tx.Set(ctx, m.prefix+aclCounter, maxId, 0).Err()
	}, m.inodeKey(RootInode))
}

func (m *redisMeta) doStoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno) {
	err := m.txn(ctx, func(tx *redis.Tx) error {
		newId, err := m.incrCounter(krbTokenCounter, 1)
		if err != nil {
			return err
		}
		err = tx.HSet(ctx, m.krbTokenKey(), strconv.FormatUint(uint64(newId), 10), token).Err()
		if err == nil {
			id = uint32(newId)
		}
		return err
	}, m.krbTokenKey())
	return id, errno(err)
}

func (m *redisMeta) doUpdateToken(ctx Context, id uint32, token []byte) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		exist, err := tx.HExists(ctx, m.krbTokenKey(), strconv.FormatUint(uint64(id), 10)).Result()
		if err != nil {
			return err
		}
		if !exist {
			return syscall.ENOENT
		}
		return tx.HSet(ctx, m.krbTokenKey(), strconv.FormatUint(uint64(id), 10), token).Err()
	}, m.krbTokenKey()))
}

func (m *redisMeta) doLoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno) {
	err := m.txn(ctx, func(tx *redis.Tx) error {
		val, err := tx.HGet(ctx, m.krbTokenKey(), strconv.FormatUint(uint64(id), 10)).Bytes()
		if err != nil {
			return err
		}
		if val == nil {
			return syscall.ENOENT
		}
		token = val
		return nil
	}, m.krbTokenKey())
	return token, errno(err)
}

func (m *redisMeta) doDeleteTokens(ctx Context, ids []uint32) syscall.Errno {
	return errno(m.txn(ctx, func(tx *redis.Tx) error {
		strIds := make([]string, len(ids))
		for i, id := range ids {
			strIds[i] = strconv.FormatUint(uint64(id), 10)
		}
		return tx.HDel(ctx, m.krbTokenKey(), strIds...).Err()
	}, m.krbTokenKey()))
}

func (m *redisMeta) doListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno) {
	tokens = make(map[uint32][]byte)
	err := m.txn(ctx, func(tx *redis.Tx) error {
		vals, err := tx.HGetAll(ctx, m.krbTokenKey()).Result()
		if err != nil {
			return err
		}
		for k, v := range vals {
			id, err := strconv.ParseUint(k, 10, 32)
			if err != nil {
				logger.Errorf("parse token id: %s: %v", k, err)
				continue
			}
			tokens[uint32(id)] = []byte(v)
		}
		return nil
	}, m.krbTokenKey())
	return tokens, errno(err)
}

func (m *redisMeta) newDirHandler(inode Ino, plus bool, entries []*Entry) DirHandler {
	return &redisDirHandler{
		en:          m,
		inode:       inode,
		plus:        plus,
		initEntries: entries,
		batchNum:    DirBatchNum["redis"],
	}
}

type redisDirHandler struct {
	sync.Mutex
	inode       Ino
	plus        bool
	en          *redisMeta
	initEntries []*Entry
	entries     []*Entry
	indexes     map[string]int
	readOff     int
	batchNum    int
}

func (s *redisDirHandler) Close() {
	s.Lock()
	s.entries = nil
	s.readOff = 0
	s.Unlock()
}

func (s *redisDirHandler) Delete(name string) {
	s.Lock()
	defer s.Unlock()

	if len(s.entries) == 0 {
		return
	}

	if idx, ok := s.indexes[name]; ok && idx >= s.readOff {
		delete(s.indexes, name)
		n := len(s.entries)
		if idx < n-1 {
			// TODO: sorted
			s.entries[idx] = s.entries[n-1]
			s.indexes[string(s.entries[idx].Name)] = idx
		}
		s.entries = s.entries[:n-1]
	}
}

func (s *redisDirHandler) Insert(inode Ino, name string, attr *Attr) {
	s.Lock()
	defer s.Unlock()

	if len(s.entries) == 0 {
		return
	}

	// TODO: sorted
	s.entries = append(s.entries, &Entry{Inode: inode, Name: []byte(name), Attr: attr})
	s.indexes[name] = len(s.entries) - 1
}

func (s *redisDirHandler) List(ctx Context, offset int) ([]*Entry, syscall.Errno) {
	var prefix []*Entry
	if offset < len(s.initEntries) {
		prefix = s.initEntries[offset:]
		offset = 0
	} else {
		offset -= len(s.initEntries)
	}

	s.Lock()
	defer s.Unlock()
	if s.entries == nil {
		var entries []*Entry
		err := s.en.hscan(ctx, s.en.entryKey(s.inode), func(keys []string) error {
			newEntries := make([]Entry, len(keys)/2)
			newAttrs := make([]Attr, len(keys)/2)
			for i := 0; i < len(keys); i += 2 {
				typ, ino := s.en.parseEntry([]byte(keys[i+1]))
				if keys[i] == "" {
					logger.Errorf("Corrupt entry with empty name: inode %d parent %d", ino, s.inode)
					continue
				}
				ent := &newEntries[i/2]
				ent.Inode = ino
				ent.Name = []byte(keys[i])
				ent.Attr = &newAttrs[i/2]
				ent.Attr.Typ = typ
				entries = append(entries, ent)
			}
			return nil
		})
		if err != nil {
			return nil, errno(err)
		}

		if s.en.conf.SortDir {
			sort.Slice(entries, func(i, j int) bool {
				return string(entries[i].Name) < string(entries[j].Name)
			})
		}
		if s.plus {
			nEntries := len(entries)
			if nEntries <= s.batchNum {
				err = s.en.fillAttr(ctx, entries)
			} else {
				eg := errgroup.Group{}
				eg.SetLimit(2)
				for i := 0; i < nEntries; i += s.batchNum {
					var es []*Entry
					if i+s.batchNum > nEntries {
						es = entries[i:]

					} else {
						es = entries[i : i+s.batchNum]
					}
					eg.Go(func() error {
						return s.en.fillAttr(ctx, es)
					})
				}
				err = eg.Wait()
			}
			if err != nil {
				return nil, errno(err)
			}
		}
		s.entries = entries

		indexes := make(map[string]int, len(entries))
		for i, e := range entries {
			indexes[string(e.Name)] = i
		}
		s.indexes = indexes
	}

	size := len(s.entries) - offset
	if size > s.batchNum {
		size = s.batchNum
	}
	s.readOff = offset + size
	entries := s.entries[offset : offset+size]
	if len(prefix) > 0 {
		entries = append(prefix, entries...)
	}
	return entries, 0
}

func (s *redisDirHandler) Read(offset int) {
	s.readOff = offset - len(s.initEntries)
}


================================================
FILE: pkg/meta/redis_bak.go
================================================
//go:build !noredis
// +build !noredis

/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"encoding/hex"
	"fmt"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/juicedata/juicefs/pkg/meta/pb"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"github.com/redis/go-redis/v9"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
)

var (
	redisBatchSize = 10000
	redisPipeLimit = 1000
)

func (m *redisMeta) dump(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var dumps = []func(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error{
		m.dumpFormat,
		m.dumpCounters,
		m.dumpMix, // node, edge, chunk, symlink, xattr, parent
		m.dumpSustained,
		m.dumpDelFiles,
		m.dumpSliceRef,
		m.dumpACL,
		m.dumpQuota,
		m.dumpDirStat,
	}
	for _, f := range dumps {
		err := f(ctx, opt, ch)
		if err != nil {
			return err
		}
	}
	return nil
}

func (m *redisMeta) dumpCounters(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	counters := make([]*pb.Counter, 0, len(counterNames))
	for _, name := range counterNames {
		cnt, err := m.getCounter(name)
		if err != nil {
			return errors.Wrapf(err, "get counter %s", name)
		}
		if name == "nextInode" || name == "nextChunk" {
			cnt++ // Redis nextInode/nextChunk is one smaller than db
		}
		counters = append(counters, &pb.Counter{Key: name, Value: cnt})
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Counters: counters}})
}

func (m *redisMeta) dumpMix(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	logger.Warnf("please make sure the redis server is readonly, otherwise the dumped metadata will be inconsistent")
	pools := map[int][]*sync.Pool{
		segTypeNode:    {{New: func() interface{} { return &pb.Node{} }}},
		segTypeEdge:    {{New: func() interface{} { return &pb.Edge{} }}},
		segTypeChunk:   {{New: func() interface{} { return &pb.Chunk{} }}, {New: func() interface{} { return make([]byte, 8*sliceBytes) }}},
		segTypeSymlink: {{New: func() interface{} { return &pb.Symlink{} }}},
		segTypeXattr:   {{New: func() interface{} { return &pb.Xattr{} }}},
		segTypeParent:  {{New: func() interface{} { return &pb.Parent{} }}},
	}
	release := func(p proto.Message) {
		b := p.(*pb.Batch)
		for _, n := range b.Nodes {
			pools[segTypeNode][0].Put(n)
		}
		for _, e := range b.Edges {
			pools[segTypeEdge][0].Put(e)
		}
		for _, c := range b.Chunks {
			pools[segTypeChunk][1].Put(c.Slices) // nolint:staticcheck
			c.Slices = nil
			pools[segTypeChunk][0].Put(c)
		}
		for _, s := range b.Symlinks {
			pools[segTypeSymlink][0].Put(s)
		}
		for _, x := range b.Xattrs {
			pools[segTypeXattr][0].Put(x)
		}
		for _, p := range b.Parents {
			pools[segTypeParent][0].Put(p)
		}
	}
	char2Typ := map[byte]int{
		'i': segTypeNode,
		'd': segTypeEdge,
		'c': segTypeChunk,
		's': segTypeSymlink,
		'x': segTypeXattr,
		'p': segTypeParent,
	}
	typ2Limit := map[int]int{
		segTypeNode:    redisBatchSize,
		segTypeEdge:    redisBatchSize,
		segTypeChunk:   redisPipeLimit,
		segTypeSymlink: redisBatchSize,
		segTypeXattr:   redisPipeLimit,
		segTypeParent:  redisPipeLimit,
	}
	var typ2Keys = make(map[int][]string, len(typ2Limit))
	for typ, limit := range typ2Limit {
		typ2Keys[typ] = make([]string, 0, limit)
	}

	var sums = map[int]*atomic.Uint64{
		segTypeNode:    {},
		segTypeEdge:    {},
		segTypeChunk:   {},
		segTypeSymlink: {},
		segTypeXattr:   {},
		segTypeParent:  {},
	}
	typ2Handles := map[int]func(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error{
		segTypeNode:    m.dumpNodes,
		segTypeEdge:    m.dumpEdges,
		segTypeChunk:   m.dumpChunks,
		segTypeSymlink: m.dumpSymlinks,
		segTypeXattr:   m.dumpXattrs,
		segTypeParent:  m.dumpParents,
	}

	eg, egCtx := errgroup.WithContext(ctx)
	eg.SetLimit(opt.Threads)

	keyCh := make(chan []string, opt.Threads*2)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		var keys []string
		for {
			select {
			case <-ctx.Done():
				return
			case keys = <-keyCh:
			}
			if keys == nil {
				break
			}
			for _, key := range keys {
				if typ, ok := char2Typ[key[len(m.prefix)]]; ok {
					typ2Keys[typ] = append(typ2Keys[typ], key)
					if len(typ2Keys[typ]) >= typ2Limit[typ] {
						iPools, sum, keys := pools[typ], sums[typ], typ2Keys[typ]
						eg.Go(func() error {
							return typ2Handles[typ](ctx, ch, keys, iPools, release, sum)
						})
						typ2Keys[typ] = make([]string, 0, typ2Limit[typ])
					}
				}
			}
		}
		for typ, keys := range typ2Keys {
			if len(keys) > 0 {
				iKeys, iTyp := keys, typ
				eg.Go(func() error {
					return typ2Handles[iTyp](ctx, ch, iKeys, pools[iTyp], release, sums[iTyp])
				})
			}
		}
	}()

	if err := m.scan(egCtx, "*", func(sKeys []string) error {
		keyCh <- sKeys
		return nil
	}); err != nil {
		ctx.Cancel()
		wg.Wait()
		_ = eg.Wait()
		return err
	}

	close(keyCh)
	wg.Wait()
	if err := eg.Wait(); err != nil {
		return err
	}

	logger.Debugf("dump result: %s", printSums(sums))
	return nil
}

func (m *redisMeta) dumpSustained(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	keys, err := m.rdb.ZRange(ctx, m.allSessions(), 0, -1).Result()
	if err != nil {
		return err
	}

	sustained := make([]*pb.Sustained, 0, len(keys))
	for _, k := range keys {
		sid, _ := strconv.ParseUint(k, 10, 64)
		var ss []string
		ss, err = m.rdb.SMembers(ctx, m.sustained(sid)).Result()
		if err != nil {
			return err
		}
		if len(ss) > 0 {
			inodes := make([]uint64, 0, len(ss))
			for _, s := range ss {
				inode, _ := strconv.ParseUint(s, 10, 64)
				inodes = append(inodes, inode)
			}
			sustained = append(sustained, &pb.Sustained{Sid: sid, Inodes: inodes})
		}
	}

	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Sustained: sustained}})
}

func (m *redisMeta) dumpDelFiles(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	zs, err := m.rdb.ZRangeWithScores(ctx, m.delfiles(), 0, -1).Result()
	if err != nil {
		return err
	}

	delFiles := make([]*pb.DelFile, 0, min(len(zs), redisBatchSize))
	for i, z := range zs {
		parts := strings.Split(z.Member.(string), ":")
		if len(parts) != 2 {
			logger.Warnf("invalid delfile string: %s", z.Member.(string))
			continue
		}
		inode, _ := strconv.ParseUint(parts[0], 10, 64)
		length, _ := strconv.ParseUint(parts[1], 10, 64)
		delFiles = append(delFiles, &pb.DelFile{Inode: inode, Length: length, Expire: int64(z.Score)})
		if len(delFiles) >= redisBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Delfiles: delFiles}}); err != nil {
				return err
			}
			delFiles = make([]*pb.DelFile, 0, min(len(zs)-i-1, redisBatchSize))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Delfiles: delFiles}})
}

func (m *redisMeta) dumpSliceRef(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	sliceRefs := make([]*pb.SliceRef, 0, 1024)
	var key string
	var val int
	var inErr error
	if err := m.hscan(ctx, m.sliceRefs(), func(keys []string) error {
		for i := 0; i < len(keys); i += 2 {
			key = keys[i]
			val, inErr = strconv.Atoi(keys[i+1])
			if inErr != nil {
				logger.Errorf("invalid value: %s", keys[i+1])
				continue
			}
			if val >= 1 {
				ps := strings.Split(key, "_")
				if len(ps) == 2 {
					id, _ := strconv.ParseUint(ps[0][1:], 10, 64)
					size, _ := strconv.ParseUint(ps[1], 10, 32)
					sr := &pb.SliceRef{Id: id, Size: uint32(size), Refs: int64(val) + 1} // Redis sliceRef is one smaller than sql
					sliceRefs = append(sliceRefs, sr)
					if len(sliceRefs) >= redisBatchSize {
						if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{SliceRefs: sliceRefs}}); err != nil {
							return err
						}
						sliceRefs = make([]*pb.SliceRef, 0, 1024)
					}
				}
			}
		}
		return nil
	}); err != nil {
		return err
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{SliceRefs: sliceRefs}})
}

func (m *redisMeta) dumpACL(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	vals, err := m.rdb.HGetAll(ctx, m.aclKey()).Result()
	if err != nil {
		return err
	}

	acls := make([]*pb.Acl, 0, len(vals))
	for k, v := range vals {
		id, _ := strconv.ParseUint(k, 10, 32)
		acls = append(acls, &pb.Acl{
			Id:   uint32(id),
			Data: []byte(v),
		})
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Acls: acls}})
}

func (m *redisMeta) dumpQuota(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	quotas := make(map[Ino]*pb.Quota)
	vals, err := m.rdb.HGetAll(ctx, m.dirQuotaKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirQuotaKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse quota inode: %s: %v", k, err)
			continue
		}
		if len(v) != 16 {
			logger.Warnf("invalid quota string: %s", hex.EncodeToString([]byte(v)))
			continue
		}
		space, inodes := m.parseQuota([]byte(v))
		quotas[Ino(inode)] = &pb.Quota{
			Inode:     inode,
			MaxSpace:  space,
			MaxInodes: inodes,
		}
	}

	vals, err = m.rdb.HGetAll(ctx, m.dirQuotaUsedInodesKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirQuotaUsedInodesKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse used inodes inode: %s: %v", k, err)
			continue
		}
		if q, ok := quotas[Ino(inode)]; !ok {
			logger.Warnf("quota for used inodes not found: %d", inode)
		} else {
			q.UsedInodes, _ = strconv.ParseInt(v, 10, 64)
		}
	}

	vals, err = m.rdb.HGetAll(ctx, m.dirQuotaUsedSpaceKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirQuotaUsedSpaceKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse used space inode: %s: %v", k, err)
			continue
		}
		if q, ok := quotas[Ino(inode)]; !ok {
			logger.Warnf("quota for used space not found: %d", inode)
		} else {
			q.UsedSpace, _ = strconv.ParseInt(v, 10, 64)
		}
	}

	qs := make([]*pb.Quota, 0, len(quotas))
	for _, q := range quotas {
		qs = append(qs, q)
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Quotas: qs}})
}

func (m *redisMeta) dumpDirStat(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	stats := make(map[Ino]*pb.Stat)
	vals, err := m.rdb.HGetAll(ctx, m.dirDataLengthKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirDataLengthKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse length stat inode: %s: %v", k, err)
			continue
		}
		length, _ := strconv.ParseInt(v, 10, 64)
		stats[Ino(inode)] = &pb.Stat{
			Inode:      inode,
			DataLength: length,
		}
	}

	vals, err = m.rdb.HGetAll(ctx, m.dirUsedInodesKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirUsedInodesKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse inodes stat inode: %s: %v", k, err)
			continue
		}
		inodes, _ := strconv.ParseInt(v, 10, 64)
		if q, ok := stats[Ino(inode)]; !ok {
			logger.Warnf("stat for used inodes not found: %d", inode)
		} else {
			q.UsedInodes = inodes
		}
	}

	vals, err = m.rdb.HGetAll(ctx, m.dirUsedSpaceKey()).Result()
	if err != nil {
		return fmt.Errorf("get dirUsedSpaceKey err: %w", err)
	}
	for k, v := range vals {
		inode, err := strconv.ParseUint(k, 10, 64)
		if err != nil {
			logger.Warnf("parse space stat inode: %s: %v", k, err)
			continue
		}
		space, _ := strconv.ParseInt(v, 10, 64)
		if q, ok := stats[Ino(inode)]; !ok {
			logger.Warnf("stat for used space not found: %d", inode)
		} else {
			q.UsedSpace = space
		}
	}

	ss := make([]*pb.Stat, 0, min(len(stats), redisBatchSize))
	cnt := 0
	for _, s := range stats {
		cnt++
		ss = append(ss, s)
		if len(ss) >= redisBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Dirstats: ss}}); err != nil {
				return err
			}
			ss = make([]*pb.Stat, 0, min(len(stats)-cnt, redisBatchSize))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Dirstats: ss}})
}

func (m *redisMeta) dumpNodes(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	vals, err := m.rdb.MGet(ctx, keys...).Result()
	if err != nil {
		return err
	}
	nodes := make([]*pb.Node, 0, len(vals))
	var inode uint64
	for idx, v := range vals {
		if v == nil {
			continue
		}
		inode, _ = strconv.ParseUint(keys[idx][len(m.prefix)+1:], 10, 64)
		node := pools[0].Get().(*pb.Node)
		node.Inode = inode
		node.Data = []byte(v.(string))
		nodes = append(nodes, node)
	}
	sum.Add(uint64(len(nodes)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Nodes: nodes}, rel})
}

func (m *redisMeta) dumpEdges(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	edges := make([]*pb.Edge, 0, redisBatchSize)
	for _, key := range keys {
		parent, _ := strconv.ParseUint(key[len(m.prefix)+1:], 10, 64)
		var pe *pb.Edge
		if err := m.hscan(ctx, m.entryKey(Ino(parent)), func(keys []string) error {
			for i := 0; i < len(keys); i += 2 {
				pe = pools[0].Get().(*pb.Edge)
				pe.Parent = parent
				pe.Name = []byte(keys[i])
				typ, ino := m.parseEntry([]byte(keys[i+1]))
				pe.Type, pe.Inode = uint32(typ), uint64(ino)
				edges = append(edges, pe)

				if len(edges) >= redisBatchSize {
					if err := dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Edges: edges}, rel}); err != nil {
						return err
					}
					sum.Add(uint64(len(edges)))
					edges = make([]*pb.Edge, 0, redisBatchSize)
				}
			}
			return nil
		}); err != nil {
			return err
		}
	}

	sum.Add(uint64(len(edges)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Edges: edges}, rel})
}

func (m *redisMeta) dumpChunks(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	pipe := m.rdb.Pipeline()
	inos := make([]uint64, 0, len(keys))
	idxs := make([]uint32, 0, len(keys))
	for _, key := range keys {
		ps := strings.Split(key, "_")
		if len(ps) != 2 {
			logger.Warnf("invalid chunk key: %s", key)
			continue
		}
		ino, _ := strconv.ParseUint(ps[0][len(m.prefix)+1:], 10, 64)
		idx, _ := strconv.ParseUint(ps[1], 10, 32)
		pipe.LRange(ctx, m.chunkKey(Ino(ino), uint32(idx)), 0, -1)
		inos = append(inos, ino)
		idxs = append(idxs, uint32(idx))
	}

	cmds, err := pipe.Exec(ctx)
	if err != nil {
		return fmt.Errorf("chunk pipeline exec err: %w", err)
	}

	chunks := make([]*pb.Chunk, 0, len(cmds))
	for k, cmd := range cmds {
		vals, err := cmd.(*redis.StringSliceCmd).Result()
		if err != nil {
			return fmt.Errorf("get chunk result err: %w", err)
		}
		if len(vals) == 0 {
			continue
		}

		pc := pools[0].Get().(*pb.Chunk)
		pc.Inode = inos[k]
		pc.Index = idxs[k]

		pc.Slices = pools[1].Get().([]byte)
		if len(pc.Slices) < len(vals)*sliceBytes {
			pc.Slices = make([]byte, len(vals)*sliceBytes)
		}
		pc.Slices = pc.Slices[:len(vals)*sliceBytes]

		for i, val := range vals {
			if len(val) != sliceBytes {
				logger.Errorf("corrupt slice: len=%d, val=%v", len(val), []byte(val))
				continue
			}
			copy(pc.Slices[i*sliceBytes:], []byte(val))
		}
		chunks = append(chunks, pc)
	}
	sum.Add(uint64(len(chunks)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Chunks: chunks}, rel})
}

func (m *redisMeta) dumpSymlinks(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	vals, err := m.rdb.MGet(ctx, keys...).Result()
	if err != nil {
		return err
	}
	syms := make([]*pb.Symlink, 0, len(vals))
	var ps *pb.Symlink
	for idx, v := range vals {
		if v == nil {
			continue
		}
		ps = pools[0].Get().(*pb.Symlink)
		ps.Inode, err = strconv.ParseUint(keys[idx][len(m.prefix)+1:], 10, 64)
		if err != nil {
			continue // key "setting"
		}
		ps.Target = unescape(v.(string))
		syms = append(syms, ps)
	}

	sum.Add(uint64(len(syms)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Symlinks: syms}, rel})
}

func (m *redisMeta) dumpXattrs(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	xattrs := make([]*pb.Xattr, 0, len(keys))
	pipe := m.rdb.Pipeline()
	for _, key := range keys {
		pipe.HGetAll(ctx, key)
	}
	cmds, err := pipe.Exec(ctx)
	if err != nil {
		return err
	}

	var xattr *pb.Xattr
	for idx, cmd := range cmds {
		inode, _ := strconv.ParseUint(keys[idx][len(m.prefix)+1:], 10, 64)
		res, err := cmd.(*redis.MapStringStringCmd).Result()
		if err != nil {
			return err
		}

		for k, v := range res {
			xattr = pools[0].Get().(*pb.Xattr)
			xattr.Inode = inode
			xattr.Name = k
			xattr.Value = []byte(v)
			xattrs = append(xattrs, xattr)
		}
	}
	sum.Add(uint64(len(xattrs)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Xattrs: xattrs}, rel})
}

func (m *redisMeta) dumpParents(ctx context.Context, ch chan<- *dumpedResult, keys []string, pools []*sync.Pool, rel func(p proto.Message), sum *atomic.Uint64) error {
	parents := make([]*pb.Parent, 0, len(keys))
	pipe := m.rdb.Pipeline()
	for _, key := range keys {
		pipe.HGetAll(ctx, key)
	}
	cmds, err := pipe.Exec(ctx)
	if err != nil {
		return err
	}

	var pp *pb.Parent
	for idx, cmd := range cmds {
		inode, _ := strconv.ParseUint(keys[idx][len(m.prefix)+1:], 10, 64)
		res, err := cmd.(*redis.MapStringStringCmd).Result()
		if err != nil {
			return err
		}

		for k, v := range res {
			pp = pools[0].Get().(*pb.Parent)
			parent, _ := strconv.ParseUint(k, 10, 64)
			cnt, _ := strconv.ParseInt(v, 10, 64)

			pp.Inode = inode
			pp.Parent = parent
			pp.Cnt = cnt
			parents = append(parents, pp)
		}
	}
	sum.Add(uint64(len(parents)))
	return dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Parents: parents}, rel})
}

func (m *redisMeta) load(ctx Context, typ int, opt *LoadOption, val proto.Message) error {
	switch typ {
	case segTypeFormat:
		return m.loadFormat(ctx, val)
	case segTypeCounter:
		return m.loadCounters(ctx, val)
	case segTypeNode:
		return m.loadNodes(ctx, val)
	case segTypeChunk:
		return m.loadChunks(ctx, val)
	case segTypeEdge:
		return m.loadEdges(ctx, val)
	case segTypeSymlink:
		return m.loadSymlinks(ctx, val)
	case segTypeSustained:
		return m.loadSustained(ctx, val)
	case segTypeDelFile:
		return m.loadDelFiles(ctx, val)
	case segTypeSliceRef:
		return m.loadSliceRefs(ctx, val)
	case segTypeAcl:
		return m.loadAcl(ctx, val)
	case segTypeXattr:
		return m.loadXattrs(ctx, val)
	case segTypeQuota:
		return m.loadQuota(ctx, val)
	case segTypeStat:
		return m.loadDirStats(ctx, val)
	case segTypeParent:
		return m.loadParents(ctx, val)
	default:
		logger.Warnf("skip segment type %d", typ)
		return nil
	}
}

func execPipe(ctx context.Context, pipe redis.Pipeliner) error {
	if pipe.Len() == 0 {
		return nil
	}
	cmds, err := pipe.Exec(ctx)
	if err != nil {
		for i, cmd := range cmds {
			if cmd.Err() != nil {
				return fmt.Errorf("failed command %d %+v: %w", i, cmd, cmd.Err())
			}
		}
	}
	return err
}

func (m *redisMeta) loadFormat(ctx Context, msg proto.Message) error {
	return m.rdb.Set(ctx, m.setting(), msg.(*pb.Format).Data, 0).Err()
}

func (m *redisMeta) loadCounters(ctx Context, msg proto.Message) error {
	cs := make(map[string]interface{})

	for _, c := range msg.(*pb.Batch).Counters {
		if c.Key == "nextInode" || c.Key == "nextChunk" {
			cs[m.counterKey(c.Key)] = c.Value - 1
		} else {
			cs[m.counterKey(c.Key)] = c.Value
		}
	}
	return m.rdb.MSet(ctx, cs).Err()
}

func (m *redisMeta) loadNodes(ctx Context, msg proto.Message) error {
	batch := msg.(*pb.Batch)
	pipe := m.rdb.Pipeline()
	for _, pn := range batch.Nodes {
		pipe.Set(ctx, m.inodeKey(Ino(pn.Inode)), pn.Data, 0)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadEdges(ctx Context, msg proto.Message) error {
	batch := msg.(*pb.Batch)
	pipe := m.rdb.Pipeline()
	for _, edge := range batch.Edges {
		buff := utils.NewBuffer(9)
		buff.Put8(uint8(edge.Type))
		buff.Put64(edge.Inode)
		pipe.HSet(ctx, m.entryKey(Ino(edge.Parent)), edge.Name, buff.Bytes())
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadChunks(ctx Context, msg proto.Message) error {
	batch := msg.(*pb.Batch)
	pipe := m.rdb.Pipeline()
	for _, chk := range batch.Chunks {
		slices := make([]string, 0, len(chk.Slices))
		for off := 0; off < len(chk.Slices); off += sliceBytes {
			slices = append(slices, string(chk.Slices[off:off+sliceBytes]))
		}
		pipe.RPush(ctx, m.chunkKey(Ino(chk.Inode), chk.Index), slices)

		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadSymlinks(ctx Context, msg proto.Message) error {
	syms := make(map[string]interface{}, redisBatchSize)
	for _, ps := range msg.(*pb.Batch).Symlinks {
		syms[m.symKey(Ino(ps.Inode))] = ps.Target

		if len(syms) >= redisBatchSize {
			if err := m.rdb.MSet(ctx, syms).Err(); err != nil {
				return err
			}
			for k := range syms {
				delete(syms, k)
			}
		}
	}
	if len(syms) == 0 {
		return nil
	}
	return m.rdb.MSet(ctx, syms).Err()
}

func (m *redisMeta) loadSustained(ctx Context, msg proto.Message) error {
	pipe := m.rdb.Pipeline()
	for _, ps := range msg.(*pb.Batch).Sustained {
		inodes := make([]interface{}, len(ps.Inodes))
		for i, inode := range ps.Inodes {
			inodes[i] = inode
		}
		pipe.SAdd(ctx, m.sustained(ps.Sid), inodes...)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadDelFiles(ctx Context, msg proto.Message) error {
	batch := msg.(*pb.Batch)
	mbs := make([]redis.Z, 0, len(batch.Delfiles))
	for _, pd := range batch.Delfiles {
		mbs = append(mbs, redis.Z{
			Score:  float64(pd.Expire),
			Member: m.toDelete(Ino(pd.Inode), pd.Length),
		})
	}
	if len(mbs) == 0 {
		return nil
	}
	return m.rdb.ZAdd(ctx, m.delfiles(), mbs...).Err()
}

func (m *redisMeta) loadSliceRefs(ctx Context, msg proto.Message) error {
	slices := make(map[string]interface{})
	for _, p := range msg.(*pb.Batch).SliceRefs {
		slices[m.sliceKey(p.Id, p.Size)] = strconv.Itoa(int(p.Refs - 1))
	}
	if len(slices) == 0 {
		return nil
	}
	return m.rdb.HSet(ctx, m.sliceRefs(), slices).Err()
}

var loadLock sync.Mutex
var maxAclId uint32

func (m *redisMeta) loadAcl(ctx Context, msg proto.Message) error {
	batch := msg.(*pb.Batch)
	acls := make(map[string]interface{}, len(batch.Acls))
	for _, pa := range batch.Acls {
		loadLock.Lock()
		if pa.Id > maxAclId {
			maxAclId = pa.Id
		}
		loadLock.Unlock()
		acls[strconv.FormatUint(uint64(pa.Id), 10)] = pa.Data
	}
	if len(acls) == 0 {
		return nil
	}

	if err := m.rdb.HSet(ctx, m.aclKey(), acls).Err(); err != nil {
		return err
	}
	return m.rdb.Set(ctx, m.counterKey(aclCounter), maxAclId, 0).Err()
}

func (m *redisMeta) loadXattrs(ctx Context, msg proto.Message) error {
	pipe := m.rdb.Pipeline()
	xm := make(map[uint64]map[string]interface{}) // {inode: {name: value}}
	for _, px := range msg.(*pb.Batch).Xattrs {
		if _, ok := xm[px.Inode]; !ok {
			xm[px.Inode] = make(map[string]interface{})
		}
		xm[px.Inode][px.Name] = px.Value
	}

	for inode, xattrs := range xm {
		pipe.HSet(ctx, m.xattrKey(Ino(inode)), xattrs)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadQuota(ctx Context, msg proto.Message) error {
	pipe := m.rdb.Pipeline()
	var inodeKey string
	for _, pq := range msg.(*pb.Batch).Quotas {
		inodeKey = Ino(pq.Inode).String()
		pipe.HSet(ctx, m.dirQuotaKey(), inodeKey, m.packQuota(pq.MaxSpace, pq.MaxInodes))
		pipe.HSet(ctx, m.dirQuotaUsedInodesKey(), inodeKey, pq.UsedInodes)
		pipe.HSet(ctx, m.dirQuotaUsedSpaceKey(), inodeKey, pq.UsedSpace)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadDirStats(ctx Context, msg proto.Message) error {
	pipe := m.rdb.Pipeline()
	var inodeKey string
	for _, ps := range msg.(*pb.Batch).Dirstats {
		inodeKey = Ino(ps.Inode).String()
		pipe.HSet(ctx, m.dirDataLengthKey(), inodeKey, ps.DataLength)
		pipe.HSet(ctx, m.dirUsedInodesKey(), inodeKey, ps.UsedInodes)
		pipe.HSet(ctx, m.dirUsedSpaceKey(), inodeKey, ps.UsedSpace)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) loadParents(ctx Context, msg proto.Message) error {
	pipe := m.rdb.Pipeline()
	for _, p := range msg.(*pb.Batch).Parents {
		pipe.HIncrBy(ctx, m.parentKey(Ino(p.Inode)), Ino(p.Parent).String(), p.Cnt)
		if pipe.Len() >= redisPipeLimit {
			if err := execPipe(ctx, pipe); err != nil {
				return err
			}
		}
	}
	return execPipe(ctx, pipe)
}

func (m *redisMeta) prepareLoad(ctx Context, opt *LoadOption) error {
	opt.check()
	if _, ok := m.rdb.(*redis.ClusterClient); ok {
		err := m.scan(ctx, "*", func(keys []string) error {
			return fmt.Errorf("found key with same prefix: %s", keys[0])
		})
		if err != nil {
			return err
		}
	} else {
		dbsize, err := m.rdb.DBSize(ctx).Result()
		if err != nil {
			return err
		}
		if dbsize > 0 {
			return fmt.Errorf("database redis://%s is not empty", m.addr)
		}
	}
	return nil
}


================================================
FILE: pkg/meta/redis_csc.go
================================================
//go:build !noredis
// +build !noredis

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"
	"unsafe"

	"github.com/hashicorp/golang-lru/v2/expirable"
	"github.com/redis/go-redis/v9"
	"github.com/redis/go-redis/v9/push"
)

var entryMark cachedEntry

type cachedEntry struct {
	ino Ino
	Attr
}

func (e *cachedEntry) isMark() bool {
	return e.ino == 0
}

// redisCache support bcast mode client-side cache
// cache attrs and entries only, chunks are already cached in OpenCache
type redisCache struct {
	cli          *redis.Client
	prefix       string
	cap          int
	expiry       time.Duration
	preload      int
	subscription *redis.PubSub

	inodeCache *expirable.LRU[Ino, []byte]
	entryCache *expirable.LRU[string, *cachedEntry]
}

func newRedisCache(prefix string, cap int, expiry time.Duration, preload int) *redisCache {
	logger.Infof("Initializing Redis client-side cache with size %d and expiry %+v", cap, expiry)
	return &redisCache{
		prefix:     prefix,
		cap:        cap,
		expiry:     expiry,
		preload:    preload,
		inodeCache: expirable.NewLRU[Ino, []byte](cap, nil, expiry),
		entryCache: expirable.NewLRU[string, *cachedEntry](cap, nil, expiry),
	}
}

func (c *redisCache) init(cli redis.UniversalClient) error {
	ctx := context.WithValue(context.Background(), invalidConnKey{}, true)
	var err error
	if rc, ok := cli.(*redis.Client); ok {
		c.cli = rc
	} else if cc, ok := cli.(*redis.ClusterClient); ok {
		// For cluster mode, we should get the master node for our key
		if c.cli, err = cc.MasterForKey(ctx, c.prefix); err != nil {
			return err
		}
	}
	c.cli.Options().OnConnect = c.onInvalidateConnect
	// under the RESP3 protocol, "__redis__:invalidate" actually has no effect.
	// we use Pubsub channel to simplify connection management and receiving PUSH messages.
	c.subscription = c.cli.Subscribe(ctx, "__redis__:invalidate")
	_ = c.subscription.Channel()
	// handle PUSH notifications for invalidation in c.HandlePushNotification
	if err = c.cli.RegisterPushNotificationHandler("invalidate", c, true); err != nil {
		c.close()
		return err
	}
	// handle client cmd to avoid race conditions
	c.cli.AddHook(c)
	return nil
}

const (
	keyTypOther = iota
	keyTypInode
	keyTypEntry
)

func (c *redisCache) parse(key string) int {
	if strings.HasPrefix(key, c.prefix+"i") {
		return keyTypInode
	}
	if strings.HasPrefix(key, c.prefix+"d") {
		return keyTypEntry
	}
	return keyTypOther
}

func (c *redisCache) entryName(parent Ino, name string) string {
	return fmt.Sprintf("%d%d%s", parent, os.PathSeparator, name)
}

func (c *redisCache) HandlePushNotification(ctx context.Context, handlerCtx push.NotificationHandlerContext, notification []interface{}) error {
	if len(notification) != 2 || notification[0] == nil || notification[1] == nil {
		return nil
	}
	if typ, ok := notification[0].(string); !ok || typ != "invalidate" {
		return nil
	}
	iKeys := notification[1].([]interface{})
	var key string
	for _, iKey := range iKeys {
		key = iKey.(string)
		typ := c.parse(key)
		switch typ {
		case keyTypInode:
			inodeStr := key[len(c.prefix)+1:]
			inode, err := strconv.ParseUint(inodeStr, 10, 64)
			if err == nil {
				c.inodeCache.Remove(Ino(inode))
			}
		case keyTypEntry:
			parentStr := key[len(c.prefix)+1:]
			// invalidate all entries related to this directory
			prefix := fmt.Sprintf("%s%d", parentStr, os.PathSeparator)
			for _, k := range c.entryCache.Keys() {
				if strings.HasPrefix(k, prefix) {
					c.entryCache.Remove(k)
				}
			}
		}
	}
	return nil
}

func (c *redisCache) DialHook(next redis.DialHook) redis.DialHook { return nil }

var inodeMark []byte

func (c *redisCache) beforeProcess(cmd redis.Cmder, skip bool) bool {
	name, args := cmd.Name(), cmd.Args()
	var key string
	var ok bool
	if len(args) < 2 {
		return true
	}
	if key, ok = args[1].(string); !ok {
		return true
	}
	typ := c.parse(key)

	if name == "get" && typ == keyTypInode {
		num, err := strconv.ParseUint(key[len(c.prefix)+1:], 10, 64)
		if err == nil {
			inode := Ino(num)
			if data, ok := c.inodeCache.Get(inode); ok {
				if !skip && len(data) > 0 {
					rsp := cmd.(*redis.StringCmd)
					rsp.SetErr(nil)
					rsp.SetVal(bytesToString(data))
					return false
				}
			}
			c.inodeCache.AddIf(inode, inodeMark, func(oldVal []byte, exists bool) bool {
				return !exists
			})
			// request to Redis server
		}
	}
	return true
}

func (c *redisCache) afterProcess(cmd redis.Cmder) {
	name, args := cmd.Name(), cmd.Args()
	var key string
	var ok bool
	if len(args) < 2 {
		return
	}
	if key, ok = args[1].(string); !ok {
		return
	}
	typ := c.parse(key)

	switch name {
	case "get":
		if typ == keyTypInode {
			if data, err := cmd.(*redis.StringCmd).Bytes(); err == nil {
				num, err := strconv.ParseUint(key[len(c.prefix)+1:], 10, 64)
				if err != nil {
					return
				}
				_, _ = c.inodeCache.AddIf(Ino(num), data, func(oldVal []byte, exists bool) bool {
					return exists && len(oldVal) == 0
				})
			}
		}
	case "set":
		if typ == keyTypInode {
			if cmd.(*redis.StatusCmd).Err() == nil {
				if num, err := strconv.ParseUint(key[len(c.prefix)+1:], 10, 64); err == nil {
					_ = c.inodeCache.Remove(Ino(num))
				}
			}
		}
	case "hdel":
		if typ == keyTypEntry {
			if err := cmd.(*redis.IntCmd).Err(); err == nil {
				for i := 2; i < len(args); i++ {
					_ = c.entryCache.Remove(fmt.Sprintf("%s%d%s", key[len(c.prefix)+1:], os.PathSeparator, args[i]))
				}
			}
		}
	case "hset":
		if typ == keyTypEntry {
			if err := cmd.(*redis.IntCmd).Err(); err == nil {
				for i := 2; i < len(args); i += 2 {
					_ = c.entryCache.Remove(fmt.Sprintf("%s%d%s", key[len(c.prefix)+1:], os.PathSeparator, args[i]))
				}
			}
		}
	}
}

func (c *redisCache) ProcessHook(next redis.ProcessHook) redis.ProcessHook {
	return func(ctx context.Context, cmd redis.Cmder) error {
		if !c.beforeProcess(cmd, false) {
			return nil
		}
		err := next(ctx, cmd)
		c.afterProcess(cmd)
		return err
	}
}

func (c *redisCache) ProcessPipelineHook(next redis.ProcessPipelineHook) redis.ProcessPipelineHook {
	return func(ctx context.Context, cmds []redis.Cmder) error {
		for _, cmd := range cmds {
			_ = c.beforeProcess(cmd, true)
		}
		err := next(ctx, cmds)
		for _, cmd := range cmds {
			c.afterProcess(cmd)
		}
		return err
	}
}

func (c *redisCache) close() {
	if c.subscription != nil {
		if err := c.subscription.Close(); err != nil {
			logger.Warnf("failed closing Redis cache subscription: %v", err)
		}
		c.subscription = nil
	}
	if c.cli != nil {
		c.cli.Options().OnConnect = nil
	}
	c.cli = nil
}

type invalidConnKey struct{}

func (c *redisCache) onInvalidateConnect(ctx context.Context, cn *redis.Conn) error {
	if ctx.Value(invalidConnKey{}) == nil {
		return nil
	}
	// clear all caches on reconnect
	c.inodeCache.Purge()
	c.entryCache.Purge()
	// use the pubsub connection to handle tracking and invalidate
	_ = cn.Do(ctx, "CLIENT", "TRACKING", "OFF").Err()
	if err := cn.Do(ctx, "CLIENT", "TRACKING", "ON", "BCAST", "PREFIX", c.prefix+"i", "PREFIX", c.prefix+"d").Err(); err != nil {
		logger.Warnf("Failed to enable Redis client-side caching on new connection: %v", err)
		return err
	}
	return nil
}

func (m *redisMeta) preloadCache() {
	if m.cache == nil {
		return
	}
	if m.cache.preload <= 0 {
		return
	}
	start := time.Now()
	ctx := Background()
	attr := &Attr{}
	if eno := m.doGetAttr(ctx, m.root, attr); eno != 0 {
		logger.Warnf("failed to get root inode %d attribute: %d", m.root, eno)
		return
	}

	var entries []*Entry
	if eno := m.doReaddir(ctx, m.root, 1, &entries, m.cache.preload); eno != 0 {
		logger.Warnf("failed to read root %d directory: %d", m.root, eno)
		return
	}
	for _, entry := range entries {
		m.cache.entryCache.Add(m.cache.entryName(m.root, string(entry.Name)), &cachedEntry{
			ino:  entry.Inode,
			Attr: *entry.Attr,
		})
	}
	logger.Infof("preload %d inodes in %v", m.cache.inodeCache.Len(), time.Since(start))
}

func bytesToString(b []byte) string {
	return *(*string)(unsafe.Pointer(&b))
}


================================================
FILE: pkg/meta/redis_csc_test.go
================================================
//go:build !noredis

package meta

import (
	"context"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

func mockRedisCSCMeta(t *testing.T) *redisMeta {
	m, err := newRedisMeta("redis", "127.0.0.1:6379/10?client-cache=true", testConfig())
	require.NoError(t, err, "failed to create redis meta")
	require.Equal(t, "redis", m.Name(), "meta name should be redis")
	return m.(*redisMeta)
}

func TestRedisCache(t *testing.T) {
	ctx := context.Background()
	m := mockRedisCSCMeta(t)
	_ = m.rdb.FlushAll(ctx)
	defer m.Shutdown()
	defer m.cache.close()

	var err error
	t.Run("invalidation handling", func(t *testing.T) {
		cache := m.cache
		ino := Ino(100)
		attr := &Attr{Typ: TypeFile, Mode: 0644}
		cache.inodeCache.Add(ino, attr.Marshal())
		if _, ok := cache.inodeCache.Get(ino); !ok {
			t.Fatal("inode should be in cache")
		}

		err = m.rdb.Set(ctx, m.inodeKey(ino), m.marshal(&Attr{Mode: 0755}), 0).Err()
		require.NoError(t, err, "failed to set key %d", ino)
		dumIno := Ino(101)
		err = m.rdb.Set(ctx, m.inodeKey(dumIno), m.marshal(&Attr{Mode: 0755}), 0).Err()
		require.NoError(t, err, "failed to set key %d", dumIno)
		time.Sleep(3 * time.Second)
		if _, ok := cache.inodeCache.Get(Ino(100)); ok {
			t.Fatal("inode should be invalidated and removed from cache")
		}

		cache.entryCache.Add(cache.entryName(101, "file"), &cachedEntry{})
		m.rdb.HSet(ctx, m.entryKey(100), "file", "content").Err()
	})
	t.Run("cache expiration", func(t *testing.T) {
		shortExpiry := 50 * time.Millisecond
		cache := newRedisCache("jfs", 1000, shortExpiry, 0)
		attr := &Attr{Typ: TypeFile, Mode: 0644}
		cache.inodeCache.Add(Ino(102), attr.Marshal())
		time.Sleep(3 * shortExpiry)
		if _, ok := cache.inodeCache.Get(Ino(102)); ok {
			t.Fatal("inode should be expired")
		}
	})

	t.Run("inode hook", func(t *testing.T) {
		cache := m.cache
		ino := Ino(103)
		attr := &Attr{Typ: TypeFile, Length: 10}
		cache.inodeCache.Add(ino, attr.Marshal())

		data, err := m.rdb.Get(ctx, m.inodeKey(ino)).Bytes()
		require.NoError(t, err, "failed to get inode")
		attr2 := &Attr{}
		attr2.Unmarshal(data)
		attr2.Full = false
		require.Equal(t, *attr, *attr2)

		attr3 := &Attr{Typ: TypeFile, Length: 20}
		err = m.rdb.Set(ctx, m.inodeKey(ino), attr3.Marshal(), 0).Err()
		require.NoError(t, err)
		_, ok := cache.inodeCache.Get(ino)
		require.False(t, ok)
	})

	t.Run("entry hook", func(t *testing.T) {
		cache := m.cache
		ino := Ino(104)
		name1, name2 := cache.entryName(ino, "f1"), cache.entryName(ino, "f2")
		cache.entryCache.Add(name1, &cachedEntry{})
		cache.entryCache.Add(name2, &cachedEntry{})

		err := m.rdb.HSet(ctx, m.entryKey(ino), "f1", "c1", "f2", "c2").Err()
		require.NoError(t, err)

		_, ok := cache.entryCache.Get(name1)
		require.False(t, ok)
		_, ok = cache.entryCache.Get(name2)
		require.False(t, ok)

		cache.entryCache.Add(name1, &cachedEntry{})
		cache.entryCache.Add(name2, &cachedEntry{})
		err = m.rdb.HDel(ctx, m.entryKey(ino), "f1", "f2").Err()
		require.NoError(t, err)

		_, ok = cache.entryCache.Get(name1)
		require.False(t, ok)
		_, ok = cache.entryCache.Get(name2)
		require.False(t, ok)
	})
}


================================================
FILE: pkg/meta/redis_lock.go
================================================
//go:build !noredis
// +build !noredis

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/redis/go-redis/v9"
)

func (r *redisMeta) Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno {
	ikey := r.flockKey(inode)
	lkey := r.ownerKey(owner)
	ctx = ctx.WithValue(txMethodKey{}, "Flock"+strconv.Itoa(int(ltype)))
	if ltype == F_UNLCK {
		return errno(r.txn(ctx, func(tx *redis.Tx) error {
			lkeys, err := tx.HKeys(ctx, ikey).Result()
			if err != nil {
				return err
			}
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.HDel(ctx, ikey, lkey)
				if len(lkeys) == 1 && lkeys[0] == lkey {
					pipe.SRem(ctx, r.lockedKey(r.sid), ikey)
				}
				return nil
			})
			return err
		}, ikey))
	}
	var err error
	for {
		err = r.txn(ctx, func(tx *redis.Tx) error {
			owners, err := tx.HGetAll(ctx, ikey).Result()
			if err != nil {
				return err
			}
			delete(owners, lkey)
			if ltype == F_RDLCK {
				for _, v := range owners {
					if v == "W" {
						return syscall.EAGAIN
					}
				}
				_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
					pipe.HSet(ctx, ikey, lkey, "R")
					return nil
				})
				return err
			}
			if len(owners) > 0 {
				return syscall.EAGAIN
			}
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.HSet(ctx, ikey, lkey, "W")
				pipe.SAdd(ctx, r.lockedKey(r.sid), ikey)
				return nil
			})
			return err
		}, ikey)

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return errno(err)
}

func (r *redisMeta) Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno {
	if *ltype == F_UNLCK {
		*start = 0
		*end = 0
		*pid = 0
		return 0
	}
	lkey := r.ownerKey(owner)
	owners, err := r.rdb.HGetAll(ctx, r.plockKey(inode)).Result()
	if err != nil {
		return errno(err)
	}
	delete(owners, lkey) // exclude itself
	for k, d := range owners {
		ls := loadLocks([]byte(d))
		for _, l := range ls {
			// find conflicted locks
			if (*ltype == F_WRLCK || l.Type == F_WRLCK) && *end >= l.Start && *start <= l.End {
				*ltype = l.Type
				*start = l.Start
				*end = l.End
				sid, _ := strconv.Atoi(strings.Split(k, "_")[0])
				if uint64(sid) == r.sid {
					*pid = l.Pid
				} else {
					*pid = 0
				}
				return 0
			}
		}
	}
	*ltype = F_UNLCK
	*start = 0
	*end = 0
	*pid = 0
	return 0
}

func (r *redisMeta) Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno {
	ikey := r.plockKey(inode)
	lkey := r.ownerKey(owner)
	ctx = ctx.WithValue(txMethodKey{}, "Setlk"+strconv.Itoa(int(ltype)))
	var err error
	lock := plockRecord{ltype, pid, start, end}
	for {
		err = r.txn(ctx, func(tx *redis.Tx) error {
			if ltype == F_UNLCK {
				d, err := tx.HGet(ctx, ikey, lkey).Result()
				if err != nil && err != redis.Nil {
					return err
				}
				ls := loadLocks([]byte(d))
				if len(ls) == 0 {
					return nil
				}
				ls = updateLocks(ls, lock)
				var lkeys []string
				if len(ls) == 0 {
					lkeys, err = tx.HKeys(ctx, ikey).Result()
					if err != nil {
						return err
					}
				}
				_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
					if len(ls) == 0 {
						pipe.HDel(ctx, ikey, lkey)
						if len(lkeys) == 1 && lkeys[0] == lkey {
							pipe.SRem(ctx, r.lockedKey(r.sid), ikey)
						}
					} else {
						pipe.HSet(ctx, ikey, lkey, dumpLocks(ls))
					}
					return nil
				})
				return err
			}
			owners, err := tx.HGetAll(ctx, ikey).Result()
			if err != nil {
				return err
			}
			ls := loadLocks([]byte(owners[lkey]))
			delete(owners, lkey)
			for _, d := range owners {
				ls := loadLocks([]byte(d))
				for _, l := range ls {
					// find conflicted locks
					if (ltype == F_WRLCK || l.Type == F_WRLCK) && end >= l.Start && start <= l.End {
						return syscall.EAGAIN
					}
				}
			}
			ls = updateLocks(ls, lock)
			_, err = tx.TxPipelined(ctx, func(pipe redis.Pipeliner) error {
				pipe.HSet(ctx, ikey, lkey, dumpLocks(ls))
				pipe.SAdd(ctx, r.lockedKey(r.sid), ikey)
				return nil
			})
			return err
		}, ikey)

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return errno(err)
}

func (r *redisMeta) ListLocks(ctx context.Context, inode Ino) ([]PLockItem, []FLockItem, error) {
	fKey := r.flockKey(inode)
	pKey := r.plockKey(inode)

	rawFLocks, err := r.rdb.HGetAll(ctx, fKey).Result()
	if err != nil {
		return nil, nil, err
	}
	flocks := make([]FLockItem, 0, len(rawFLocks))
	for k, v := range rawFLocks {
		owner, err := parseOwnerKey(k)
		if err != nil {
			return nil, nil, err
		}
		flocks = append(flocks, FLockItem{*owner, v})
	}

	rawPLocks, err := r.rdb.HGetAll(ctx, pKey).Result()
	if err != nil {
		return nil, nil, err
	}
	plocks := make([]PLockItem, 0)
	for k, d := range rawPLocks {
		owner, err := parseOwnerKey(k)
		if err != nil {
			return nil, nil, err
		}
		ls := loadLocks([]byte(d))
		for _, l := range ls {
			plocks = append(plocks, PLockItem{*owner, l})
		}
	}
	return plocks, flocks, nil
}


================================================
FILE: pkg/meta/slice.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import "github.com/juicedata/juicefs/pkg/utils"

type slice struct {
	id    uint64
	size  uint32
	off   uint32
	len   uint32
	pos   uint32
	left  *slice
	right *slice
}

func newSlice(pos uint32, id uint64, cleng, off, len uint32) *slice {
	if len == 0 {
		return nil
	}
	s := &slice{}
	s.pos = pos
	s.id = id
	s.size = cleng
	s.off = off
	s.len = len
	s.left = nil
	s.right = nil
	return s
}

func (s *slice) read(buf []byte) {
	rb := utils.ReadBuffer(buf)
	s.pos = rb.Get32()
	s.id = rb.Get64()
	s.size = rb.Get32()
	s.off = rb.Get32()
	s.len = rb.Get32()
}

func (s *slice) cut(pos uint32) (left, right *slice) {
	if s == nil {
		return nil, nil
	}
	if pos <= s.pos {
		if s.left == nil {
			s.left = newSlice(pos, 0, 0, 0, s.pos-pos)
		}
		left, s.left = s.left.cut(pos)
		return left, s
	} else if pos < s.pos+s.len {
		l := pos - s.pos
		right = newSlice(pos, s.id, s.size, s.off+l, s.len-l)
		right.right = s.right
		s.len = l
		s.right = nil
		return s, right
	} else {
		if s.right == nil {
			s.right = newSlice(s.pos+s.len, 0, 0, 0, pos-s.pos-s.len)
		}
		s.right, right = s.right.cut(pos)
		return s, right
	}
}

func (s *slice) visit(f func(*slice)) {
	if s == nil {
		return
	}
	s.left.visit(f)
	right := s.right
	f(s) // s could be freed
	right.visit(f)
}

const sliceBytes = 24

func marshalSlice(pos uint32, id uint64, size, off, len uint32) []byte {
	w := utils.NewBuffer(sliceBytes)
	w.Put32(pos)
	w.Put64(id)
	w.Put32(size)
	w.Put32(off)
	w.Put32(len)
	return w.Bytes()
}

func readSlices(vals []string) []*slice {
	slices := make([]slice, len(vals))
	ss := make([]*slice, len(vals))
	for i, val := range vals {
		if len(val) != sliceBytes {
			logger.Errorf("corrupt slice: len=%d, val=%v", len(val), []byte(val))
			return nil
		}
		s := &slices[i]
		s.read([]byte(val))
		ss[i] = s
	}
	return ss
}

func readSliceBuf(buf []byte) []*slice {
	if len(buf)%sliceBytes != 0 {
		logger.Errorf("corrupt slices: len=%d", len(buf))
		return nil
	}
	nSlices := len(buf) / sliceBytes
	slices := make([]slice, nSlices)
	ss := make([]*slice, nSlices)
	for i := 0; i < len(buf); i += sliceBytes {
		s := &slices[i/sliceBytes]
		s.read(buf[i:])
		ss[i/sliceBytes] = s
	}
	return ss
}

func buildSlice(ss []*slice) []Slice {
	var root *slice
	for i := range ss {
		s := new(slice)
		*s = *ss[i]
		var right *slice
		s.left, right = root.cut(s.pos)
		_, s.right = right.cut(s.pos + s.len)
		root = s
	}
	var pos uint32
	var chunk []Slice
	root.visit(func(s *slice) {
		if s.pos > pos {
			chunk = append(chunk, Slice{Size: s.pos - pos, Len: s.pos - pos})
			pos = s.pos
		}
		chunk = append(chunk, Slice{Id: s.id, Size: s.size, Off: s.off, Len: s.len})
		pos += s.len
	})
	return chunk
}

func compactChunk(ss []*slice) (uint32, uint32, []Slice) {
	var chunk = buildSlice(ss)
	var pos uint32
	n := len(chunk)
	for n > 1 {
		if chunk[0].Id == 0 {
			pos += chunk[0].Len
			chunk = chunk[1:]
			n--
		} else if chunk[n-1].Id == 0 {
			chunk = chunk[:n-1]
			n--
		} else {
			break
		}
	}
	if n == 1 && chunk[0].Id == 0 {
		chunk[0].Len = 1
	}
	var size uint32
	for _, c := range chunk {
		size += c.Len
	}
	return pos, size, chunk
}

func skipSome(chunk []*slice) int {
	var skipped int
	var total = len(chunk)
OUT:
	for skipped < total {
		ss := chunk[skipped:]
		pos, size, c := compactChunk(ss)
		first := ss[0]
		if first.len < (1<<20) || first.len*5 < size || size == 0 {
			// it's too small
			break
		}
		isFirst := func(pos uint32, s Slice) bool {
			return pos == first.pos && s.Id == first.id && s.Off == first.off && s.Len == first.len
		}
		if !isFirst(pos, c[0]) {
			// it's not the first slice, compact it
			break
		}
		for _, s := range ss[1:] {
			if *s == *first {
				break OUT
			}
		}
		skipped++
	}
	return skipped
}


================================================
FILE: pkg/meta/sql.go
================================================
//go:build !nosqlite || !nomysql || !nopg
// +build !nosqlite !nomysql !nopg

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bufio"
	"bytes"
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"io"
	"net/url"
	"runtime"
	"runtime/debug"
	"slices"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	"xorm.io/xorm"
	"xorm.io/xorm/log"
	"xorm.io/xorm/names"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

const MaxFieldsCountOfTable = 18 // node table

type setting struct {
	Name  string `xorm:"pk"`
	Value string `xorm:"varchar(4096) notnull"`
}

type counter struct {
	Name  string `xorm:"pk"`
	Value int64  `xorm:"notnull"`
}

type edge struct {
	Id     int64  `xorm:"pk bigserial"`
	Parent Ino    `xorm:"unique(edge) notnull"`
	Name   []byte `xorm:"unique(edge) varbinary(255) notnull"`
	Inode  Ino    `xorm:"index notnull"`
	Type   uint8  `xorm:"notnull"`
}

type node struct {
	Inode        Ino    `xorm:"pk"`
	Type         uint8  `xorm:"notnull"`
	Flags        uint8  `xorm:"notnull"`
	Mode         uint16 `xorm:"notnull"`
	Uid          uint32 `xorm:"notnull"`
	Gid          uint32 `xorm:"notnull"`
	Atime        int64  `xorm:"notnull"`
	Mtime        int64  `xorm:"notnull"`
	Ctime        int64  `xorm:"notnull"`
	Atimensec    int16  `xorm:"notnull default 0"`
	Mtimensec    int16  `xorm:"notnull default 0"`
	Ctimensec    int16  `xorm:"notnull default 0"`
	Nlink        uint32 `xorm:"notnull"`
	Length       uint64 `xorm:"notnull"`
	Rdev         uint32
	Parent       Ino
	AccessACLId  uint32 `xorm:"'access_acl_id'"`
	DefaultACLId uint32 `xorm:"'default_acl_id'"`
}

func (n *node) setAtime(ns int64) {
	n.Atime = ns / 1e3
	n.Atimensec = int16(ns % 1e3)
}

func (n *node) getMtime() int64 {
	return n.Mtime*1e3 + int64(n.Mtimensec)
}

func (n *node) setMtime(ns int64) {
	n.Mtime = ns / 1e3
	n.Mtimensec = int16(ns % 1e3)
}

func (n *node) setCtime(ns int64) {
	n.Ctime = ns / 1e3
	n.Ctimensec = int16(ns % 1e3)
}

func getACLIdColName(aclType uint8) string {
	switch aclType {
	case aclAPI.TypeAccess:
		return "access_acl_id"
	case aclAPI.TypeDefault:
		return "default_acl_id"
	}
	return ""
}

type acl struct {
	Id          uint32 `xorm:"pk autoincr"`
	Owner       uint16
	Group       uint16
	Mask        uint16
	Other       uint16
	NamedUsers  []byte
	NamedGroups []byte
}

func newSQLAcl(r *aclAPI.Rule) *acl {
	a := &acl{
		Owner: r.Owner,
		Group: r.Group,
		Mask:  r.Mask,
		Other: r.Other,
	}
	a.NamedUsers = r.NamedUsers.Encode()
	a.NamedGroups = r.NamedGroups.Encode()
	return a
}

func (a *acl) toRule() *aclAPI.Rule {
	r := &aclAPI.Rule{}
	r.Owner = a.Owner
	r.Group = a.Group
	r.Other = a.Other
	r.Mask = a.Mask
	r.NamedUsers.Decode(a.NamedUsers)
	r.NamedGroups.Decode(a.NamedGroups)
	return r
}

type delegationToken struct {
	Id    uint32 `xorm:"pk autoincr"`
	Token []byte
}

type namedNode struct {
	node `xorm:"extends"`
	Name []byte `xorm:"varbinary(255)"`
}

type chunk struct {
	Id     int64  `xorm:"pk bigserial"`
	Inode  Ino    `xorm:"unique(chunk) notnull"`
	Indx   uint32 `xorm:"unique(chunk) notnull"`
	Slices []byte `xorm:"blob notnull"`
}

type sliceRef struct {
	Id   uint64 `xorm:"pk chunkid"`
	Size uint32 `xorm:"notnull"`
	Refs int    `xorm:"index notnull"`
}

type delslices struct {
	Id      uint64 `xorm:"pk chunkid"`
	Deleted int64  `xorm:"notnull"` // timestamp
	Slices  []byte `xorm:"blob notnull"`
}

type symlink struct {
	Inode  Ino    `xorm:"pk"`
	Target []byte `xorm:"varbinary(4096) notnull"`
}

type xattr struct {
	Id    int64  `xorm:"pk bigserial"`
	Inode Ino    `xorm:"unique(name) notnull"`
	Name  string `xorm:"unique(name) notnull"`
	Value []byte `xorm:"blob notnull"`
}

type flock struct {
	Id    int64  `xorm:"pk bigserial"`
	Inode Ino    `xorm:"notnull unique(flock)"`
	Sid   uint64 `xorm:"notnull unique(flock)"`
	Owner int64  `xorm:"notnull unique(flock)"`
	Ltype byte   `xorm:"notnull"`
}

type plock struct {
	Id      int64  `xorm:"pk bigserial"`
	Inode   Ino    `xorm:"notnull unique(plock)"`
	Sid     uint64 `xorm:"notnull unique(plock)"`
	Owner   int64  `xorm:"notnull unique(plock)"`
	Records []byte `xorm:"blob notnull"`
}

type session struct {
	Sid       uint64 `xorm:"pk"`
	Heartbeat int64  `xorm:"notnull"`
	Info      []byte `xorm:"blob"`
}

type session2 struct {
	Sid    uint64 `xorm:"pk"`
	Expire int64  `xorm:"notnull"`
	Info   []byte `xorm:"blob"`
}

type sustained struct {
	Id    int64  `xorm:"pk bigserial"`
	Sid   uint64 `xorm:"unique(sustained) notnull"`
	Inode Ino    `xorm:"unique(sustained) notnull"`
}

type delfile struct {
	Inode  Ino    `xorm:"pk notnull"`
	Length uint64 `xorm:"notnull"`
	Expire int64  `xorm:"notnull"`
}

type dirStats struct {
	Inode      Ino   `xorm:"pk notnull"`
	DataLength int64 `xorm:"notnull"`
	UsedSpace  int64 `xorm:"notnull"`
	UsedInodes int64 `xorm:"notnull"`
}

type detachedNode struct {
	Inode Ino   `xorm:"pk notnull"`
	Added int64 `xorm:"notnull"`
}

type dirQuota struct {
	Inode      Ino   `xorm:"pk"`
	MaxSpace   int64 `xorm:"notnull"`
	MaxInodes  int64 `xorm:"notnull"`
	UsedSpace  int64 `xorm:"notnull"`
	UsedInodes int64 `xorm:"notnull"`
}

type userGroupQuota struct {
	Qtype      uint32 `xorm:"pk notnull"` // 1 for user, 2 for group
	Qkey       uint64 `xorm:"pk notnull"` // uid or gid
	MaxSpace   int64  `xorm:"notnull"`
	MaxInodes  int64  `xorm:"notnull"`
	UsedSpace  int64  `xorm:"notnull"`
	UsedInodes int64  `xorm:"notnull"`
}

type dbMeta struct {
	*baseMeta
	db    *xorm.Engine
	spool *sync.Pool
	snap  *dbSnap

	noReadOnlyTxn bool
	statement     map[string]string
	tablePrefix   string
}

var _ Meta = (*dbMeta)(nil)
var _ engine = (*dbMeta)(nil)

type dbSnap struct {
	node    map[Ino]*node
	symlink map[Ino]*symlink
	xattr   map[Ino][]*xattr
	edges   map[Ino][]*edge
	chunk   map[string]*chunk
}

func recoveryMysqlPwd(addr string) string {
	colonIndex := strings.Index(addr, ":")
	atIndex := strings.LastIndex(addr, "@")
	if colonIndex != -1 && colonIndex < atIndex {
		pwd := addr[colonIndex+1 : atIndex]
		if parse, err := url.Parse("mysql://root:" + pwd + "@127.0.0.1"); err == nil {
			if originPwd, ok := parse.User.Password(); ok {
				addr = fmt.Sprintf("%s:%s%s", addr[:colonIndex], originPwd, addr[atIndex:])
			}
		}
	}
	return addr
}

func extractCustomConfig[T string | int](value *url.Values, key string, defaultV T) (T, error) {
	if value == nil {
		return defaultV, nil
	}
	if v := value.Get(key); v != "" {
		value.Del(key)
		var result T
		switch any(defaultV).(type) {
		case int:
			parsedInt, err := strconv.Atoi(v)
			if err != nil {
				return defaultV, fmt.Errorf("failed to parse value as int: %v", err)
			}
			result = any(parsedInt).(T)
		case string:
			result = any(v).(T)
		default:
			return defaultV, fmt.Errorf("unsupported type: %T", defaultV)
		}
		return result, nil
	} else {
		return defaultV, nil
	}
}

var setTransactionIsolation func(dns string) (string, error)

type prefixMapper struct {
	mapper names.Mapper
	prefix string
}

func (m prefixMapper) Obj2Table(name string) string {
	if name == "sliceRef" {
		return m.prefix + "chunk_ref"
	}
	return m.prefix + m.mapper.Obj2Table(name)
}

func (m prefixMapper) Table2Obj(name string) string {
	if name == m.prefix+"chunk_ref" {
		return "sliceRef"
	}
	return m.mapper.Table2Obj(name[len(m.prefix):])
}
func (m *dbMeta) sqlConv(sql string) string {
	return m.statement[sql]
}

func (m *dbMeta) initStatement() {
	m.statement["SELECT length FROM node WHERE inode IN (SELECT inode FROM sustained)"] =
		fmt.Sprintf("SELECT length FROM %snode WHERE inode IN (SELECT inode FROM %ssustained)", m.tablePrefix, m.tablePrefix)
	m.statement["update counter set value=value + ? where name='totalInodes'"] =
		fmt.Sprintf("update %scounter set value=value + ? where name='totalInodes'", m.tablePrefix)
	m.statement["update counter set value= value + ? where name='usedSpace'"] =
		fmt.Sprintf("update %scounter set value= value + ? where name='usedSpace'", m.tablePrefix)
	m.statement["update chunk set slices=slices || ? where inode=? AND indx=?"] =
		fmt.Sprintf("update %schunk set slices=slices || ? where inode=? AND indx=?", m.tablePrefix)
	m.statement["update chunk set slices=concat(slices, ?) where inode=? AND indx=?"] =
		fmt.Sprintf("update %schunk set slices=concat(slices, ?) where inode=? AND indx=?", m.tablePrefix)
	m.statement["update chunk_ref set refs=refs+1 where chunkid = ? AND size = ?"] =
		fmt.Sprintf("update %schunk_ref set refs=refs+1 where chunkid = ? AND size = ?", m.tablePrefix)
	m.statement["update chunk_ref set refs=refs-1 where chunkid=? AND size=?"] =
		fmt.Sprintf("update %schunk_ref set refs=refs-1 where chunkid=? AND size=?", m.tablePrefix)
	m.statement["update dir_quota set used_space=used_space+?, used_inodes=used_inodes+? where inode=?"] =
		fmt.Sprintf("update %sdir_quota set used_space=used_space+?, used_inodes=used_inodes+? where inode=?", m.tablePrefix)
	m.statement["update user_group_quota set used_space=used_space+?, used_inodes=used_inodes+? where qtype=? and qkey=?"] =
		fmt.Sprintf("update %suser_group_quota set used_space=used_space+?, used_inodes=used_inodes+? where qtype=? and qkey=?", m.tablePrefix)

	m.statement[`
			 INSERT INTO chunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON CONFLICT (inode, indx)
			 DO UPDATE SET slices=chunk.slices || ?`] =
		fmt.Sprintf(`
			 INSERT INTO %schunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON CONFLICT (inode, indx)
			 DO UPDATE SET slices=%schunk.slices || ?`, m.tablePrefix, m.tablePrefix)
	m.statement[`
			 INSERT INTO chunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 slices=concat(slices, ?)`] =
		fmt.Sprintf(`
			 INSERT INTO %schunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 slices=concat(slices, ?)`, m.tablePrefix)
	m.statement[`
			 INSERT INTO chunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON CONFLICT (chunkid)
			 DO UPDATE SET size=?, refs=?`] =
		fmt.Sprintf(`
			 INSERT INTO %schunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON CONFLICT (chunkid)
			 DO UPDATE SET size=?, refs=?`, m.tablePrefix)
	m.statement[`
			 INSERT INTO chunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 size=?, refs=?`] =
		fmt.Sprintf(`
			 INSERT INTO %schunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 size=?, refs=?`, m.tablePrefix)
	m.statement["edge.inode=node.inode"] = fmt.Sprintf("%sedge.inode=%snode.inode", m.tablePrefix, m.tablePrefix)
	m.statement["edge.id"] = fmt.Sprintf("%sedge.id", m.tablePrefix)
	m.statement["edge.name"] = fmt.Sprintf("%sedge.name", m.tablePrefix)
	m.statement["edge.type"] = fmt.Sprintf("%sedge.type", m.tablePrefix)
	m.statement["edge.*"] = fmt.Sprintf("%sedge.*", m.tablePrefix)
	m.statement["node.*"] = fmt.Sprintf("%snode.*", m.tablePrefix)
	m.statement[`INSERT INTO chunk_ref (chunkid, size, refs) VALUES (?,?,?) ON CONFLICT DO NOTHING`] =
		fmt.Sprintf(`INSERT INTO %schunk_ref (chunkid, size, refs) VALUES (?,?,?) ON CONFLICT DO NOTHING`, m.tablePrefix)
	m.statement[`INSERT IGNORE INTO chunk_ref (chunkid, size, refs) VALUES (?,?,?)`] =
		fmt.Sprintf(`INSERT IGNORE INTO %schunk_ref (chunkid, size, refs) VALUES (?,?,?)`, m.tablePrefix)
}

func newSQLMeta(driver, addr string, conf *Config) (Meta, error) {
	var searchPath string

	baseUrl, queryStr, _ := strings.Cut(addr, "?")
	var query url.Values
	var err error
	query, err = url.ParseQuery(queryStr)
	if err != nil {
		return nil, err
	}
	var vOpenConns, vIdleConns, vIdleTime, vLifeTime int
	if vOpenConns, err = extractCustomConfig(&query, "max_open_conns", 0); err != nil {
		return nil, err
	}
	if vIdleConns, err = extractCustomConfig(&query, "max_idle_conns", runtime.GOMAXPROCS(-1)*2); err != nil {
		return nil, err
	}
	if vIdleTime, err = extractCustomConfig(&query, "max_idle_time", 300); err != nil {
		return nil, err
	}
	if vLifeTime, err = extractCustomConfig(&query, "max_life_time", 0); err != nil {
		return nil, err
	}
	var tablePrefix string
	if tablePrefix, err = extractCustomConfig(&query, "table_prefix", ""); err != nil {
		return nil, err
	}
	if tablePrefix == "" {
		tablePrefix = "jfs_"
	} else {
		tablePrefix = "jfs_" + tablePrefix + "_"
	}

	if driver == "sqlite3" {
		if !query.Has("cache") {
			query.Add("cache", "shared")
		}
		if !query.Has("_journal") && !query.Has("_journal_mode") {
			query.Add("_journal", "WAL")
		}
		if !query.Has("_timeout") && !query.Has("_busy_timeout") {
			query.Add("_timeout", "5000")
		}
	}

	if encode := query.Encode(); encode != "" {
		addr = fmt.Sprintf("%s?%s", baseUrl, encode)
	} else {
		addr = baseUrl
	}

	if driver == "postgres" {
		addr = driver + "://" + addr
		driver = "pgx"

		parse, err := url.Parse(addr)
		if err != nil {
			return nil, fmt.Errorf("parse url %s failed: %s", addr, err)
		}
		searchPath = parse.Query().Get("search_path")
		if searchPath != "" {
			if len(strings.Split(searchPath, ",")) > 1 {
				return nil, fmt.Errorf("currently, only one schema is supported in search_path")
			}
		}
	}

	// escaping is not necessary for mysql password https://github.com/go-sql-driver/mysql#password
	if driver == "mysql" && setTransactionIsolation != nil {
		addr = recoveryMysqlPwd(addr)
		var err error
		if addr, err = setTransactionIsolation(addr); err != nil {
			return nil, err
		}
	}

	if driver == "sqlite3" {
		DirBatchNum["db"] = 4096 // SQLITE_MAX_VARIABLE_NUMBER limit
	}

	engine, err := xorm.NewEngine(driver, addr)
	if err != nil {
		return nil, fmt.Errorf("unable to use data source %s: %s", driver, err)
	}
	switch logger.Level { // make xorm less verbose
	case logrus.TraceLevel:
		engine.SetLogLevel(log.LOG_DEBUG)
	case logrus.DebugLevel:
		engine.SetLogLevel(log.LOG_INFO)
	case logrus.InfoLevel, logrus.WarnLevel:
		engine.SetLogLevel(log.LOG_WARNING)
	case logrus.ErrorLevel:
		engine.SetLogLevel(log.LOG_ERR)
	default:
		engine.SetLogLevel(log.LOG_OFF)
	}
	start := time.Now()
	if err = engine.Ping(); err != nil {
		return nil, fmt.Errorf("ping database: %s", err)
	}
	if time.Since(start) > time.Millisecond*5 {
		logger.Warnf("The latency to database is too high: %s", time.Since(start))
	}
	if searchPath != "" {
		engine.SetSchema(searchPath)
	}
	if vOpenConns > 0 {
		engine.DB().SetMaxOpenConns(vOpenConns)
	}
	if vLifeTime > 0 {
		engine.DB().SetConnMaxLifetime(time.Second * time.Duration(vLifeTime))
	}
	engine.DB().SetMaxIdleConns(vIdleConns)
	engine.DB().SetConnMaxIdleTime(time.Second * time.Duration(vIdleTime))
	engine.SetTableMapper(prefixMapper{mapper: engine.GetTableMapper(), prefix: tablePrefix})
	m := &dbMeta{
		baseMeta:    newBaseMeta(addr, conf),
		db:          engine,
		statement:   make(map[string]string),
		tablePrefix: tablePrefix,
	}
	m.initStatement()
	m.spool = &sync.Pool{
		New: func() interface{} {
			s := engine.NewSession()
			runtime.SetFinalizer(s, func(s *xorm.Session) {
				_ = s.Close()
			})
			return s
		},
	}
	m.en = m
	return m, nil
}

func (m *dbMeta) Shutdown() error {
	return m.db.Close()
}

func (m *dbMeta) Name() string {
	name := m.db.DriverName()
	if name == "pgx" {
		name = "postgres"
	}
	return name
}

func (m *dbMeta) doDeleteSlice(id uint64, size uint32) error {
	return m.txn(func(s *xorm.Session) error {
		_, err := s.Delete(&sliceRef{Id: id})
		return err
	})
}

func (m *dbMeta) syncTable(beans ...interface{}) error {
	err := m.db.Sync2(beans...)
	if err != nil && strings.Contains(err.Error(), "Duplicate key") {
		err = nil
	}
	return err
}

func (m *dbMeta) syncAllTables() error {
	if err := m.syncTable(new(setting), new(counter)); err != nil {
		return fmt.Errorf("create table setting, counter: %s", err)
	}
	if err := m.syncTable(new(edge)); err != nil {
		return fmt.Errorf("create table edge: %s", err)
	}
	if err := m.syncTable(new(node), new(symlink), new(xattr)); err != nil {
		return fmt.Errorf("create table node, symlink, xattr: %s", err)
	}
	if err := m.syncTable(new(chunk), new(sliceRef), new(delslices)); err != nil {
		return fmt.Errorf("create table chunk, chunk_ref, delslices: %s", err)
	}
	if err := m.syncTable(new(session2), new(sustained), new(delfile)); err != nil {
		return fmt.Errorf("create table session2, sustaind, delfile: %s", err)
	}
	if err := m.syncTable(new(flock), new(plock), new(dirQuota), new(userGroupQuota)); err != nil {
		return fmt.Errorf("create table flock, plock, dirQuota, userGroupQuota: %s", err)
	}
	if err := m.syncTable(new(dirStats)); err != nil {
		return fmt.Errorf("create table dirStats: %s", err)
	}
	if err := m.syncTable(new(detachedNode)); err != nil {
		return fmt.Errorf("create table detachedNode: %s", err)
	}
	if err := m.syncTable(new(acl)); err != nil {
		return fmt.Errorf("create table acl: %s", err)
	}
	if err := m.syncTable(new(delegationToken)); err != nil {
		return fmt.Errorf("create table delegationToken: %s", err)
	}
	return nil
}

func (m *dbMeta) doInit(format *Format, force bool) error {
	if err := m.syncAllTables(); err != nil {
		return err
	}
	var s = setting{Name: "format"}
	var ok bool
	err := m.simpleTxn(Background(), func(ses *xorm.Session) (err error) {
		ok, err = ses.Get(&s)
		return err
	})
	if err != nil {
		return err
	}

	if ok {
		var old Format
		err = json.Unmarshal([]byte(s.Value), &old)
		if err != nil {
			return fmt.Errorf("json: %s", err)
		}
		if !old.DirStats && format.DirStats {
			// remove dir stats as they are outdated
			_, err = m.db.Where("TRUE").Delete(new(dirStats))
			if err != nil {
				return errors.Wrap(err, "drop table dirStats")
			}
		}
		if !old.UserGroupQuota && format.UserGroupQuota {
			// remove user group quota as they are outdated
			_, err = m.db.Where("TRUE").Delete(new(userGroupQuota))
			if err != nil {
				return errors.Wrap(err, "drop table userGroupQuota")
			}
		}
		if err = format.update(&old, force); err != nil {
			return errors.Wrap(err, "update format")
		}
	}

	data, err := json.MarshalIndent(format, "", "")
	if err != nil {
		return fmt.Errorf("json: %s", err)
	}

	m.fmt = format
	n := &node{
		Type:   TypeDirectory,
		Nlink:  2,
		Length: 4 << 10,
		Parent: RootInode,
	}
	now := time.Now().UnixNano()
	n.setAtime(now)
	n.setMtime(now)
	n.setCtime(now)
	return m.txn(func(s *xorm.Session) error {
		if format.TrashDays > 0 {
			ok2, err := s.ForUpdate().Get(&node{Inode: TrashInode})
			if err != nil {
				return err
			}
			if !ok2 {
				n.Inode = TrashInode
				n.Mode = 0555
				if err = mustInsert(s, n); err != nil {
					return err
				}
			}
		}
		if ok {
			_, err = s.Update(&setting{"format", string(data)}, &setting{Name: "format"})
			return err
		} else {
			var set = &setting{"format", string(data)}
			if n, err := s.Insert(set); err != nil {
				return err
			} else if n == 0 {
				return fmt.Errorf("format is not inserted")
			}
		}

		n.Inode = RootInode
		n.Mode = 0777
		var cs = []counter{
			{"nextInode", 2}, // 1 is root
			{"nextChunk", 1},
			{"nextSession", 0},
			{"usedSpace", 0},
			{"totalInodes", 0},
			{"nextCleanupSlices", 0},
		}
		return mustInsert(s, n, &cs)
	})
}

func (m *dbMeta) cacheACLs(ctx Context) error {
	if !m.getFormat().EnableACL {
		return nil
	}
	return m.simpleTxn(ctx, func(s *xorm.Session) error {
		return s.Table(&acl{}).Iterate(new(acl), func(idx int, bean interface{}) error {
			a := bean.(*acl)
			m.aclCache.Put(a.Id, a.toRule())
			return nil
		})
	})
}

func (m *dbMeta) Reset() error {
	m.Lock()
	defer m.Unlock()
	return m.db.DropTables(&setting{}, &counter{},
		&node{}, &edge{}, &symlink{}, &xattr{},
		&chunk{}, &sliceRef{}, &delslices{},
		&session{}, &session2{}, &sustained{}, &delfile{},
		&flock{}, &plock{}, &dirStats{}, &dirQuota{}, &userGroupQuota{}, &detachedNode{}, &acl{}, &delegationToken{})
}

func (m *dbMeta) doLoad() (data []byte, err error) {
	err = m.simpleTxn(Background(), func(ses *xorm.Session) error {
		if ok, err := ses.IsTableExist(&setting{}); err != nil {
			return err
		} else if !ok {
			return nil
		}
		s := setting{Name: "format"}
		ok, err := ses.Get(&s)
		if err == nil && ok {
			data = []byte(s.Value)
		}
		return err
	})
	return
}

func (m *dbMeta) doNewSession(sinfo []byte, update bool) error {
	// add new table
	err := m.syncTable(new(session2), new(delslices), new(dirStats), new(detachedNode), new(dirQuota), new(userGroupQuota), new(acl), new(delegationToken))
	if err != nil {
		return fmt.Errorf("update table session2, delslices, dirstats, detachedNode, dirQuota, userGroupQuota, acl: %s", err)
	}
	// add node table
	if err = m.syncTable(new(node)); err != nil {
		return fmt.Errorf("update table node: %s", err)
	}
	// add primary key
	if err = m.syncTable(new(edge), new(chunk), new(xattr), new(sustained)); err != nil {
		return fmt.Errorf("update table edge, chunk, xattr, sustained: %s", err)
	}
	// update the owner from uint64 to int64
	if err = m.syncTable(new(flock), new(plock)); err != nil {
		return fmt.Errorf("update table flock, plock: %s", err)
	}

	for {
		beans := session2{Sid: m.sid, Expire: m.expireTime(), Info: sinfo}
		if update {
			return m.txn(func(s *xorm.Session) error {
				_, err = s.Cols("expire", "info").Update(&beans, &session2{Sid: beans.Sid})
				return err
			})
		} else {
			if err = m.txn(func(s *xorm.Session) error {
				return mustInsert(s, &beans)
			}); err == nil {
				break
			}

			if isDuplicateEntryErr(err) {
				logger.Warnf("session id %d is already used", m.sid)
				if v, e := m.incrCounter("nextSession", 1); e == nil {
					m.sid = uint64(v)
					continue
				} else {
					return fmt.Errorf("get session ID: %s", e)
				}
			} else {
				return fmt.Errorf("insert new session %d: %s", m.sid, err)
			}
		}
	}
	return nil
}

func (m *dbMeta) getSession(row interface{}, detail bool) (*Session, error) {
	var s Session
	var info []byte
	switch row := row.(type) {
	case *session2:
		s.Sid = row.Sid
		s.Expire = time.Unix(row.Expire, 0)
		info = row.Info
	case *session:
		s.Sid = row.Sid
		s.Expire = time.Unix(row.Heartbeat, 0).Add(time.Minute * 5)
		info = row.Info
		if info == nil { // legacy client has no info
			info = []byte("{}")
		}
	default:
		return nil, fmt.Errorf("invalid type: %T", row)
	}
	if err := json.Unmarshal(info, &s); err != nil {
		return nil, fmt.Errorf("corrupted session info; json error: %s", err)
	}
	if detail {
		var (
			srows []sustained
			frows []flock
			prows []plock
		)
		err := m.roTxn(Background(), func(ses *xorm.Session) error {
			if err := ses.Find(&srows, &sustained{Sid: s.Sid}); err != nil {
				return fmt.Errorf("find sustained %d: %s", s.Sid, err)
			}
			s.Sustained = make([]Ino, 0, len(srows))
			for _, srow := range srows {
				s.Sustained = append(s.Sustained, srow.Inode)
			}

			if err := ses.Find(&frows, &flock{Sid: s.Sid}); err != nil {
				return fmt.Errorf("find flock %d: %s", s.Sid, err)
			}
			s.Flocks = make([]Flock, 0, len(frows))
			for _, frow := range frows {
				s.Flocks = append(s.Flocks, Flock{frow.Inode, uint64(frow.Owner), string(frow.Ltype)})
			}

			if err := ses.Find(&prows, &plock{Sid: s.Sid}); err != nil {
				return fmt.Errorf("find plock %d: %s", s.Sid, err)
			}
			s.Plocks = make([]Plock, 0, len(prows))
			for _, prow := range prows {
				s.Plocks = append(s.Plocks, Plock{prow.Inode, uint64(prow.Owner), loadLocks(prow.Records)})
			}
			return nil
		})
		if err != nil {
			return nil, err
		}
	}
	return &s, nil
}

func (m *dbMeta) GetSession(sid uint64, detail bool) (s *Session, err error) {
	err = m.roTxn(Background(), func(ses *xorm.Session) error {
		if ok, err := ses.IsTableExist(&session2{}); err != nil {
			return err
		} else if ok {
			row := session2{Sid: sid}
			if ok, err = ses.Get(&row); err != nil {
				return err
			} else if ok {
				s, err = m.getSession(&row, detail)
				return err
			}
		}
		if ok, err := ses.IsTableExist(&session{}); err != nil {
			return err
		} else if ok {
			row := session{Sid: sid}
			if ok, err = ses.Get(&row); err != nil {
				return err
			} else if ok {
				s, err = m.getSession(&row, detail)
				return err
			}
		}
		return fmt.Errorf("session not found: %d", sid)
	})
	return
}

func (m *dbMeta) ListSessions() ([]*Session, error) {
	var sessions []*Session
	err := m.roTxn(Background(), func(ses *xorm.Session) error {
		if ok, err := ses.IsTableExist(&session2{}); err != nil {
			return err
		} else if ok {
			var rows []session2
			if err = ses.Find(&rows); err != nil {
				return err
			}
			sessions = make([]*Session, 0, len(rows))
			for i := range rows {
				s, err := m.getSession(&rows[i], false)
				if err != nil {
					logger.Errorf("get session: %s", err)
					continue
				}
				sessions = append(sessions, s)
			}
		}
		if ok, err := ses.IsTableExist(&session{}); err != nil {
			logger.Errorf("Check legacy session table: %s", err)
		} else if ok {
			var lrows []session
			if err = ses.Find(&lrows); err != nil {
				logger.Errorf("Scan legacy sessions: %s", err)
				return nil
			}
			for i := range lrows {
				s, err := m.getSession(&lrows[i], false)
				if err != nil {
					logger.Errorf("Get legacy session: %s", err)
					continue
				}
				sessions = append(sessions, s)
			}
		}
		return nil
	})
	return sessions, err
}

func (m *dbMeta) getCounter(name string) (v int64, err error) {
	err = m.simpleTxn(Background(), func(s *xorm.Session) error {
		c := counter{Name: name}
		_, err := s.Get(&c)
		if err == nil {
			v = c.Value
		}
		return err
	})
	return
}

func (m *dbMeta) incrCounter(name string, value int64) (v int64, err error) {
	err = m.txn(func(s *xorm.Session) error {
		v, err = m.incrSessionCounter(s, name, value)
		return err
	})
	return
}

func (m *dbMeta) incrSessionCounter(s *xorm.Session, name string, value int64) (v int64, err error) {
	var c = counter{Name: name}
	ok, err := s.ForUpdate().Get(&c)
	if err != nil {
		return
	}
	v = c.Value + value
	if value > 0 {
		c.Value = v
		if ok {
			_, err = s.Cols("value").Update(&c, &counter{Name: name})
		} else {
			err = mustInsert(s, &c)
		}
	}
	return
}

func (m *dbMeta) setIfSmall(name string, value, diff int64) (bool, error) {
	var changed bool
	err := m.txn(func(s *xorm.Session) error {
		changed = false
		c := counter{Name: name}
		ok, err := s.ForUpdate().Get(&c)
		if err != nil {
			return err
		}
		if c.Value > value-diff {
			return nil
		} else {
			changed = true
			c.Value = value
			if ok {
				_, err = s.Cols("value").Update(&c, &counter{Name: name})
			} else {
				err = mustInsert(s, &c)
			}
			return err
		}
	})

	return changed, err
}

func mustInsert(s *xorm.Session, beans ...interface{}) error {
	for start, end, size := 0, 0, len(beans); end < size; start = end {
		end = start + 200
		if end > size {
			end = size
		}
		if n, err := s.Insert(beans[start:end]...); err != nil {
			return err
		} else if d := end - start - int(n); d > 0 {
			return fmt.Errorf("%d records not inserted: %+v", d, beans[start:end])
		}
	}
	return nil
}

var errBusy error

func (m *dbMeta) shouldRetry(err error) bool {
	if m.Name() == "mysql" && err == syscall.EBUSY {
		// Retry transaction when parent node update return 0 rows in MySQL
		return true
	}

	msg := strings.ToLower(err.Error())
	if strings.Contains(msg, "too many connections") || strings.Contains(msg, "too many clients") {
		logger.Warnf("transaction failed: %s, will retry it. please increase the max number of connections in your database, or use a connection pool.", msg)
		return true
	}
	switch m.Name() {
	case "sqlite3":
		return errors.Is(err, errBusy) || strings.Contains(msg, "database is locked")
	case "mysql":
		// MySQL, MariaDB or TiDB
		// error 1020 for MariaDB when conflict
		return strings.Contains(msg, "try restarting transaction") || strings.Contains(msg, "try again later") ||
			strings.Contains(msg, "duplicate entry") || strings.Contains(msg, "error 1020 (hy000)") ||
			strings.Contains(msg, "invalid connection") || strings.Contains(msg, "bad connection") || errors.Is(err, io.EOF) // could not send data to client: No buffer space available
	case "postgres":
		if e, ok := err.(interface{ SafeToRetry() bool }); ok {
			return e.SafeToRetry()
		}
		return strings.Contains(msg, "current transaction is aborted") || strings.Contains(msg, "deadlock detected") ||
			strings.Contains(msg, "duplicate key value") || strings.Contains(msg, "could not serialize access") ||
			strings.Contains(msg, "bad connection") || errors.Is(err, io.EOF) // could not send data to client: No buffer space available
	default:
		return false
	}
}

func (m *dbMeta) txn(f func(s *xorm.Session) error, inodes ...Ino) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	start := time.Now()
	defer func() { m.txDist.Observe(time.Since(start).Seconds()) }()

	if m.Name() == "sqlite3" {
		// sqlite only allow one writer at a time
		inodes = []Ino{1}
	}

	defer m.txBatchLock(inodes...)()
	var (
		lastErr error
		method  txMethod
	)
	for i := 0; i < 50; i++ {
		_, err := m.db.Transaction(func(s *xorm.Session) (interface{}, error) {
			return nil, f(s)
		})
		if eno, ok := err.(syscall.Errno); ok && eno == 0 {
			err = nil
		}
		if err != nil && m.shouldRetry(err) {
			m.txRestart.WithLabelValues(method.name(context.TODO())).Add(1)
			logger.Debugf("Transaction failed, restart it (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(i*i))
			continue
		} else if err == nil && i > 1 {
			logger.Warnf("Transaction succeeded after %d tries (%s), inodes: %v, method: %s, last error: %s", i+1, time.Since(start), inodes, method.name(context.TODO()), lastErr)
		}
		return err
	}
	logger.Warnf("Already tried 50 times, returning: %s", lastErr)
	return lastErr
}

func (m *dbMeta) roTxn(ctx context.Context, f func(s *xorm.Session) error) error {
	start := time.Now()
	defer func() { m.txDist.Observe(time.Since(start).Seconds()) }()
	s := m.db.NewSession()
	defer s.Close()
	var opt sql.TxOptions
	if !m.noReadOnlyTxn {
		opt.ReadOnly = true
		opt.Isolation = sql.LevelRepeatableRead
	}

	var maxRetry int
	val := ctx.Value(txMaxRetryKey{})
	if val == nil {
		maxRetry = 50
	} else {
		maxRetry = val.(int)
	}
	var (
		lastErr error
		method  txMethod
	)
	for i := 0; i < maxRetry; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		err := s.BeginTx(&opt)
		if err != nil && opt.ReadOnly && (strings.Contains(err.Error(), "READ") || strings.Contains(err.Error(), "driver does not support read-only transactions")) {
			logger.Warnf("the database does not support read-only transaction")
			m.noReadOnlyTxn = true
			opt = sql.TxOptions{} // use default level
			err = s.BeginTx(&opt)
		}
		if err != nil {
			logger.Debugf("Start transaction failed, try again (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(i*i))
			continue
		}
		err = f(s)
		if eno, ok := err.(syscall.Errno); ok && eno == 0 {
			err = nil
		}
		_ = s.Rollback()
		if err != nil && m.shouldRetry(err) {
			m.txRestart.WithLabelValues(method.name(ctx)).Add(1)
			logger.Debugf("Read transaction failed, restart it (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(i*i))
			continue
		} else if err == nil && i > 1 {
			logger.Warnf("Read transaction succeeded after %d tries (%s), method: %s, last error: %s", i+1, time.Since(start), method.name(ctx), lastErr)
		}
		return err
	}
	logger.Warnf("Already tried %d times, returning: %s", maxRetry, lastErr)
	return lastErr
}

func (m *dbMeta) simpleTxn(ctx context.Context, f func(s *xorm.Session) error) error {
	start := time.Now()
	defer func() { m.txDist.Observe(time.Since(start).Seconds()) }()
	s := m.spool.Get().(*xorm.Session)
	defer m.spool.Put(s)

	var (
		maxRetry = 50
		lastErr  error
		method   txMethod
	)
	for i := 0; i < maxRetry; i++ {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		err := f(s)
		if eno, ok := err.(syscall.Errno); ok && eno == 0 {
			err = nil
		}
		if err != nil && m.shouldRetry(err) {
			m.txRestart.WithLabelValues(method.name(ctx)).Add(1)
			logger.Debugf("Read transaction failed, restart it (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(i*i))
			continue
		} else if err == nil && i > 1 {
			logger.Warnf("Simple transaction succeeded after %d tries (%s), method: %s, last error: %s", i+1, time.Since(start), method.name(ctx), lastErr)
		}
		return err
	}
	logger.Warnf("Already tried %d times, returning: %s", maxRetry, lastErr)
	return lastErr
}

func (m *dbMeta) parseAttr(n *node, attr *Attr) {
	if attr == nil || n == nil {
		return
	}
	attr.Typ = n.Type
	attr.Mode = n.Mode
	attr.Flags = n.Flags
	attr.Uid = n.Uid
	attr.Gid = n.Gid
	attr.Atime = n.Atime / 1e6
	attr.Atimensec = uint32(n.Atime%1e6*1000) + uint32(n.Atimensec)
	attr.Mtime = n.Mtime / 1e6
	attr.Mtimensec = uint32(n.Mtime%1e6*1000) + uint32(n.Mtimensec)
	attr.Ctime = n.Ctime / 1e6
	attr.Ctimensec = uint32(n.Ctime%1e6*1000) + uint32(n.Ctimensec)
	attr.Nlink = n.Nlink
	attr.Length = n.Length
	attr.Rdev = n.Rdev
	attr.Parent = n.Parent
	attr.Full = true
	attr.AccessACL = n.AccessACLId
	attr.DefaultACL = n.DefaultACLId
}

func (m *dbMeta) parseNode(attr *Attr, n *node) {
	if attr == nil || n == nil {
		return
	}
	n.Type = attr.Typ
	n.Mode = attr.Mode
	n.Flags = attr.Flags
	n.Uid = attr.Uid
	n.Gid = attr.Gid
	n.setAtime(attr.Atime*1e9 + int64(attr.Atimensec))
	n.setMtime(attr.Mtime*1e9 + int64(attr.Mtimensec))
	n.setCtime(attr.Ctime*1e9 + int64(attr.Ctimensec))
	n.Nlink = attr.Nlink
	n.Length = attr.Length
	n.Rdev = attr.Rdev
	n.Parent = attr.Parent
	n.AccessACLId = attr.AccessACL
	n.DefaultACLId = attr.DefaultACL
}

func (m *dbMeta) updateStats(space int64, inodes int64) {
	atomic.AddInt64(&m.newSpace, space)
	atomic.AddInt64(&m.newInodes, inodes)
}

func (m *dbMeta) doSyncVolumeStat(ctx Context) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	var used, inode int64
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		total, err := s.SumsInt(&dirStats{}, "used_space", "used_inodes")
		used += total[0]
		inode += total[1]
		return err
	}); err != nil {
		return err
	}
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		queryResultMap, err := s.QueryString(m.sqlConv("SELECT length FROM node WHERE inode IN (SELECT inode FROM sustained)"))
		if err != nil {
			return err
		}
		for _, v := range queryResultMap {
			value, err := strconv.ParseInt(v["length"], 10, 64)
			if err != nil {
				logger.Warnf("parse sustained length: %s err: %s", v["length"], err)
				continue
			}
			used += align4K(uint64(value))
			inode += 1
		}
		return nil
	}); err != nil {
		return err
	}

	if err := m.scanTrashEntry(ctx, func(_ Ino, length uint64) {
		used += align4K(length)
		inode += 1
	}); err != nil {
		return err
	}
	logger.Debugf("Used space: %s, inodes: %d", humanize.IBytes(uint64(used)), inode)
	return m.txn(func(s *xorm.Session) error {
		if _, err := s.Cols("value").Update(&counter{Value: inode}, &counter{Name: totalInodes}); err != nil {
			return fmt.Errorf("update totalInodes: %s", err)
		}
		_, err := s.Cols("value").Update(&counter{Value: used}, &counter{Name: usedSpace})
		return err
	})
}

func (m *dbMeta) doFlushStats() {
	newSpace := atomic.LoadInt64(&m.newSpace)
	newInodes := atomic.LoadInt64(&m.newInodes)
	if newSpace != 0 || newInodes != 0 {
		err := m.txn(func(s *xorm.Session) error {
			if _, err := s.Exec(m.sqlConv("update counter set value=value + ? where name='totalInodes'"), newInodes); err != nil {
				return err
			}
			_, err := s.Exec(m.sqlConv("update counter set value= value + ? where name='usedSpace'"), newSpace)
			return err
		})
		if err != nil && !strings.Contains(err.Error(), "attempt to write a readonly database") {
			logger.Warnf("update stats: %s", err)
		}
		if err == nil {
			atomic.AddInt64(&m.newSpace, -newSpace)
			atomic.AddInt64(&m.usedSpace, newSpace)
			atomic.AddInt64(&m.newInodes, -newInodes)
			atomic.AddInt64(&m.usedInodes, newInodes)
		}
	}
}

func (m *dbMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno {
	return errno(m.simpleTxn(ctx, func(s *xorm.Session) error {
		s = s.Table(&edge{})
		nn := namedNode{node: node{Parent: parent}, Name: []byte(name)}
		var exist bool
		var err error
		if attr != nil {
			s = s.Join("INNER", &node{}, m.sqlConv("edge.inode=node.inode"))
			exist, err = s.Select(m.sqlConv("node.*")).Get(&nn)
		} else {
			exist, err = s.Select("*").Get(&nn)
		}
		if err != nil {
			return err
		}
		if !exist {
			return syscall.ENOENT
		}
		*inode = nn.Inode
		m.parseAttr(&nn.node, attr)
		m.of.Update(nn.Inode, attr)
		return nil
	}))
}

func (m *dbMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	return errno(m.simpleTxn(ctx, func(s *xorm.Session) error {
		var n = node{Inode: inode}
		ok, err := s.Get(&n)
		if err != nil {
			return err
		} else if !ok {
			return syscall.ENOENT
		}
		m.parseAttr(&n, attr)
		return nil
	}))
}

func (m *dbMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr, oldAttr *Attr) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		var cur = node{Inode: inode}
		ok, err := s.ForUpdate().Get(&cur)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		var curAttr Attr
		m.parseAttr(&cur, &curAttr)
		if oldAttr != nil {
			*oldAttr = curAttr
		}
		if curAttr.Parent > TrashInode {
			return syscall.EPERM
		}
		now := time.Now()

		rule, err := m.getACL(s, curAttr.AccessACL)
		if err != nil {
			return err
		}

		rule = rule.Dup()
		dirtyAttr, st := m.mergeAttr(ctx, inode, set, &curAttr, attr, now, rule)
		if st != 0 {
			return st
		}
		if dirtyAttr == nil {
			return nil
		}

		dirtyAttr.AccessACL, err = m.insertACL(s, rule)
		if err != nil {
			return err
		}

		var dirtyNode node
		m.parseNode(dirtyAttr, &dirtyNode)
		dirtyNode.setCtime(now.UnixNano())
		_, err = s.Cols("flags", "mode", "uid", "gid", "atime", "mtime", "ctime",
			"atimensec", "mtimensec", "ctimensec", "access_acl_id", "default_acl_id").
			Update(&dirtyNode, &node{Inode: inode})
		if err == nil {
			m.parseAttr(&dirtyNode, attr)
		}
		return err
	}, inode))
}

func (m *dbMeta) appendSlice(s *xorm.Session, inode Ino, indx uint32, buf []byte) error {
	var r sql.Result
	var err error
	driver := m.Name()
	if driver == "sqlite3" || driver == "postgres" {
		r, err = s.Exec(m.sqlConv("update chunk set slices=slices || ? where inode=? AND indx=?"), buf, inode, indx)
	} else {
		r, err = s.Exec(m.sqlConv("update chunk set slices=concat(slices, ?) where inode=? AND indx=?"), buf, inode, indx)
	}
	if err == nil {
		if n, _ := r.RowsAffected(); n == 0 {
			err = mustInsert(s, &chunk{Inode: inode, Indx: indx, Slices: buf})
		}
	}
	return err
}

func (m *dbMeta) upsertSlice(s *xorm.Session, inode Ino, indx uint32, buf []byte, insert *bool) error {
	var err error
	driver := m.Name()
	if driver == "sqlite3" || driver == "postgres" {
		_, err = s.Exec(m.sqlConv(`
			 INSERT INTO chunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON CONFLICT (inode, indx)
			 DO UPDATE SET slices=chunk.slices || ?`), inode, indx, buf, buf)
	} else {
		var r sql.Result
		r, err = s.Exec(m.sqlConv(`
			 INSERT INTO chunk (inode, indx, slices)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 slices=concat(slices, ?)`), inode, indx, buf, buf)
		if err != nil {
			return err
		}
		n, _ := r.RowsAffected()
		*insert = n == 1 // https://dev.mysql.com/doc/refman/5.7/en/insert-on-duplicate.html
	}
	return err
}

func (m *dbMeta) doTruncate(ctx Context, inode Ino, flags uint8, length uint64, delta *dirStat, attr *Attr, skipPermCheck bool) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		*delta = dirStat{}
		nodeAttr := node{Inode: inode}
		ok, err := s.ForUpdate().Get(&nodeAttr)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if nodeAttr.Type != TypeFile || nodeAttr.Flags&(FlagImmutable|FlagAppend) != 0 || (flags == 0 && nodeAttr.Parent > TrashInode) {
			return syscall.EPERM
		}
		m.parseAttr(&nodeAttr, attr)
		if !skipPermCheck {
			if st := m.Access(ctx, inode, MODE_MASK_W, attr); st != 0 {
				return st
			}
		}
		if length == nodeAttr.Length {
			return nil
		}
		delta.length = int64(length) - int64(nodeAttr.Length)
		delta.space = align4K(length) - align4K(nodeAttr.Length)
		if err := m.checkQuota(ctx, delta.space, 0, nodeAttr.Uid, nodeAttr.Gid, m.getParents(s, inode, nodeAttr.Parent)...); err != 0 {
			return err
		}
		var zeroChunks []chunk
		var left, right = nodeAttr.Length, length
		if left > right {
			right, left = left, right
		}
		if right/ChunkSize-left/ChunkSize > 1 {
			err := s.Where("inode = ? AND indx > ? AND indx < ?", inode, left/ChunkSize, right/ChunkSize).Cols("indx").ForUpdate().Find(&zeroChunks)
			if err != nil {
				return err
			}
		}

		l := uint32(right - left)
		if right > (left/ChunkSize+1)*ChunkSize {
			l = ChunkSize - uint32(left%ChunkSize)
		}
		if err = m.appendSlice(s, inode, uint32(left/ChunkSize), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l)); err != nil {
			return err
		}
		buf := marshalSlice(0, 0, 0, 0, ChunkSize)
		for _, c := range zeroChunks {
			if err = m.appendSlice(s, inode, c.Indx, buf); err != nil {
				return err
			}
		}
		if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 {
			if err = m.appendSlice(s, inode, uint32(right/ChunkSize), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize))); err != nil {
				return err
			}
		}
		nodeAttr.Length = length
		now := time.Now().UnixNano()
		nodeAttr.setMtime(now)
		nodeAttr.setCtime(now)
		if _, err = s.Cols("length", "mtime", "ctime", "mtimensec", "ctimensec").Update(&nodeAttr, &node{Inode: nodeAttr.Inode}); err != nil {
			return err
		}
		m.parseAttr(&nodeAttr, attr)
		return nil
	}, inode))
}

func (m *dbMeta) doFallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		*delta = dirStat{}
		nodeAttr := node{Inode: inode}
		ok, err := s.ForUpdate().Get(&nodeAttr)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if nodeAttr.Type == TypeFIFO {
			return syscall.EPIPE
		}
		if nodeAttr.Type != TypeFile || (nodeAttr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		var t Attr
		m.parseAttr(&nodeAttr, &t)
		if st := m.Access(ctx, inode, MODE_MASK_W, &t); st != 0 {
			return st
		}
		if (nodeAttr.Flags&FlagAppend) != 0 && (mode&^fallocKeepSize) != 0 {
			return syscall.EPERM
		}
		length := nodeAttr.Length
		if off+size > nodeAttr.Length {
			if mode&fallocKeepSize == 0 {
				length = off + size
			}
		}

		old := nodeAttr.Length
		delta.length = int64(length) - int64(old)
		delta.space = align4K(length) - align4K(old)
		if err := m.checkQuota(ctx, delta.space, 0, nodeAttr.Uid, nodeAttr.Gid, m.getParents(s, inode, nodeAttr.Parent)...); err != 0 {
			return err
		}
		now := time.Now().UnixNano()
		nodeAttr.Length = length
		nodeAttr.setMtime(now)
		nodeAttr.setCtime(now)
		if _, err := s.Cols("length", "mtime", "ctime", "mtimensec", "ctimensec").Update(&nodeAttr, &node{Inode: inode}); err != nil {
			return err
		}
		if mode&(fallocZeroRange|fallocPunchHole) != 0 && off < old {
			off, size := off, size
			if off+size > old {
				size = old - off
			}
			for size > 0 {
				indx := uint32(off / ChunkSize)
				coff := off % ChunkSize
				l := size
				if coff+size > ChunkSize {
					l = ChunkSize - coff
				}
				err = m.appendSlice(s, inode, indx, marshalSlice(uint32(coff), 0, 0, 0, uint32(l)))
				if err != nil {
					return err
				}
				off += l
				size -= l
			}
		}
		m.parseAttr(&nodeAttr, attr)
		return nil
	}, inode))
}

func (m *dbMeta) doReadlink(ctx Context, inode Ino, noatime bool) (atime int64, target []byte, err error) {
	if noatime {
		err = m.simpleTxn(ctx, func(s *xorm.Session) error {
			var l = symlink{Inode: inode}
			ok, err := s.Get(&l)
			if err == nil && ok {
				target = l.Target
			}
			return err
		})
		return
	}

	attr := &Attr{}
	now := time.Now()
	err = m.txn(func(s *xorm.Session) error {
		nodeAttr := node{Inode: inode}
		ok, e := s.ForUpdate().Get(&nodeAttr)
		if e != nil {
			return e
		}
		if !ok {
			return syscall.ENOENT
		}
		if nodeAttr.Type != TypeSymlink {
			return syscall.EINVAL
		}
		l := symlink{Inode: inode}
		ok, e = s.Get(&l)
		if e != nil {
			return e
		}
		if !ok {
			return syscall.EIO
		}
		m.parseAttr(&nodeAttr, attr)
		target = l.Target
		if !m.atimeNeedsUpdate(attr, now) {
			atime = attr.Atime*int64(time.Second) + int64(attr.Atimensec)
			return nil
		}
		nodeAttr.setAtime(now.UnixNano())
		atime = now.UnixNano()
		_, e = s.Cols("atime", "atimensec").Update(&nodeAttr, &node{Inode: inode})
		return e
	}, inode)
	return
}

func (m *dbMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, path string, inode *Ino, attr *Attr) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		var pn = node{Inode: parent}
		ok, err := s.Get(&pn)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if pn.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		var pattr Attr
		m.parseAttr(&pn, &pattr)
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pn.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		var e = edge{Parent: parent, Name: []byte(name)}
		ok, err = s.Get(&e)
		if err != nil {
			return err
		}
		var foundIno Ino
		var foundType uint8
		if ok {
			foundType, foundIno = e.Type, e.Inode
		} else if m.conf.CaseInsensi {
			if entry := m.resolveCase(ctx, parent, name); entry != nil {
				foundType, foundIno = entry.Attr.Typ, entry.Inode
			}
		}
		if foundIno != 0 {
			if _type == TypeFile || _type == TypeDirectory {
				foundNode := node{Inode: foundIno}
				ok, err = s.Get(&foundNode)
				if err != nil {
					return err
				} else if ok {
					m.parseAttr(&foundNode, attr)
				} else if attr != nil {
					*attr = Attr{Typ: foundType, Parent: parent} // corrupt entry
				}
				*inode = foundIno
			}
			return syscall.EEXIST
		} else if parent == TrashInode {
			if next, err := m.incrSessionCounter(s, "nextTrash", 1); err != nil {
				return err
			} else {
				*inode = TrashInode + Ino(next)
			}
		}

		n := node{Inode: *inode}
		m.parseNode(attr, &n)
		mode &= 07777
		if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink {
			// inherit default acl
			if _type == TypeDirectory {
				n.DefaultACLId = pattr.DefaultACL
			}

			// set access acl by parent's default acl
			rule, err := m.getACL(s, pattr.DefaultACL)
			if err != nil {
				return err
			}

			if rule.IsMinimal() {
				// simple acl as default
				n.Mode = mode & (0xFE00 | rule.GetMode())
			} else {
				cRule := rule.ChildAccessACL(mode)
				id, err := m.insertACL(s, cRule)
				if err != nil {
					return err
				}

				n.AccessACLId = id
				n.Mode = (mode & 0xFE00) | cRule.GetMode()
			}
		} else {
			n.Mode = mode & ^cumask
		}
		if (pn.Flags & FlagSkipTrash) != 0 {
			n.Flags |= FlagSkipTrash
		}

		var updateParent bool
		var nlinkAdjust int32
		now := time.Now().UnixNano()
		if parent != TrashInode {
			if _type == TypeDirectory {
				pn.Nlink++
				updateParent = true
				nlinkAdjust++
			}
			if updateParent || time.Duration(now-pn.getMtime()) >= m.conf.SkipDirMtime {
				pn.setMtime(now)
				pn.setCtime(now)
				updateParent = true
			}
		}
		n.setAtime(now)
		n.setMtime(now)
		n.setCtime(now)
		if ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" {
			n.Gid = pn.Gid
		} else if runtime.GOOS == "linux" && pn.Mode&02000 != 0 {
			n.Gid = pn.Gid
			if _type == TypeDirectory {
				n.Mode |= 02000
			} else if n.Mode&02010 == 02010 && ctx.Uid() != 0 {
				var found bool
				for _, gid := range ctx.Gids() {
					if gid == pn.Gid {
						found = true
					}
				}
				if !found {
					n.Mode &= ^uint16(02000)
				}
			}
		}

		if err = mustInsert(s, &edge{Parent: parent, Name: []byte(name), Inode: *inode, Type: _type}, &n); err != nil {
			return err
		}
		if _type == TypeSymlink {
			if err = mustInsert(s, &symlink{Inode: *inode, Target: []byte(path)}); err != nil {
				return err
			}
		}
		if _type == TypeDirectory {
			if err = mustInsert(s, &dirStats{Inode: *inode}); err != nil {
				return err
			}
		}
		if updateParent {
			if _n, err := s.SetExpr("nlink", fmt.Sprintf("nlink + (%d)", nlinkAdjust)).Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: pn.Inode}); err != nil || _n == 0 {
				if err == nil {
					logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, pn.Inode)
					if m.Name() == "mysql" {
						err = syscall.EBUSY
					} else {
						err = syscall.ENOENT
					}
				}
				if err != nil {
					return err
				}
			}
		}
		m.parseAttr(&n, attr)
		return nil
	}))
}

func (m *dbMeta) doUnlink(ctx Context, parent Ino, name string, attr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash Ino
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}
	var n node
	var opened bool
	var newSpace, newInode int64
	err := m.txn(func(s *xorm.Session) error {
		opened = false
		newSpace, newInode = 0, 0
		var pn = node{Inode: parent}
		ok, err := s.Get(&pn)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if pn.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		var pattr Attr
		m.parseAttr(&pn, &pattr)
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pn.Flags&FlagAppend) != 0 || (pn.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		var e = edge{Parent: parent, Name: []byte(name)}
		ok, err = s.Get(&e)
		if err != nil {
			return err
		}
		if !ok && m.conf.CaseInsensi {
			if ee := m.resolveCase(ctx, parent, name); ee != nil {
				ok = true
				e.Name = ee.Name
				e.Inode = ee.Inode
				e.Type = ee.Attr.Typ
			}
		}
		if !ok {
			return syscall.ENOENT
		}
		if e.Type == TypeDirectory {
			return syscall.EPERM
		}

		n = node{Inode: e.Inode}
		ok, err = s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		now := time.Now().UnixNano()
		if ok {
			if ctx.Uid() != 0 && pn.Mode&01000 != 0 && ctx.Uid() != pn.Uid && ctx.Uid() != n.Uid {
				return syscall.EACCES
			}
			if (n.Flags&FlagAppend) != 0 || (n.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (n.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			if trash > 0 && n.Nlink > 1 {
				if o, e := s.Get(&edge{Parent: trash, Name: []byte(m.trashEntry(parent, e.Inode, string(e.Name))), Inode: e.Inode, Type: e.Type}); e == nil && o {
					trash = 0
				}
			}
			n.setCtime(now)
			if trash == 0 {
				n.Nlink--
				if n.Type == TypeFile && n.Nlink == 0 && m.sid > 0 {
					opened = m.of.IsOpen(e.Inode)
				}
			} else if n.Parent > 0 {
				n.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, name)
			trash = 0
		}
		defer func() { m.of.InvalidateChunk(e.Inode, invalidateAttrOnly) }()

		var updateParent bool
		if !parent.IsTrash() && time.Duration(now-pn.getMtime()) >= m.conf.SkipDirMtime {
			pn.setMtime(now)
			pn.setCtime(now)
			updateParent = true
		}

		if _, err := s.Delete(&edge{Parent: parent, Name: e.Name}); err != nil {
			return err
		}

		if n.Nlink > 0 {
			if _, err := s.Cols("nlink", "ctime", "ctimensec", "parent").Update(&n, &node{Inode: e.Inode}); err != nil {
				return err
			}
			if trash > 0 {
				if err = mustInsert(s, &edge{Parent: trash, Name: []byte(m.trashEntry(parent, e.Inode, string(e.Name))), Inode: e.Inode, Type: e.Type}); err != nil {
					return err
				}
			}
		} else {
			switch e.Type {
			case TypeFile:
				if opened {
					if err = mustInsert(s, sustained{Sid: m.sid, Inode: e.Inode}); err != nil {
						return err
					}
					if _, err := s.Cols("nlink", "ctime", "ctimensec").Update(&n, &node{Inode: e.Inode}); err != nil {
						return err
					}
				} else {
					if err = mustInsert(s, delfile{e.Inode, n.Length, time.Now().Unix()}); err != nil {
						return err
					}
					if _, err := s.Delete(&node{Inode: e.Inode}); err != nil {
						return err
					}
					newSpace, newInode = -align4K(n.Length), -1
				}
			case TypeSymlink:
				if _, err := s.Delete(&symlink{Inode: e.Inode}); err != nil {
					return err
				}
				fallthrough
			default:
				if _, err := s.Delete(&node{Inode: e.Inode}); err != nil {
					return err
				}
				newSpace, newInode = -align4K(0), -1
			}
			if _, err := s.Delete(&xattr{Inode: e.Inode}); err != nil {
				return err
			}
		}
		if updateParent {
			var _n int64
			if _n, err = s.Cols("mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: pn.Inode}); err != nil || _n == 0 {
				if err == nil {
					logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, pn.Inode)
					if m.Name() == "mysql" {
						err = syscall.EBUSY
					} else {
						err = syscall.ENOENT
					}
				}
				if err != nil {
					return err
				}
			}
		}
		return err
	})
	if err == nil && trash == 0 {
		if n.Type == TypeFile && n.Nlink == 0 {
			m.fileDeleted(opened, parent.IsTrash(), n.Inode, n.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, n.Uid, n.Gid, newSpace, newInode)
	}
	if err == nil && attr != nil {
		m.parseAttr(&n, attr)
	}
	return errno(err)
}

func (m *dbMeta) doRmdir(ctx Context, parent Ino, name string, pinode *Ino, attr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash Ino
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}
	var n node
	err := m.txn(func(s *xorm.Session) error {
		var pn = node{Inode: parent}
		ok, err := s.Get(&pn)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if pn.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		var pattr Attr
		m.parseAttr(&pn, &pattr)
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if pn.Flags&FlagImmutable != 0 || pn.Flags&FlagAppend != 0 {
			return syscall.EPERM
		}
		var e = edge{Parent: parent, Name: []byte(name)}
		ok, err = s.Get(&e)
		if err != nil {
			return err
		}
		if !ok && m.conf.CaseInsensi {
			if ee := m.resolveCase(ctx, parent, name); ee != nil {
				ok = true
				e.Inode = ee.Inode
				e.Name = ee.Name
				e.Type = ee.Attr.Typ
			}
		}
		if !ok {
			return syscall.ENOENT
		}
		if e.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pinode != nil {
			*pinode = e.Inode
		}
		n = node{Inode: e.Inode}
		ok, err = s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		if ok && attr != nil {
			m.parseAttr(&n, attr)
		}
		exist, err := s.Exist(&edge{Parent: e.Inode})
		if err != nil {
			return err
		}
		if exist {
			return syscall.ENOTEMPTY
		}
		if (n.Flags & FlagSkipTrash) != 0 {
			trash = 0
		}
		now := time.Now().UnixNano()
		if ok {
			if ctx.Uid() != 0 && pn.Mode&01000 != 0 && ctx.Uid() != pn.Uid && ctx.Uid() != n.Uid {
				return syscall.EACCES
			}
			if trash > 0 {
				n.setCtime(now)
				n.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", e.Inode, parent, name)
			trash = 0
		}
		pn.Nlink--
		pn.setMtime(now)
		pn.setCtime(now)

		if _, err := s.Delete(&edge{Parent: parent, Name: e.Name}); err != nil {
			return err
		}
		if _, err := s.Delete(&dirStats{Inode: e.Inode}); err != nil {
			logger.Warnf("remove dir usage of ino(%d): %s", e.Inode, err)
			return err
		}
		if _, err = s.Delete(&dirQuota{Inode: e.Inode}); err != nil {
			return err
		}

		if trash > 0 {
			if _, err = s.Cols("ctime", "ctimensec", "parent").Update(&n, &node{Inode: n.Inode}); err != nil {
				return err
			}
			if err = mustInsert(s, &edge{Parent: trash, Name: []byte(m.trashEntry(parent, e.Inode, string(e.Name))), Inode: e.Inode, Type: e.Type}); err != nil {
				return err
			}
		} else {
			if _, err := s.Delete(&node{Inode: e.Inode}); err != nil {
				return err
			}
			if _, err := s.Delete(&xattr{Inode: e.Inode}); err != nil {
				return err
			}
		}
		if !parent.IsTrash() {
			_, err = s.SetExpr("nlink", "nlink - 1").Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: pn.Inode})
		}
		return err
	})
	if err == nil && trash == 0 {
		m.updateStats(-align4K(0), -1)
		m.updateUserGroupStat(ctx, n.Uid, n.Gid, -align4K(0), -1)
	}
	return errno(err)
}

func (m *dbMeta) getNodesForUpdate(s *xorm.Session, nodes ...*node) error {
	// sort them to avoid deadlock
	sort.Slice(nodes, func(i, j int) bool { return nodes[i].Inode < nodes[j].Inode })
	for i := range nodes {
		ok, err := s.ForUpdate().Get(nodes[i])
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
	}
	return nil
}

func (m *dbMeta) getNodes(s *xorm.Session, nodes ...*node) error {
	for i := range nodes {
		ok, err := s.Get(nodes[i])
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
	}
	return nil
}

func (m *dbMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode, tInode *Ino, attr, tAttr *Attr) syscall.Errno {
	var trash Ino
	if st := m.checkTrash(parentDst, &trash); st != 0 {
		return st
	}
	exchange := flags == RenameExchange
	var opened bool
	var dino Ino
	var dn node
	var newSpace, newInode int64
	parentLocks := []Ino{parentDst}
	if !parentSrc.IsTrash() { // there should be no conflict if parentSrc is in trash, relax lock to accelerate `restore` subcommand
		parentLocks = append(parentLocks, parentSrc)
	}
	err := m.txn(func(s *xorm.Session) error {
		opened = false
		dino = 0
		newSpace, newInode = 0, 0
		var spn = node{Inode: parentSrc}
		var dpn = node{Inode: parentDst}
		err := m.getNodes(s, &spn, &dpn)
		if err != nil {
			return err
		}
		if spn.Type != TypeDirectory || dpn.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if (spn.Flags&FlagAppend) != 0 || (spn.Flags&FlagImmutable) != 0 || (dpn.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		var spattr, dpattr Attr
		m.parseAttr(&spn, &spattr)
		m.parseAttr(&dpn, &dpattr)
		if flags&RenameRestore == 0 && dpattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parentSrc, MODE_MASK_W|MODE_MASK_X, &spattr); st != 0 {
			return st
		}
		if st := m.Access(ctx, parentDst, MODE_MASK_W|MODE_MASK_X, &dpattr); st != 0 {
			return st
		}
		var se = edge{Parent: parentSrc, Name: []byte(nameSrc)}
		ok, err := s.Get(&se)
		if err != nil {
			return err
		}
		if !ok && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentSrc, nameSrc); e != nil {
				if string(e.Name) != nameSrc || parentSrc != parentDst {
					ok = true
					se.Inode = e.Inode
					se.Type = e.Attr.Typ
					se.Name = e.Name
				}
			}
		}
		if !ok {
			return syscall.ENOENT
		}
		if parentSrc == parentDst && string(se.Name) == nameDst {
			if inode != nil {
				*inode = se.Inode
			}
			return nil
		}
		// TODO: check parentDst is a subdir of source node
		if se.Inode == parentDst || se.Inode == dpattr.Parent {
			return syscall.EPERM
		}
		var sn = node{Inode: se.Inode}
		ok, err = s.Get(&sn)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		var sattr Attr
		m.parseAttr(&sn, &sattr)
		if parentSrc != parentDst && spattr.Mode&0o1000 != 0 && ctx.Uid() != 0 &&
			ctx.Uid() != sattr.Uid && (ctx.Uid() != spattr.Uid || sattr.Typ == TypeDirectory) {
			return syscall.EACCES
		}
		if (sn.Flags&FlagAppend) != 0 || (sn.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}

		if st := m.Access(ctx, parentDst, MODE_MASK_W|MODE_MASK_X, &dpattr); st != 0 {
			return st
		}
		var de = edge{Parent: parentDst, Name: []byte(nameDst)}
		ok, err = s.Get(&de)
		if err != nil {
			return err
		}
		if !ok && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentDst, nameDst); e != nil {
				if string(e.Name) != nameSrc || parentSrc != parentDst {
					ok = true
					de.Inode = e.Inode
					de.Type = e.Attr.Typ
					de.Name = e.Name
				}
			}
		}
		var supdate, dupdate bool
		var srcnlink, dstnlink int32
		now := time.Now().UnixNano()
		dn = node{Inode: de.Inode}
		if ok {
			if flags&RenameNoReplace != 0 {
				return syscall.EEXIST
			}
			dino = de.Inode
			ok, err := s.ForUpdate().Get(&dn)
			if err != nil {
				return err
			}
			if !ok { // corrupt entry
				logger.Warnf("no attribute for inode %d (%d, %s)", dino, parentDst, de.Name)
				trash = 0
			}
			if (dn.Flags&FlagAppend) != 0 || (dn.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (dn.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			dn.setCtime(now)
			if exchange {
				if parentSrc != parentDst {
					if de.Type == TypeDirectory {
						dn.Parent = parentSrc
						dpn.Nlink--
						dstnlink--
						spn.Nlink++
						srcnlink++
						supdate, dupdate = true, true
					} else if dn.Parent > 0 {
						dn.Parent = parentSrc
					}
				}
			} else if de.Inode == se.Inode {
				return nil
			} else if se.Type == TypeDirectory && de.Type != TypeDirectory {
				return syscall.ENOTDIR
			} else if de.Type == TypeDirectory {
				if se.Type != TypeDirectory {
					return syscall.EISDIR
				}
				exist, err := s.Exist(&edge{Parent: de.Inode})
				if err != nil {
					return err
				}
				if exist {
					return syscall.ENOTEMPTY
				}
				dpn.Nlink--
				dstnlink--
				dupdate = true
				if trash > 0 {
					dn.Parent = trash
				}
			} else {
				if trash == 0 {
					dn.Nlink--
					if de.Type == TypeFile && dn.Nlink == 0 && m.sid > 0 {
						opened = m.of.IsOpen(dn.Inode)
					}
					defer func() { m.of.InvalidateChunk(dino, invalidateAttrOnly) }()
				} else if dn.Parent > 0 {
					dn.Parent = trash
				}
			}
			if ctx.Uid() != 0 && dpn.Mode&01000 != 0 && ctx.Uid() != dpn.Uid && ctx.Uid() != dn.Uid {
				return syscall.EACCES
			}
		} else {
			if exchange {
				return syscall.ENOENT
			}
		}
		if ctx.Uid() != 0 && spn.Mode&01000 != 0 && ctx.Uid() != spn.Uid && ctx.Uid() != sn.Uid {
			return syscall.EACCES
		}

		if parentSrc != parentDst {
			if se.Type == TypeDirectory {
				sn.Parent = parentDst
				spn.Nlink--
				srcnlink--
				dpn.Nlink++
				dstnlink++
				supdate, dupdate = true, true
			} else if sn.Parent > 0 {
				sn.Parent = parentDst
			}
		}
		if supdate || time.Duration(now-spn.getMtime()) >= m.conf.SkipDirMtime {
			spn.setMtime(now)
			spn.setCtime(now)
			supdate = true
		}
		if dupdate || time.Duration(now-dpn.getMtime()) >= m.conf.SkipDirMtime {
			dpn.setMtime(now)
			dpn.setCtime(now)
			dupdate = true
		}
		sn.setCtime(now)
		if inode != nil {
			*inode = sn.Inode
		}
		m.parseAttr(&sn, attr)
		if dino > 0 {
			*tInode = dino
			m.parseAttr(&dn, tAttr)
		}

		if exchange {
			if _, err := s.Cols("inode", "type").Update(&de, &edge{Parent: parentSrc, Name: se.Name}); err != nil {
				return err
			}
			if _, err := s.Cols("inode", "type").Update(&se, &edge{Parent: parentDst, Name: de.Name}); err != nil {
				return err
			}
			if _, err := s.Cols("ctime", "ctimensec", "parent").Update(dn, &node{Inode: dino}); err != nil {
				return err
			}
		} else {
			if n, err := s.Delete(&edge{Parent: parentSrc, Name: se.Name}); err != nil {
				return err
			} else if n != 1 {
				return fmt.Errorf("delete src failed")
			}
			if dino > 0 {
				if trash > 0 {
					if _, err := s.Cols("ctime", "ctimensec", "parent").Update(dn, &node{Inode: dino}); err != nil {
						return err
					}
					name := m.trashEntry(parentDst, dino, string(de.Name))
					if err = mustInsert(s, &edge{Parent: trash, Name: []byte(name), Inode: dino, Type: de.Type}); err != nil {
						return err
					}
				} else if de.Type != TypeDirectory && dn.Nlink > 0 {
					if _, err := s.Cols("ctime", "ctimensec", "nlink", "parent").Update(dn, &node{Inode: dino}); err != nil {
						return err
					}
				} else {
					if de.Type == TypeFile {
						if opened {
							if _, err := s.Cols("nlink", "ctime", "ctimensec").Update(&dn, &node{Inode: dino}); err != nil {
								return err
							}
							if err = mustInsert(s, sustained{Sid: m.sid, Inode: dino}); err != nil {
								return err
							}
						} else {
							if err = mustInsert(s, delfile{dino, dn.Length, time.Now().Unix()}); err != nil {
								return err
							}
							if _, err := s.Delete(&node{Inode: dino}); err != nil {
								return err
							}
							newSpace, newInode = -align4K(dn.Length), -1
						}
					} else {
						if de.Type == TypeSymlink {
							if _, err := s.Delete(&symlink{Inode: dino}); err != nil {
								return err
							}
						}
						if _, err := s.Delete(&node{Inode: dino}); err != nil {
							return err
						}
						newSpace, newInode = -align4K(0), -1
					}
					if _, err := s.Delete(&xattr{Inode: dino}); err != nil {
						return err
					}
				}
				if _, err := s.Delete(&edge{Parent: parentDst, Name: de.Name}); err != nil {
					return err
				}
				if de.Type == TypeDirectory {
					if _, err = s.Delete(&dirQuota{Inode: dino}); err != nil {
						return err
					}
				}
			}
			if err = mustInsert(s, &edge{Parent: parentDst, Name: de.Name, Inode: se.Inode, Type: se.Type}); err != nil {
				return err
			}
		}

		if _, err := s.Cols("ctime", "ctimensec", "parent").Update(&sn, &node{Inode: sn.Inode}); err != nil {
			return err
		}

		if parentDst != parentSrc && !parentSrc.IsTrash() && supdate {
			if dupdate && dpn.Inode < spn.Inode {
				if _n, err := s.SetExpr("nlink", fmt.Sprintf("nlink + (%d)", dstnlink)).Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&dpn, &node{Inode: parentDst}); err != nil || _n == 0 {
					if err == nil {
						logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, dpn.Inode)
						if m.Name() == "mysql" {
							err = syscall.EBUSY
						} else {
							err = syscall.ENOENT
						}
					}
					if err != nil {
						return err
					}
				}
				dupdate = false
			}

			if _n, err := s.SetExpr("nlink", fmt.Sprintf("nlink + (%d)", srcnlink)).Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&spn, &node{Inode: parentSrc}); err != nil || _n == 0 {
				if err == nil {
					logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, spn.Inode)
					if m.Name() == "mysql" {
						err = syscall.EBUSY
					} else {
						err = syscall.ENOENT
					}
				}
				if err != nil {
					return err
				}
			}
		}

		if dupdate {
			if _n, err := s.SetExpr("nlink", fmt.Sprintf("nlink + (%d)", dstnlink)).Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&dpn, &node{Inode: parentDst}); err != nil || _n == 0 {
				if err == nil {
					logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, dpn.Inode)
					if m.Name() == "mysql" {
						err = syscall.EBUSY
					} else {
						err = syscall.ENOENT
					}
				}
				if err != nil {
					return err
				}
			}
		}
		return err
	}, parentLocks...)
	if err == nil && !exchange && trash == 0 {
		if dino > 0 && dn.Type == TypeFile && dn.Nlink == 0 {
			m.fileDeleted(opened, false, dino, dn.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, dn.Uid, dn.Gid, newSpace, newInode)
	}
	return errno(err)
}

func (m *dbMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		var pn = node{Inode: parent}
		ok, err := s.Get(&pn)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if pn.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pn.Parent > TrashInode {
			return syscall.ENOENT
		}
		var pattr Attr
		m.parseAttr(&pn, &pattr)
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if pn.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}
		var e = edge{Parent: parent, Name: []byte(name)}
		ok, err = s.Get(&e)
		if err != nil {
			return err
		}
		if ok || !ok && m.conf.CaseInsensi && m.resolveCase(ctx, parent, name) != nil {
			return syscall.EEXIST
		}

		var n = node{Inode: inode}
		ok, err = s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if n.Type == TypeDirectory {
			return syscall.EPERM
		}
		if (n.Flags&FlagAppend) != 0 || (n.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}

		var updateParent bool
		now := time.Now().UnixNano()
		if time.Duration(now-pn.getMtime()) >= m.conf.SkipDirMtime {
			pn.setMtime(now)
			pn.setCtime(now)
			updateParent = true
		}
		n.Parent = 0
		n.Nlink++
		n.setCtime(now)

		if err = mustInsert(s, &edge{Parent: parent, Name: []byte(name), Inode: inode, Type: n.Type}); err != nil {
			return err
		}
		if _, err := s.Cols("nlink", "ctime", "ctimensec", "parent").Update(&n, node{Inode: inode}); err != nil {
			return err
		}
		if updateParent {
			if _n, err := s.Cols("mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: parent}); err != nil || _n == 0 {
				if err == nil {
					logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, pn.Inode)
					if m.Name() == "mysql" {
						err = syscall.EBUSY
					} else {
						err = syscall.ENOENT
					}
				}
				return err
			}
		}

		m.parseAttr(&n, attr)
		return err
	}, inode))
}

func (m *dbMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry, limit int) syscall.Errno {

	return errno(m.simpleTxn(ctx, func(s *xorm.Session) error {
		s = s.Table(&edge{})
		if plus != 0 {
			s = s.Join("INNER", &node{}, m.sqlConv("edge.inode=node.inode"))
		}
		if limit > 0 {
			s = s.Limit(limit, 0)
		}
		var nodes []namedNode
		if err := s.Find(&nodes, &edge{Parent: inode}); err != nil {
			return err
		}
		for _, n := range nodes {
			if len(n.Name) == 0 {
				logger.Errorf("Corrupt entry with empty name: inode %d parent %d", n.Inode, inode)
				continue
			}
			entry := &Entry{
				Inode: n.Inode,
				Name:  n.Name,
				Attr:  &Attr{},
			}
			if plus != 0 {
				m.parseAttr(&n.node, entry.Attr)
				m.of.Update(entry.Inode, entry.Attr)
			} else {
				entry.Attr.Typ = n.Type
			}
			*entries = append(*entries, entry)
		}
		return nil
	}))
}

func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []*Entry, delta *dirStat, skipCheckTrash ...bool) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}

	var trash Ino
	if len(skipCheckTrash) == 0 || !skipCheckTrash[0] {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}

	type entryInfo struct {
		e         *edge
		trash     Ino
		n         *node  // n edges : 1 inode
		trashName string // cached trash entry name when hard links go to trash
	}
	var entryInfos []*entryInfo
	type dNode struct {
		opened bool
		length uint64
	}
	delNodes := make(map[Ino]*dNode)

	batchSize := m.getTxnBatchNum()
	for len(entries) > 0 {
		if batchSize > len(entries) {
			batchSize = len(entries)
		}
		batch := entries[:batchSize]
		entries = entries[batchSize:]
		var batchFsSpace, batchFsInodes int64
		var batchDirLength, batchDirSpace, batchDirInodes int64
		var deltas ugQuotaDeltas
		err := m.txn(func(s *xorm.Session) error {
			batchDirLength, batchDirSpace, batchDirInodes = 0, 0, 0
			batchFsSpace, batchFsInodes = 0, 0
			deltas = make(ugQuotaDeltas)
			pn := node{Inode: parent}
			ok, err := s.Get(&pn)
			if err != nil {
				return err
			}
			if !ok {
				return syscall.ENOENT
			}
			if pn.Type != TypeDirectory {
				return syscall.ENOTDIR
			}
			var pattr Attr
			m.parseAttr(&pn, &pattr)
			if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
				return st
			}
			if (pn.Flags&FlagAppend != 0) || (pn.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			now := time.Now().UnixNano()
			entryInfos = make([]*entryInfo, 0, len(batch))
			names := make([][]byte, 0, len(batch))
			for _, entry := range batch {
				names = append(names, entry.Name)
			}
			var foundEdges []edge
			if err := s.Where("parent=?", parent).In("name", names).Find(&foundEdges); err != nil {
				return err
			}
			entryMap := make(map[string]*edge)
			for i := range foundEdges {
				entryMap[string(foundEdges[i].Name)] = &foundEdges[i]
			}

			inodes := make([]Ino, 0, len(batch))
			inodeM := make(map[Ino]struct{}) // filter hardlinks
			for _, entry := range batch {
				e, ok := entryMap[string(entry.Name)]
				if !ok {
					continue
				}
				if e.Inode != entry.Inode || e.Type == TypeDirectory || (entry.Attr != nil && e.Type != entry.Attr.Typ) {
					continue
				}
				entryInfos = append(entryInfos, &entryInfo{e: e, trash: trash})
				if _, exists := inodeM[entry.Inode]; !exists {
					inodeM[entry.Inode] = struct{}{}
					inodes = append(inodes, entry.Inode)
				}
			}

			if len(inodes) > 0 {
				var nodes []node
				if err := s.ForUpdate().In("inode", inodes).Find(&nodes); err != nil {
					return err
				}
				nodeMap := make(map[Ino]*node, len(nodes))
				// build quick lookup map from inode to *node
				for i := range nodes {
					nodeMap[nodes[i].Inode] = &nodes[i]
				}

				// iterate all target entries, apply basic checks and build info for each edge
				dumpNode := &node{}
				for _, info := range entryInfos {
					n, ok := nodeMap[info.e.Inode]
					if !ok {
						info.trash = 0
						info.n = dumpNode
						continue
					}
					if ctx.Uid() != 0 && pn.Mode&01000 != 0 && ctx.Uid() != pn.Uid && ctx.Uid() != n.Uid {
						return syscall.EACCES
					}
					if (n.Flags&FlagAppend) != 0 || (n.Flags&FlagImmutable) != 0 {
						return syscall.EPERM
					}
					if (n.Flags & FlagSkipTrash) != 0 {
						info.trash = 0
					}
					info.n = n
				}
			}

			for _, info := range entryInfos {
				if info.trash > 0 && info.n.Nlink > 1 {
					info.trashName = m.trashEntry(parent, info.e.Inode, string(info.e.Name))
					te := edge{
						Parent: info.trash,
						Name:   []byte(info.trashName),
						Inode:  info.n.Inode,
						Type:   info.n.Type,
					}
					if ok, err := s.Get(&te); err == nil && ok {
						info.trash = 0
					}
				}
				info.n.setCtime(now)
				if info.trash > 0 && info.n.Parent > 0 {
					info.n.Parent = info.trash
				}
				if info.trash == 0 && info.n.Nlink > 0 {
					info.n.Nlink--
				}
			}

			// check opened status for all inodes with Nlink == 0 after all decrements
			for _, info := range entryInfos {
				if info.n != nil && info.trash == 0 && info.n.Nlink == 0 && info.n.Type == TypeFile {
					opened := false
					if m.sid > 0 {
						opened = m.of.IsOpen(info.n.Inode)
					}
					delNodes[info.n.Inode] = &dNode{opened, info.n.Length}
				}
			}

			var updateParent bool
			if !parent.IsTrash() && time.Duration(now-pn.getMtime()) >= m.conf.SkipDirMtime {
				pn.setMtime(now)
				pn.setCtime(now)
				updateParent = true
			}

			nowUnix := time.Now().Unix()
			visited := make(map[Ino]bool)
			visited[0] = true // skip dummyNode

			// buffers for batched operations
			edgesDel := make([]edge, 0)
			sustainedIns := make([]interface{}, 0)
			delfilesIns := make([]interface{}, 0)
			nodesDel := make([]Ino, 0)
			symlinksDel := make([]Ino, 0)
			xattrsDel := make([]Ino, 0)
			edgesIns := make([]interface{}, 0)
			// walk each edge to decide whether to move to trash, decrement nlink or delete inode & xattrs
			for _, info := range entryInfos {
				edgesDel = append(edgesDel, edge{Parent: parent, Name: info.e.Name})
				if info.n.Inode != 0 {
					if info.n.Type == TypeFile {
						batchDirLength -= int64(info.n.Length)
						batchDirSpace -= align4K(info.n.Length)
					} else {
						batchDirSpace -= align4K(0)
					}
					batchDirInodes--
				}
				if !visited[info.n.Inode] {
					if info.n.Nlink > 0 {
						// inode still referenced somewhere: only update metadata
						if _, err := s.Cols("nlink", "ctime", "ctimensec", "parent").Update(info.n, &node{Inode: info.n.Inode}); err != nil {
							return err
						}
					} else {
						// last link removed: prepare to delete inode and related rows
						var entrySpace int64
						switch info.n.Type {
						case TypeFile:
							entrySpace = align4K(info.n.Length)
							if dnode, ok := delNodes[info.n.Inode]; ok && dnode.opened {
								sustainedIns = append(sustainedIns, &sustained{Sid: m.sid, Inode: info.e.Inode})
								if _, err := s.Cols("nlink", "ctime", "ctimensec").Update(info.n, &node{Inode: info.n.Inode}); err != nil {
									return err
								}
							} else {
								// regular, un-opened file: add to delfile and delete inode later
								delfilesIns = append(delfilesIns, &delfile{info.e.Inode, info.n.Length, nowUnix})
								nodesDel = append(nodesDel, info.e.Inode)
								batchFsSpace -= entrySpace
								batchFsInodes--
								deltas.add(&ugQuotaDelta{
									Uid:    info.n.Uid,
									Gid:    info.n.Gid,
									Space:  -entrySpace,
									Inodes: -1,
								})
							}
						case TypeSymlink:
							// symlink: record for batched delete from symlink table
							symlinksDel = append(symlinksDel, info.e.Inode)
							fallthrough
						default:
							// other non-file types: record for direct inode deletion
							nodesDel = append(nodesDel, info.e.Inode)
							if info.n.Type != TypeFile {
								entrySpace = align4K(0)
								batchFsSpace -= entrySpace
								batchFsInodes--
								deltas.add(&ugQuotaDelta{
									Uid:    info.n.Uid,
									Gid:    info.n.Gid,
									Space:  -entrySpace,
									Inodes: -1,
								})
							}
						}
						xattrsDel = append(xattrsDel, info.e.Inode)
					}
					m.of.InvalidateChunk(info.e.Inode, invalidateAttrOnly)
				}
				if info.n.Nlink > 0 && info.trash > 0 {
					// still has links and should be moved to trash; create new trash edge
					if info.trashName == "" {
						info.trashName = m.trashEntry(parent, info.e.Inode, string(info.e.Name))
					}
					edgesIns = append(edgesIns, &edge{
						Parent: info.trash,
						Name:   []byte(info.trashName),
						Inode:  info.n.Inode,
						Type:   info.n.Type})
				}
				visited[info.n.Inode] = true
			}

			if len(edgesDel) > 0 {
				query := s.Table(&edge{})
				for j, e := range edgesDel {
					if j == 0 {
						query = query.Where("parent = ? AND name = ?", e.Parent, e.Name)
					} else {
						query = query.Or("parent = ? AND name = ?", e.Parent, e.Name)
					}
				}
				if _, err := query.Delete(&edge{}); err != nil {
					return err
				}
			}

			// execute SQL statements in batches
			if len(sustainedIns) > 0 {
				if err := mustInsert(s, sustainedIns...); err != nil {
					return err
				}
			}
			if len(delfilesIns) > 0 {
				if err := mustInsert(s, delfilesIns...); err != nil {
					return err
				}
			}
			if len(nodesDel) > 0 {
				if _, err := s.In("inode", nodesDel).Delete(&node{}); err != nil {
					return err
				}
			}
			if len(symlinksDel) > 0 {
				if _, err := s.In("inode", symlinksDel).Delete(&symlink{}); err != nil {
					return err
				}
			}
			if len(xattrsDel) > 0 {
				if _, err := s.In("inode", xattrsDel).Delete(&xattr{}); err != nil {
					return err
				}
			}
			if len(edgesIns) > 0 {
				if err := mustInsert(s, edgesIns...); err != nil {
					return err
				}
			}

			// optionally update parent directory timestamps
			if updateParent {
				var _n int64
				if _n, err = s.Cols("mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: pn.Inode}); err != nil || _n == 0 {
					if err == nil {
						logger.Infof("Update parent node affected rows = %d should be 1 for inode = %d .", _n, pn.Inode)
						if m.Name() == "mysql" {
							err = syscall.EBUSY
						} else {
							err = syscall.ENOENT
						}
					}
					if err != nil {
						return err
					}
				}
			}

			return nil
		})

		if err != nil {
			return errno(err)
		}

		delta.length += batchDirLength
		delta.space += batchDirSpace
		delta.inodes += batchDirInodes
		m.updateStats(batchFsSpace, batchFsInodes)
		for _, q := range deltas {
			m.updateUserGroupStat(ctx, q.Uid, q.Gid, q.Space, q.Inodes)
		}
	}

	// outside of transaction: trigger data deletion callbacks
	for inode, info := range delNodes {
		m.fileDeleted(info.opened, parent.IsTrash(), inode, info.length)
	}
	return 0
}

func (m *dbMeta) doCleanStaleSession(sid uint64) error {
	var fail bool
	// release locks
	err := m.txn(func(s *xorm.Session) error {
		if _, err := s.Delete(flock{Sid: sid}); err != nil {
			return err
		}
		if _, err := s.Delete(plock{Sid: sid}); err != nil {
			return err
		}
		return nil
	})
	if err != nil {
		logger.Warnf("Delete flock/plock with sid %d: %s", sid, err)
		fail = true
	}

	var sus []sustained
	err = m.simpleTxn(Background(), func(ses *xorm.Session) error {
		sus = nil
		return ses.Find(&sus, &sustained{Sid: sid})
	})
	if err != nil {
		logger.Warnf("Scan sustained with sid %d: %s", sid, err)
		fail = true
	} else {
		for _, su := range sus {
			if err = m.doDeleteSustainedInode(sid, su.Inode); err != nil {
				logger.Warnf("Delete sustained inode %d of sid %d: %s", su.Inode, sid, err)
				fail = true
			}
		}
	}

	if fail {
		return fmt.Errorf("failed to clean up sid %d", sid)
	} else {
		return m.txn(func(s *xorm.Session) error {
			if n, err := s.Delete(&session2{Sid: sid}); err != nil {
				return err
			} else if n == 1 {
				return nil
			}
			ok, err := s.IsTableExist(&session{})
			if err == nil && ok {
				_, err = s.Delete(&session{Sid: sid})
			}
			return err
		})
	}
}

func (m *dbMeta) doFindStaleSessions(limit int) ([]uint64, error) {
	var sids []uint64
	_ = m.simpleTxn(Background(), func(ses *xorm.Session) error {
		var ss []session2
		err := ses.Where("Expire < ?", time.Now().Unix()).Limit(limit, 0).Find(&ss)
		if err != nil {
			return err
		}
		for _, s := range ss {
			sids = append(sids, s.Sid)
		}
		return nil
	})

	limit -= len(sids)
	if limit <= 0 {
		return sids, nil
	}

	err := m.simpleTxn(Background(), func(ses *xorm.Session) error {
		if ok, err := ses.IsTableExist(&session{}); err != nil {
			return err
		} else if ok {
			var ls []session
			err := ses.Where("Heartbeat < ?", time.Now().Add(time.Minute*-5).Unix()).Limit(limit, 0).Find(&ls)
			if err != nil {
				return err
			}
			for _, l := range ls {
				sids = append(sids, l.Sid)
			}
		}
		return nil
	})
	if err != nil {
		logger.Errorf("Check legacy session table: %s", err)
	}

	return sids, nil
}

func (m *dbMeta) doRefreshSession() error {
	return m.txn(func(ses *xorm.Session) error {
		n, err := ses.Cols("Expire").Update(&session2{Expire: m.expireTime()}, &session2{Sid: m.sid})
		if err == nil && n == 0 {
			logger.Warnf("Session %d was stale and cleaned up, but now it comes back again", m.sid)
			err = mustInsert(ses, &session2{m.sid, m.expireTime(), m.newSessionInfo()})
		}
		return err
	})
}

func (m *dbMeta) doDeleteSustainedInode(sid uint64, inode Ino) error {
	var n = node{Inode: inode}
	var newSpace int64
	err := m.txn(func(s *xorm.Session) error {
		newSpace = 0
		n = node{Inode: inode}
		ok, err := s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		if !ok {
			return nil
		}
		newSpace = -align4K(n.Length)
		if err = mustInsert(s, &delfile{inode, n.Length, time.Now().Unix()}); err != nil {
			return err
		}
		_, err = s.Delete(&sustained{Sid: sid, Inode: inode})
		if err != nil {
			return err
		}
		_, err = s.Delete(&node{Inode: inode})
		return err
	}, inode)
	if err == nil && newSpace < 0 {
		m.updateStats(newSpace, -1)
		m.tryDeleteFileData(inode, n.Length, false)
		m.updateUserGroupStat(Background(), n.Uid, n.Gid, newSpace, 0)
	}
	return err
}

func (m *dbMeta) doRead(ctx Context, inode Ino, indx uint32) ([]*slice, syscall.Errno) {
	var c = chunk{Inode: inode, Indx: indx}
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		_, err := s.MustCols("indx").Get(&c)
		return err
	}); err != nil {
		return nil, errno(err)
	}
	return readSliceBuf(c.Slices), 0
}

func (m *dbMeta) doList(ctx Context, inode Ino) ([]*slice, syscall.Errno) {
	var chunks []chunk
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		return s.Cols("slices").Find(&chunks, &chunk{Inode: inode})
	}); err != nil {
		return nil, errno(err)
	}
	var slices []*slice
	for _, c := range chunks {
		ss := readSliceBuf(c.Slices)
		if ss == nil {
			continue
		}
		slices = append(slices, ss...)
	}
	return slices, 0
}

func (m *dbMeta) doWrite(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time, numSlices *int, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		*delta = dirStat{}
		nodeAttr := node{Inode: inode}
		ok, err := s.ForUpdate().Get(&nodeAttr)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if nodeAttr.Type != TypeFile {
			return syscall.EPERM
		}
		newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len)
		if newleng > nodeAttr.Length {
			delta.length = int64(newleng - nodeAttr.Length)
			delta.space = align4K(newleng) - align4K(nodeAttr.Length)
			nodeAttr.Length = newleng
		}
		if err := m.checkQuota(ctx, delta.space, 0, nodeAttr.Uid, nodeAttr.Gid, m.getParents(s, inode, nodeAttr.Parent)...); err != 0 {
			return err
		}
		nodeAttr.setMtime(mtime.UnixNano())
		nodeAttr.setCtime(time.Now().UnixNano())
		m.parseAttr(&nodeAttr, attr)

		buf := marshalSlice(off, slice.Id, slice.Size, slice.Off, slice.Len)
		var insert bool // no compaction check for the first slice
		if err = m.upsertSlice(s, inode, indx, buf, &insert); err != nil {
			return err
		}
		if err = mustInsert(s, sliceRef{slice.Id, slice.Size, 1}); err != nil {
			return err
		}
		_, err = s.Cols("length", "mtime", "ctime", "mtimensec", "ctimensec").Update(&nodeAttr, &node{Inode: inode})
		if err == nil && !insert {
			ck := chunk{Inode: inode, Indx: indx}
			_, _ = s.MustCols("indx").Get(&ck)
			*numSlices = len(ck.Slices) / sliceBytes
		}
		return err
	}, inode))
}

func (m *dbMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied, outLength *uint64) syscall.Errno {
	defer m.timeit("CopyFileRange", time.Now())
	f := m.of.find(fout)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	var newLength, newSpace int64
	var nin, nout node
	defer func() { m.of.InvalidateChunk(fout, invalidateAllChunks) }()
	err := m.txn(func(s *xorm.Session) error {
		newLength, newSpace = 0, 0
		nin = node{Inode: fin}
		nout = node{Inode: fout}
		err := m.getNodesForUpdate(s, &nin, &nout)
		if err != nil {
			return err
		}
		if nin.Type != TypeFile {
			return syscall.EINVAL
		}
		if offIn >= nin.Length {
			if copied != nil {
				*copied = 0
			}
			return nil
		}
		size := size
		if offIn+size > nin.Length {
			size = nin.Length - offIn
		}
		if nout.Type != TypeFile {
			return syscall.EINVAL
		}
		if (nout.Flags&FlagImmutable) != 0 || (nout.Flags&FlagAppend) != 0 {
			return syscall.EPERM
		}

		newleng := offOut + size
		if newleng > nout.Length {
			newLength = int64(newleng - nout.Length)
			newSpace = align4K(newleng) - align4K(nout.Length)
			nout.Length = newleng
		}
		if err := m.checkQuota(ctx, newSpace, 0, nout.Uid, nout.Gid, m.getParents(s, fout, nout.Parent)...); err != 0 {
			return err
		}
		now := time.Now().UnixNano()
		nout.setMtime(now)
		nout.setCtime(now)
		if outLength != nil {
			*outLength = nout.Length
		}

		var cs []chunk
		err = s.Where("inode = ? AND indx >= ? AND indx <= ?", fin, offIn/ChunkSize, (offIn+size)/ChunkSize).ForUpdate().Find(&cs)
		if err != nil {
			return err
		}
		chunks := make(map[uint32][]*slice)
		for _, c := range cs {
			chunks[c.Indx] = readSliceBuf(c.Slices)
			if chunks[c.Indx] == nil {
				return syscall.EIO
			}
		}

		ses := s
		updateSlices := func(indx uint32, buf []byte, id uint64, size uint32) error {
			if err := m.appendSlice(ses, fout, indx, buf); err != nil {
				return err
			}
			if id > 0 {
				if _, err := ses.Exec(m.sqlConv("update chunk_ref set refs=refs+1 where chunkid = ? AND size = ?"), id, size); err != nil {
					return err
				}
			}
			return nil
		}
		coff := offIn / ChunkSize * ChunkSize
		for coff < offIn+size {
			if coff%ChunkSize != 0 {
				panic("coff")
			}
			// Add a zero chunk for hole
			ss := append([]*slice{{len: ChunkSize}}, chunks[uint32(coff/ChunkSize)]...)
			cs := buildSlice(ss)
			for _, s := range cs {
				pos := coff
				coff += uint64(s.Len)
				if pos < offIn+size && pos+uint64(s.Len) > offIn {
					if pos < offIn {
						dec := offIn - pos
						s.Off += uint32(dec)
						pos += dec
						s.Len -= uint32(dec)
					}
					if pos+uint64(s.Len) > offIn+size {
						dec := pos + uint64(s.Len) - (offIn + size)
						s.Len -= uint32(dec)
					}
					doff := pos - offIn + offOut
					indx := uint32(doff / ChunkSize)
					dpos := uint32(doff % ChunkSize)
					if dpos+s.Len > ChunkSize {
						if err := updateSlices(indx, marshalSlice(dpos, s.Id, s.Size, s.Off, ChunkSize-dpos), s.Id, s.Size); err != nil {
							return err
						}
						skip := ChunkSize - dpos
						if err := updateSlices(indx+1, marshalSlice(0, s.Id, s.Size, s.Off+skip, s.Len-skip), s.Id, s.Size); err != nil {
							return err
						}
					} else {
						if err := updateSlices(indx, marshalSlice(dpos, s.Id, s.Size, s.Off, s.Len), s.Id, s.Size); err != nil {
							return err
						}
					}
				}
			}
		}
		if _, err := s.Cols("length", "mtime", "ctime", "mtimensec", "ctimensec").Update(&nout, &node{Inode: fout}); err != nil {
			return err
		}
		if copied != nil {
			*copied = size
		}
		return nil
	}, fout)
	if err == nil {
		m.updateParentStat(ctx, fout, nout.Parent, newLength, newSpace)
		m.updateUserGroupStat(ctx, nout.Uid, nout.Gid, newSpace, 0)
	}
	return errno(err)
}

func (m *dbMeta) getParents(s *xorm.Session, inode, parent Ino) []Ino {
	if parent > 0 {
		return []Ino{parent}
	}
	var rows []edge
	if err := s.Find(&rows, &edge{Inode: inode}); err != nil {
		logger.Warnf("Scan edge key of inode %d: %s", inode, err)
		return nil
	}
	ps := make(map[Ino]struct{})
	for _, row := range rows {
		ps[row.Parent] = struct{}{}
	}
	pss := make([]Ino, 0, len(ps))
	for p := range ps {
		pss = append(pss, p)
	}
	return pss
}

func (m *dbMeta) doGetParents(ctx Context, inode Ino) map[Ino]int {
	var rows []edge
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		rows = nil
		return s.Find(&rows, &edge{Inode: inode})
	}); err != nil {
		logger.Warnf("Scan edge key of inode %d: %s", inode, err)
		return nil
	}
	ps := make(map[Ino]int)
	for _, row := range rows {
		ps[row.Parent]++
	}
	return ps
}

func (m *dbMeta) doUpdateDirStat(ctx Context, batch map[Ino]dirStat) error {
	table := m.db.GetTableMapper().Obj2Table("dirStats")
	fileLengthColumn := m.db.GetColumnMapper().Obj2Table("DataLength")
	usedSpaceColumn := m.db.GetColumnMapper().Obj2Table("UsedSpace")
	usedInodeColumn := m.db.GetColumnMapper().Obj2Table("UsedInodes")
	sql := fmt.Sprintf(
		"update `%s` set `%s` = `%s` + ?, `%s` = `%s` + ?, `%s` = `%s` + ? where `inode` = ?",
		table,
		fileLengthColumn, fileLengthColumn,
		usedSpaceColumn, usedSpaceColumn,
		usedInodeColumn, usedInodeColumn,
	)

	nonexist := make(map[Ino]bool, 0)

	for _, group := range m.groupBatch(batch, 1000) {
		err := m.txn(func(s *xorm.Session) error {
			for _, ino := range group {
				stat := batch[ino]
				ret, err := s.Exec(sql, stat.length, stat.space, stat.inodes, ino)
				if err != nil {
					return err
				}
				affected, err := ret.RowsAffected()
				if err != nil {
					return err
				}
				if affected == 0 {
					nonexist[ino] = true
				}
			}
			return nil
		})
		if err != nil {
			return err
		}
	}

	if len(nonexist) > 0 {
		m.parallelSyncDirStat(ctx, nonexist).Wait()
	}
	return nil
}

func (m *dbMeta) doSyncDirStat(ctx Context, ino Ino) (*dirStat, syscall.Errno) {
	if m.conf.ReadOnly {
		return nil, syscall.EROFS
	}
	stat, st := m.calcDirStat(ctx, ino)
	if st != 0 {
		return nil, st
	}
	err := m.txn(func(s *xorm.Session) error {
		exist, err := s.Exist(&node{Inode: ino})
		if err != nil {
			return err
		}
		if !exist {
			return syscall.ENOENT
		}
		record := &dirStats{ino, stat.length, stat.space, stat.inodes}
		_, err = s.Insert(record)
		if err != nil && isDuplicateEntryErr(err) {
			_, err = s.Cols("data_length", "used_space", "used_inodes").Update(record, &dirStats{Inode: ino})
		}
		return err
	})
	return stat, errno(err)
}

func (m *dbMeta) doGetDirStat(ctx Context, ino Ino, trySync bool) (*dirStat, syscall.Errno) {
	st := dirStats{Inode: ino}
	var exist bool
	var err error
	if err = m.simpleTxn(ctx, func(s *xorm.Session) error {
		exist, err = s.Get(&st)
		return err
	}); err != nil {
		return nil, errno(err)
	}
	if !exist {
		if trySync {
			return m.doSyncDirStat(ctx, ino)
		}
		return nil, 0
	}

	if trySync && (st.UsedSpace < 0 || st.UsedInodes < 0) {
		logger.Warnf(
			"dir usage of inode %d is invalid: space %d, inodes %d, try to fix",
			ino, st.UsedSpace, st.UsedInodes,
		)
		stat, eno := m.calcDirStat(ctx, ino)
		if eno != 0 {
			return nil, eno
		}
		st.DataLength, st.UsedSpace, st.UsedInodes = stat.length, stat.space, stat.inodes
		e := m.txn(func(s *xorm.Session) error {
			n, err := s.Cols("data_length", "used_space", "used_inodes").Update(&st, &dirStats{Inode: ino})
			if err == nil && n != 1 {
				err = errors.Errorf("update dir usage of inode %d: %d rows affected", ino, n)
			}
			return err
		})
		if e != nil {
			logger.Warn(e)
		}
	}
	return &dirStat{st.DataLength, st.UsedSpace, st.UsedInodes}, 0
}

func (m *dbMeta) doFindDeletedFiles(ts int64, limit int) (map[Ino]uint64, error) {
	files := make(map[Ino]uint64)
	err := m.simpleTxn(Background(), func(s *xorm.Session) error {
		var ds []delfile
		err := s.Where("expire < ?", ts).Limit(limit, 0).Find(&ds)
		if err != nil {
			return err
		}
		for _, d := range ds {
			files[d.Inode] = d.Length
		}
		return nil
	})
	return files, err
}

func (m *dbMeta) doCleanupSlices(ctx Context, count *uint64) error {
	var cks []sliceRef
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		cks = nil
		return s.Where("refs <= 0").Find(&cks)
	}); err != nil {
		return err
	}
	for _, ck := range cks {
		m.deleteSlice(ck.Id, ck.Size)
		if count != nil {
			*count++
		}
		if ctx.Canceled() {
			return ctx.Err()
		}
	}
	return nil
}

func (m *dbMeta) deleteChunk(inode Ino, indx uint32) error {
	var ss []*slice
	err := m.txn(func(s *xorm.Session) error {
		ss = ss[:0]
		var c = chunk{Inode: inode, Indx: indx}
		ok, err := s.ForUpdate().MustCols("indx").Get(&c)
		if err != nil {
			return err
		}
		if !ok {
			return nil
		}
		ss = readSliceBuf(c.Slices)
		if ss == nil {
			logger.Errorf("Corrupt value for inode %d chunk index %d, use `gc` to clean up leaked slices", inode, indx)
		}
		for _, sc := range ss {
			if sc.id == 0 {
				continue
			}
			_, err = s.Exec(m.sqlConv("update chunk_ref set refs=refs-1 where chunkid=? AND size=?"), sc.id, sc.size)
			if err != nil {
				return err
			}
		}
		c.Slices = nil
		n, err := s.Where("inode = ? AND indx = ?", inode, indx).Delete(&c)
		if err == nil && n == 0 {
			err = fmt.Errorf("chunk %d:%d changed, try restarting transaction", inode, indx)
		}
		return err
	})
	if err != nil {
		return fmt.Errorf("delete slice from chunk %s fail: %s, retry later", inode, err)
	}
	for _, s := range ss {
		if s.id == 0 {
			continue
		}
		var ref = sliceRef{Id: s.id}
		err := m.simpleTxn(Background(), func(s *xorm.Session) error {
			ok, err := s.Get(&ref)
			if err == nil && !ok {
				err = errors.New("not found")
			}
			return err
		})
		if err == nil && ref.Refs <= 0 {
			m.deleteSlice(s.id, s.size)
		}
	}
	return nil
}

func (m *dbMeta) doDeleteFileData(inode Ino, length uint64) {
	var indexes []chunk
	_ = m.simpleTxn(Background(), func(s *xorm.Session) error {
		indexes = nil
		return s.Cols("indx").Find(&indexes, &chunk{Inode: inode})
	})
	for _, c := range indexes {
		err := m.deleteChunk(inode, c.Indx)
		if err != nil {
			logger.Warnf("deleteChunk inode %d index %d error: %s", inode, c.Indx, err)
			return
		}
	}
	_ = m.txn(func(s *xorm.Session) error {
		_, err := s.Delete(delfile{Inode: inode})
		return err
	})
}

func (m *dbMeta) doCleanupDelayedSlices(ctx Context, edge int64) (int, error) {
	var count int
	var ss []Slice
	var result []delslices
	var batch int = 1e6
	for {
		_ = m.simpleTxn(ctx, func(s *xorm.Session) error {
			result = result[:0]
			return s.Where("deleted < ?", edge).Limit(batch, 0).Find(&result)
		})

		for _, ds := range result {
			if err := m.txn(func(ses *xorm.Session) error {
				ss = ss[:0]
				ds := delslices{Id: ds.Id}
				if ok, e := ses.ForUpdate().Get(&ds); e != nil {
					return e
				} else if !ok {
					return nil
				}
				m.decodeDelayedSlices(ds.Slices, &ss)
				if len(ss) == 0 {
					return fmt.Errorf("invalid value for delayed slices %d: %v", ds.Id, ds.Slices)
				}
				for _, s := range ss {
					if _, e := ses.Exec(m.sqlConv("update chunk_ref set refs=refs-1 where chunkid=? AND size=?"), s.Id, s.Size); e != nil {
						return e
					}
				}
				_, e := ses.Delete(&delslices{Id: ds.Id})
				return e
			}); err != nil {
				logger.Warnf("Cleanup delayed slices %d: %s", ds.Id, err)
				continue
			}
			for _, s := range ss {
				var ref = sliceRef{Id: s.Id}
				err := m.simpleTxn(ctx, func(s *xorm.Session) error {
					ok, err := s.Get(&ref)
					if err == nil && !ok {
						err = errors.New("not found")
					}
					return err
				})
				if err == nil && ref.Refs <= 0 {
					m.deleteSlice(s.Id, s.Size)
					count++
				}
				if ctx.Canceled() {
					return count, ctx.Err()
				}
			}
		}
		if len(result) < batch {
			break
		}
	}
	return count, nil
}

func (m *dbMeta) doCompactChunk(inode Ino, indx uint32, origin []byte, ss []*slice, skipped int, pos uint32, id uint64, size uint32, delayed []byte) syscall.Errno {
	st := errno(m.txn(func(s *xorm.Session) error {
		var c2 = chunk{Inode: inode, Indx: indx}
		_, err := s.ForUpdate().MustCols("indx").Get(&c2)
		if err != nil {
			return err
		}
		if len(c2.Slices) < len(origin) || !bytes.Equal(origin, c2.Slices[:len(origin)]) {
			logger.Infof("chunk %d:%d was changed %d -> %d", inode, indx, len(origin), len(c2.Slices))
			return syscall.EINVAL
		}

		c2.Slices = append(append(c2.Slices[:skipped*sliceBytes], marshalSlice(pos, id, size, 0, size)...), c2.Slices[len(origin):]...)
		if _, err := s.Cols("slices").Where("Inode = ? AND indx = ?", inode, indx).Update(c2); err != nil {
			return err
		}
		// create the key to tracking it
		if err = mustInsert(s, sliceRef{id, size, 1}); err != nil {
			return err
		}
		if delayed != nil {
			if len(delayed) > 0 {
				if err = mustInsert(s, &delslices{id, time.Now().Unix(), delayed}); err != nil {
					return err
				}
			}
		} else {
			for _, s_ := range ss {
				if s_.id == 0 {
					continue
				}
				if _, err := s.Exec(m.sqlConv("update chunk_ref set refs=refs-1 where chunkid=? AND size=?"), s_.id, s_.size); err != nil {
					return err
				}
			}
		}
		return nil
	}, inode))
	// there could be false-negative that the compaction is successful, double-check
	if st != 0 && st != syscall.EINVAL {
		var ok bool
		if err := m.simpleTxn(Background(), func(s *xorm.Session) error {
			var e error
			ok, e = s.Get(&sliceRef{Id: id})
			return e
		}); err == nil {
			if ok {
				st = 0
			} else {
				logger.Infof("compacted chunk %d was not used", id)
				st = syscall.EINVAL
			}
		}
	}

	if st == syscall.EINVAL {
		_ = m.txn(func(s *xorm.Session) error {
			return mustInsert(s, &sliceRef{id, size, 0})
		})
	} else if st == 0 && delayed == nil {
		for _, s := range ss {
			if s.id == 0 {
				continue
			}
			var ref = sliceRef{Id: s.id}
			var ok bool
			err := m.simpleTxn(Background(), func(s *xorm.Session) error {
				var e error
				ok, e = s.Get(&ref)
				return e
			})
			if err == nil && ok && ref.Refs <= 0 {
				m.deleteSlice(s.id, s.size)
			}
		}
	}
	return st
}

func dup(b []byte) []byte {
	r := make([]byte, len(b))
	copy(r, b)
	return r
}

func (m *dbMeta) scanAllChunks(ctx Context, ch chan<- cchunk, bar *utils.Bar) error {
	return m.roTxn(ctx, func(s *xorm.Session) error {
		return s.Table(&chunk{}).Iterate(new(chunk), func(idx int, bean interface{}) error {
			c := bean.(*chunk)
			if len(c.Slices) > sliceBytes {
				bar.IncrTotal(1)
				ch <- cchunk{c.Inode, c.Indx, len(c.Slices) / sliceBytes}
			}
			return nil
		})
	})
}

func (m *dbMeta) ListSlices(ctx Context, slices map[Ino][]Slice, scanPending, delete bool, showProgress func()) syscall.Errno {
	if delete {
		_ = m.doCleanupSlices(ctx, nil)
	}
	err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		var cs []chunk
		err := s.Find(&cs)
		if err != nil {
			return err
		}
		for _, c := range cs {
			ss := readSliceBuf(c.Slices)
			if ss == nil {
				logger.Errorf("Corrupt value for inode %d chunk index %d", c.Inode, c.Indx)
				continue
			}
			for _, s := range ss {
				if s.id > 0 {
					slices[c.Inode] = append(slices[c.Inode], Slice{Id: s.id, Size: s.size})
					if showProgress != nil {
						showProgress()
					}
				}
			}
		}
		return nil
	})
	if err != nil {
		return errno(err)
	}

	if scanPending {
		_ = m.simpleTxn(ctx, func(s *xorm.Session) error {
			var cks []sliceRef
			err := s.Where("refs <= 0").Find(&cks)
			if err != nil {
				return err
			}
			for _, ck := range cks {
				slices[0] = append(slices[0], Slice{Id: ck.Id, Size: ck.Size})
			}
			return nil
		})
	}

	if m.getFormat().TrashDays == 0 {
		return 0
	}
	return errno(m.scanTrashSlices(ctx, func(ss []Slice, _ int64) (bool, error) {
		slices[1] = append(slices[1], ss...)
		if showProgress != nil {
			for range ss {
				showProgress()
			}
		}
		return false, nil
	}))
}

func (m *dbMeta) scanTrashSlices(ctx Context, scan trashSliceScan) error {
	if scan == nil {
		return nil
	}
	var dss []delslices

	err := m.simpleTxn(ctx, func(tx *xorm.Session) error {
		if ok, err := tx.IsTableExist(&delslices{}); err != nil {
			return err
		} else if !ok {
			return nil
		}
		return tx.Find(&dss)
	})
	if err != nil {
		return err
	}
	var ss []Slice
	for _, ds := range dss {
		var clean bool
		err = m.txn(func(tx *xorm.Session) error {
			ss = ss[:0]
			del := delslices{Id: ds.Id}
			found, err := tx.Get(&del)
			if err != nil {
				return errors.Wrapf(err, "get delslices %d", ds.Id)
			}
			if !found {
				return nil
			}
			m.decodeDelayedSlices(del.Slices, &ss)
			clean, err = scan(ss, del.Deleted)
			if err != nil {
				return err
			}
			if clean {
				for _, s := range ss {
					if _, e := tx.Exec(m.sqlConv("update chunk_ref set refs=refs-1 where chunkid=? AND size=?"), s.Id, s.Size); e != nil {
						return e
					}
				}
				_, err = tx.Delete(del)
			}
			return err
		})
		if err != nil {
			return err
		}
		if clean {
			for _, s := range ss {
				var ref = sliceRef{Id: s.Id}
				err := m.simpleTxn(ctx, func(tx *xorm.Session) error {
					ok, err := tx.Get(&ref)
					if err == nil && !ok {
						err = errors.New("not found")
					}
					return err
				})
				if err == nil && ref.Refs <= 0 {
					m.deleteSlice(s.Id, s.Size)
				}
			}
		}
	}
	return nil
}

func (m *dbMeta) scanPendingSlices(ctx Context, scan pendingSliceScan) error {
	if scan == nil {
		return nil
	}
	var refs []sliceRef
	err := m.simpleTxn(ctx, func(tx *xorm.Session) error {
		if ok, err := tx.IsTableExist(&sliceRef{}); err != nil {
			return err
		} else if !ok {
			return nil
		}
		return tx.Where("refs <= 0").Find(&refs)
	})
	if err != nil {
		return errors.Wrap(err, "scan slice refs")
	}
	for _, ref := range refs {
		clean, err := scan(ref.Id, ref.Size)
		if err != nil {
			return errors.Wrap(err, "scan slice")
		}
		if clean {
			// TODO: m.deleteSlice(ref.Id, ref.Size)
			// avoid lint warning
			_ = clean
		}
	}
	return nil
}

func (m *dbMeta) scanPendingFiles(ctx Context, scan pendingFileScan) error {
	if scan == nil {
		return nil
	}

	var dfs []delfile
	if err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		if ok, err := s.IsTableExist(&delfile{}); err != nil {
			return err
		} else if !ok {
			return nil
		}
		return s.Find(&dfs)
	}); err != nil {
		return err
	}

	for _, ds := range dfs {
		if _, err := scan(ds.Inode, ds.Length, ds.Expire); err != nil {
			return err
		}
	}

	return nil
}

func (m *dbMeta) doRepair(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	n := &node{
		Inode:  inode,
		Type:   attr.Typ,
		Mode:   attr.Mode,
		Uid:    attr.Uid,
		Gid:    attr.Gid,
		Length: attr.Length,
		Parent: attr.Parent,
		Nlink:  attr.Nlink,
	}
	n.setAtime(attr.Atime*1e9 + int64(attr.Atimensec))
	n.setMtime(attr.Mtime*1e9 + int64(attr.Mtimensec))
	n.setCtime(attr.Ctime*1e9 + int64(attr.Ctimensec))
	return errno(m.txn(func(s *xorm.Session) error {
		n.Nlink = 2
		var rows []edge
		if err := s.Find(&rows, &edge{Parent: inode}); err != nil {
			return err
		}
		for _, row := range rows {
			if row.Type == TypeDirectory {
				n.Nlink++
			}
		}
		ok, err := s.ForUpdate().Get(&node{Inode: inode})
		if err == nil {
			if ok {
				updateColumns := []string{
					"type", "mode",
					"uid", "gid",
					"length", "parent", "nlink",
					"atime", "mtime", "ctime",
					"atimensec", "mtimensec", "ctimensec",
				}
				_, err = s.Cols(updateColumns...).Update(n, &node{Inode: inode})
			} else {
				err = mustInsert(s, n)
			}
		}
		return err
	}, inode))
}

func (m *dbMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno {
	defer m.timeit("GetXattr", time.Now())
	inode = m.checkRoot(inode)
	return errno(m.simpleTxn(ctx, func(s *xorm.Session) error {
		var x = xattr{Inode: inode, Name: name}
		ok, err := s.Get(&x)
		if err != nil {
			return err
		}
		if !ok {
			return ENOATTR
		}
		*vbuff = x.Value
		return nil
	}))
}

func (m *dbMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno {
	defer m.timeit("ListXattr", time.Now())
	inode = m.checkRoot(inode)
	return errno(m.roTxn(ctx, func(s *xorm.Session) error {
		var xs []xattr
		err := s.Where("inode = ?", inode).Find(&xs, &xattr{Inode: inode})
		if err != nil {
			return err
		}
		*names = nil
		for _, x := range xs {
			*names = append(*names, []byte(x.Name)...)
			*names = append(*names, 0)
		}

		var n = node{Inode: inode}
		ok, err := s.Get(&n)
		if err != nil {
			return err
		} else if !ok {
			return syscall.ENOENT
		}
		attr := &Attr{}
		m.parseAttr(&n, attr)
		setXAttrACL(names, attr.AccessACL, attr.DefaultACL)
		return nil
	}))
}

func (m *dbMeta) doSetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		var k = &xattr{Inode: inode, Name: name}
		var x = xattr{Inode: inode, Name: name, Value: value}
		ok, err := s.ForUpdate().Get(k)
		if err != nil {
			return err
		}
		existing := k.Value
		k.Value = nil
		switch flags {
		case XattrCreate:
			if ok {
				return syscall.EEXIST
			}
			err = mustInsert(s, &x)
		case XattrReplace:
			if !ok {
				return ENOATTR
			}
			_, err = s.Cols("value").Update(&x, k)
		default:
			if !ok {
				err = mustInsert(s, &x)
			} else if !bytes.Equal(existing, value) {
				_, err = s.Cols("value").Update(&x, k)
			}
		}
		return err
	}))
}

func (m *dbMeta) doRemoveXattr(ctx Context, inode Ino, name string) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		n, err := s.Delete(&xattr{Inode: inode, Name: name})
		if err != nil {
			return err
		} else if n == 0 {
			return ENOATTR
		} else {
			return nil
		}
	}))
}

func (m *dbMeta) doGetQuota(ctx Context, qtype uint32, key uint64) (*Quota, error) {
	if qtype != DirQuotaType && qtype != UserQuotaType && qtype != GroupQuotaType {
		return nil, errors.Errorf("invalid quota type %d", qtype)
	}

	var quota *Quota
	err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		if qtype == DirQuotaType {
			q := &dirQuota{Inode: Ino(key)}
			ok, e := s.Get(q)
			if e == nil && ok {
				quota = &Quota{
					MaxSpace:   q.MaxSpace,
					MaxInodes:  q.MaxInodes,
					UsedSpace:  q.UsedSpace,
					UsedInodes: q.UsedInodes}
			}
			return e
		} else {
			q := &userGroupQuota{Qtype: qtype, Qkey: key}
			ok, e := s.Get(q)
			if e == nil && ok {
				quota = &Quota{
					MaxSpace:   q.MaxSpace,
					MaxInodes:  q.MaxInodes,
					UsedSpace:  q.UsedSpace,
					UsedInodes: q.UsedInodes}
			}
			return e
		}
	})
	return quota, err
}

func updateQuotaFields(quota *Quota, exist bool, maxSpace, maxInodes *int64, usedSpace, usedInodes *int64) []string {
	updateColumns := make([]string, 0, 4)
	if quota.MaxSpace >= 0 {
		*maxSpace = quota.MaxSpace
		updateColumns = append(updateColumns, "max_space")
	}
	if quota.MaxInodes >= 0 {
		*maxInodes = quota.MaxInodes
		updateColumns = append(updateColumns, "max_inodes")
	}
	if quota.UsedSpace >= 0 {
		*usedSpace = quota.UsedSpace
		updateColumns = append(updateColumns, "used_space")
	} else if !exist {
		*usedSpace = 0
		updateColumns = append(updateColumns, "used_space")
	}
	if quota.UsedInodes >= 0 {
		*usedInodes = quota.UsedInodes
		updateColumns = append(updateColumns, "used_inodes")
	} else if !exist {
		*usedInodes = 0
		updateColumns = append(updateColumns, "used_inodes")
	}

	return updateColumns
}

func (m *dbMeta) doSetQuota(ctx Context, qtype uint32, key uint64, quota *Quota) (bool, error) {
	var created bool
	err := m.txn(func(s *xorm.Session) error {
		if qtype == DirQuotaType {
			origin := &dirQuota{Inode: Ino(key)}
			exist, e := s.ForUpdate().Get(origin)
			if e != nil {
				return e
			}
			created = !exist
			updateColumns := updateQuotaFields(quota, exist, &origin.MaxSpace, &origin.MaxInodes, &origin.UsedSpace, &origin.UsedInodes)
			if exist {
				_, e = s.Cols(updateColumns...).Update(origin, &dirQuota{Inode: Ino(key)})
			} else {
				e = mustInsert(s, origin)
			}
			return e
		} else if qtype == UserQuotaType || qtype == GroupQuotaType {
			origin := &userGroupQuota{Qtype: qtype, Qkey: key}
			exist, e := s.ForUpdate().Get(origin)
			if e != nil {
				return e
			}
			created = !exist
			updateColumns := updateQuotaFields(quota, exist, &origin.MaxSpace, &origin.MaxInodes, &origin.UsedSpace, &origin.UsedInodes)
			if exist {
				_, e = s.Cols(updateColumns...).Update(origin, &userGroupQuota{Qtype: qtype, Qkey: key})
			} else {
				e = mustInsert(s, origin)
			}
			return e
		} else {
			return errors.Errorf("invalid quota type %d", qtype)
		}
	})

	return created, err
}

func (m *dbMeta) doDelQuota(ctx Context, qtype uint32, key uint64) error {
	if qtype != DirQuotaType && qtype != UserQuotaType && qtype != GroupQuotaType {
		return errors.Errorf("invalid quota type %d", qtype)
	}

	return m.txn(func(s *xorm.Session) error {
		if qtype == DirQuotaType {
			_, e := s.Delete(&dirQuota{Inode: Ino(key)})
			return e
		} else {
			_, e := s.Cols("max_space", "max_inodes").
				Update(&userGroupQuota{MaxSpace: -1, MaxInodes: -1},
					&userGroupQuota{Qtype: qtype, Qkey: key})
			return e
		}
	})
}

func (m *dbMeta) doLoadQuotas(ctx Context) (map[uint64]*Quota, map[uint64]*Quota, map[uint64]*Quota, error) {
	var dirQuotasList []dirQuota
	var userGroupQuotasList []userGroupQuota

	err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		if e := s.Find(&dirQuotasList); e != nil {
			return e
		}
		if e := s.Find(&userGroupQuotasList); e != nil {
			return e
		}
		return nil
	})
	if err != nil {
		return nil, nil, nil, err
	}

	dirQuotas := make(map[uint64]*Quota)
	userQuotas := make(map[uint64]*Quota)
	groupQuotas := make(map[uint64]*Quota)

	// Load directory quotas
	for _, q := range dirQuotasList {
		quota := &Quota{
			MaxSpace:   q.MaxSpace,
			MaxInodes:  q.MaxInodes,
			UsedSpace:  q.UsedSpace,
			UsedInodes: q.UsedInodes,
		}
		dirQuotas[uint64(q.Inode)] = quota
	}

	// Load user and group quotas
	for _, q := range userGroupQuotasList {
		quota := &Quota{
			MaxSpace:   q.MaxSpace,
			MaxInodes:  q.MaxInodes,
			UsedSpace:  q.UsedSpace,
			UsedInodes: q.UsedInodes,
		}

		switch q.Qtype {
		case UserQuotaType:
			userQuotas[q.Qkey] = quota
		case GroupQuotaType:
			groupQuotas[q.Qkey] = quota
		}
	}

	return dirQuotas, userQuotas, groupQuotas, nil
}

func (m *dbMeta) doFlushQuotas(ctx Context, quotas []*iQuota) error {
	sort.Slice(quotas, func(i, j int) bool { return quotas[i].qkey < quotas[j].qkey })
	return m.txn(func(s *xorm.Session) error {
		for _, q := range quotas {
			if q.qtype == DirQuotaType {
				_, err := s.Exec(m.sqlConv("update dir_quota set used_space=used_space+?, used_inodes=used_inodes+? where inode=?"),
					q.quota.newSpace, q.quota.newInodes, q.qkey)
				if err != nil {
					return err
				}
			} else {
				ret, err := s.Exec(m.sqlConv("update user_group_quota set used_space=used_space+?, used_inodes=used_inodes+? where qtype=? and qkey=?"),
					q.quota.newSpace, q.quota.newInodes, q.qtype, q.qkey)
				if err != nil {
					return err
				}
				affected, err := ret.RowsAffected()
				if err != nil {
					return err
				}
				if affected == 0 {
					quota := &userGroupQuota{
						Qtype:      q.qtype,
						Qkey:       q.qkey,
						MaxSpace:   -1,
						MaxInodes:  -1,
						UsedSpace:  q.quota.newSpace,
						UsedInodes: q.quota.newInodes,
					}
					if err := mustInsert(s, quota); err != nil {
						return err
					}
				}
			}
		}
		return nil
	})
}

func (m *dbMeta) dumpEntry(s *xorm.Session, inode Ino, typ uint8, e *DumpedEntry, showProgress func(totalIncr, currentIncr int64)) error {
	n := &node{Inode: inode}
	ok, err := s.Get(n)
	if err != nil {
		return err
	}
	attr := &Attr{Typ: typ, Nlink: 1}
	if !ok {
		logger.Warnf("The entry of the inode was not found. inode: %d", inode)
		if attr.Typ == TypeDirectory {
			attr.Nlink = 2
		}
	} else {
		m.parseAttr(n, attr)
	}
	dumpAttr(attr, e.Attr)
	e.Attr.Inode = inode

	var rows []xattr
	if err = s.Find(&rows, &xattr{Inode: inode}); err != nil {
		return err
	}
	if len(rows) > 0 {
		xattrs := make([]*DumpedXattr, 0, len(rows))
		for _, x := range rows {
			xattrs = append(xattrs, &DumpedXattr{x.Name, string(x.Value)})
		}
		sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name })
		e.Xattrs = xattrs
	}

	accessACl, err := m.getACL(s, attr.AccessACL)
	if err != nil {
		return err
	}
	e.AccessACL = dumpACL(accessACl)
	defaultACL, err := m.getACL(s, attr.DefaultACL)
	if err != nil {
		return err
	}
	e.DefaultACL = dumpACL(defaultACL)

	if attr.Typ == TypeFile {
		for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ {
			c := &chunk{Inode: inode, Indx: indx}
			if ok, err = s.MustCols("indx").Get(c); err != nil {
				return err
			}
			if !ok {
				continue
			}
			ss := readSliceBuf(c.Slices)
			if ss == nil {
				logger.Errorf("Corrupt value for inode %d chunk index %d", inode, indx)
			}
			slices := make([]*DumpedSlice, 0, len(ss))
			for _, s := range ss {
				slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
			}
			e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices})
		}
	} else if attr.Typ == TypeSymlink {
		l := &symlink{Inode: inode}
		ok, err = s.Get(l)
		if err != nil {
			return err
		}
		if !ok {
			logger.Warnf("no link target for inode %d", inode)
		}
		e.Symlink = string(l.Target)
	} else if attr.Typ == TypeDirectory {
		var edges []*edge
		err := s.Limit(1000, 0).Find(&edges, &edge{Parent: inode})
		if err != nil {
			return err
		}
		if showProgress != nil {
			showProgress(int64(len(edges)), 0)
		}
		if len(edges) < 1000 {
			e.Entries = make(map[string]*DumpedEntry, len(edges))
			for _, edge := range edges {
				name := string(edge.Name)
				ce := entryPool.Get()
				ce.Name = name
				ce.Attr.Inode = edge.Inode
				ce.Attr.Type = typeToString(edge.Type)
				e.Entries[name] = ce
			}
		}
	}
	return nil
}

func (m *dbMeta) dumpEntryFast(inode Ino, typ uint8) *DumpedEntry {
	e := &DumpedEntry{}
	n, ok := m.snap.node[inode]
	if !ok && inode != TrashInode {
		logger.Warnf("Corrupt inode: %d, missing attribute", inode)
	}

	attr := &Attr{Typ: typ, Nlink: 1}
	if !ok {
		logger.Warnf("The entry of the inode was not found. inode: %d", inode)
		if attr.Typ == TypeDirectory {
			attr.Nlink = 2
		}
	} else {
		m.parseAttr(n, attr)
	}
	e.Attr = &DumpedAttr{}
	dumpAttr(attr, e.Attr)
	e.Attr.Inode = inode

	rows, ok := m.snap.xattr[inode]
	if ok && len(rows) > 0 {
		xattrs := make([]*DumpedXattr, 0, len(rows))
		for _, x := range rows {
			xattrs = append(xattrs, &DumpedXattr{x.Name, string(x.Value)})
		}
		sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name })
		e.Xattrs = xattrs
	}

	if attr.AccessACL != aclAPI.None {
		e.AccessACL = dumpACL(m.aclCache.Get(attr.AccessACL))
	}
	if attr.DefaultACL != aclAPI.None {
		e.DefaultACL = dumpACL(m.aclCache.Get(attr.DefaultACL))
	}

	if attr.Typ == TypeFile {
		for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ {
			c, ok := m.snap.chunk[fmt.Sprintf("%d-%d", inode, indx)]
			if !ok {
				continue
			}
			ss := readSliceBuf(c.Slices)
			if ss == nil {
				logger.Errorf("Corrupt value for inode %d chunk index %d", inode, indx)
			}
			slices := make([]*DumpedSlice, 0, len(ss))
			for _, s := range ss {
				slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
			}
			e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices})
		}
	} else if attr.Typ == TypeSymlink {
		l, ok := m.snap.symlink[inode]
		if !ok {
			logger.Warnf("no link target for inode %d", inode)
			l = &symlink{}
		}
		e.Symlink = string(l.Target)
	}
	return e
}

func (m *dbMeta) dumpDir(s *xorm.Session, inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth, threads int, showProgress func(totalIncr, currentIncr int64)) error {
	bwWrite := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	if tree.Entries == nil {
		// retry for large directory
		var edges []*edge
		err := s.Find(&edges, &edge{Parent: inode})
		if err != nil {
			return err
		}
		tree.Entries = make(map[string]*DumpedEntry, len(edges))
		for _, edge := range edges {
			name := string(edge.Name)
			ce := entryPool.Get()
			ce.Name = name
			ce.Attr.Inode = edge.Inode
			ce.Attr.Type = typeToString(edge.Type)
			tree.Entries[name] = ce
		}
		if showProgress != nil {
			showProgress(int64(len(edges))-1000, 0)
		}
	}
	var entries []*DumpedEntry
	for _, e := range tree.Entries {
		entries = append(entries, e)
	}
	sort.Slice(entries, func(i, j int) bool { return entries[i].Name < entries[j].Name })
	_ = tree.writeJsonWithOutEntry(bw, depth)

	ms := make([]sync.Mutex, threads)
	conds := make([]*sync.Cond, threads)
	ready := make([]bool, threads)
	var err error
	for c := 0; c < threads; c++ {
		conds[c] = sync.NewCond(&ms[c])
		if c < len(entries) {
			go func(c int) {
				for i := c; i < len(entries) && err == nil; i += threads {
					e := entries[i]
					er := m.roTxn(Background(), func(s *xorm.Session) error {
						return m.dumpEntry(s, e.Attr.Inode, 0, e, showProgress)
					})
					ms[c].Lock()
					ready[c] = true
					if er != nil {
						err = er
					}
					conds[c].Signal()
					for ready[c] && err == nil {
						conds[c].Wait()
					}
					ms[c].Unlock()
				}
			}(c)
		}
	}

	for i, e := range entries {
		c := i % threads
		ms[c].Lock()
		for !ready[c] && err == nil {
			conds[c].Wait()
		}
		ready[c] = false
		conds[c].Signal()
		ms[c].Unlock()
		if err != nil {
			return err
		}
		if e.Attr.Type == "directory" {
			err = m.dumpDir(s, e.Attr.Inode, e, bw, depth+2, threads, showProgress)
		} else {
			err = e.writeJSON(bw, depth+2)
		}
		if err != nil {
			return err
		}
		entries[i] = nil
		entryPool.Put(e)
		if i != len(entries)-1 {
			bwWrite(",")
		}
		if showProgress != nil {
			showProgress(0, 1)
		}
	}
	bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth)))
	return nil
}

func (m *dbMeta) dumpDirFast(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth int, showProgress func(totalIncr, currentIncr int64)) error {
	bwWrite := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	edges := m.snap.edges[inode]
	_ = tree.writeJsonWithOutEntry(bw, depth)
	sort.Slice(edges, func(i, j int) bool { return bytes.Compare(edges[i].Name, edges[j].Name) == -1 })

	for i, e := range edges {
		entry := m.dumpEntryFast(e.Inode, e.Type)
		if entry == nil {
			logger.Warnf("ignore broken entry %s (inode: %d) in %s", string(e.Name), e.Inode, inode)
			continue
		}

		entry.Name = string(e.Name)
		if e.Type == TypeDirectory {
			_ = m.dumpDirFast(e.Inode, entry, bw, depth+2, showProgress)
		} else {
			_ = entry.writeJSON(bw, depth+2)
		}
		if i != len(edges)-1 {
			bwWrite(",")
		}
		if showProgress != nil {
			showProgress(0, 1)
		}
	}
	bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth)))
	return nil
}

func (m *dbMeta) makeSnap(ses *xorm.Session, bar *utils.Bar) error {
	snap := &dbSnap{
		node:    make(map[Ino]*node),
		symlink: make(map[Ino]*symlink),
		xattr:   make(map[Ino][]*xattr),
		edges:   make(map[Ino][]*edge),
		chunk:   make(map[string]*chunk),
	}

	for _, s := range []interface{}{new(node), new(symlink), new(edge), new(xattr), new(chunk), new(acl)} {
		if count, err := ses.Count(s); err == nil {
			bar.IncrTotal(count)
		} else {
			return err
		}
	}
	if err := ses.Table(&node{}).Iterate(new(node), func(idx int, bean interface{}) error {
		n := bean.(*node)
		snap.node[n.Inode] = n
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}

	if err := ses.Table(&symlink{}).Iterate(new(symlink), func(idx int, bean interface{}) error {
		s := bean.(*symlink)
		snap.symlink[s.Inode] = s
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}
	if err := ses.Table(&edge{}).Iterate(new(edge), func(idx int, bean interface{}) error {
		e := bean.(*edge)
		snap.edges[e.Parent] = append(snap.edges[e.Parent], e)
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}

	if err := ses.Table(&xattr{}).Iterate(new(xattr), func(idx int, bean interface{}) error {
		x := bean.(*xattr)
		snap.xattr[x.Inode] = append(snap.xattr[x.Inode], x)
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}

	if err := ses.Table(&chunk{}).Iterate(new(chunk), func(idx int, bean interface{}) error {
		c := bean.(*chunk)
		snap.chunk[fmt.Sprintf("%d-%d", c.Inode, c.Indx)] = c
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}

	if err := ses.Table(&acl{}).Iterate(new(acl), func(idx int, bean interface{}) error {
		a := bean.(*acl)
		m.aclCache.Put(a.Id, a.toRule())
		bar.Increment()
		return nil
	}); err != nil {
		return err
	}

	m.snap = snap
	return nil
}

func (m *dbMeta) DumpMeta(w io.Writer, root Ino, threads int, keepSecret, fast, skipTrash bool) (err error) {
	defer func() {
		if p := recover(); p != nil {
			debug.PrintStack()
			if e, ok := p.(error); ok {
				err = e
			} else {
				err = fmt.Errorf("DumpMeta error: %v", p)
			}
		}
	}()

	progress := utils.NewProgress(false)
	var tree, trash *DumpedEntry
	root = m.checkRoot(root)
	return m.roTxn(Background(), func(s *xorm.Session) error {
		if root == RootInode && fast {
			defer func() { m.snap = nil }()
			bar := progress.AddCountBar("Snapshot keys", 0)
			if err = m.makeSnap(s, bar); err != nil {
				return fmt.Errorf("Fetch all metadata from DB: %s", err)
			}
			bar.Done()
			tree = m.dumpEntryFast(root, TypeDirectory)
			if !skipTrash {
				trash = m.dumpEntryFast(TrashInode, TypeDirectory)
			}
		} else {
			tree = &DumpedEntry{
				Name: "FSTree",
				Attr: &DumpedAttr{
					Inode: root,
					Type:  typeToString(TypeDirectory),
				},
			}
			if err = m.dumpEntry(s, root, TypeDirectory, tree, nil); err != nil {
				return err
			}
			if root == RootInode && !skipTrash {
				trash = &DumpedEntry{
					Name: "Trash",
					Attr: &DumpedAttr{
						Inode: TrashInode,
						Type:  typeToString(TypeDirectory),
					},
				}
				if err = m.dumpEntry(s, TrashInode, TypeDirectory, trash, nil); err != nil {
					return err
				}
			}
		}
		if tree == nil {
			return errors.New("The entry of the root inode was not found")
		}
		tree.Name = "FSTree"

		var drows []delfile
		// the statement remembers the table of last Iterator
		if err := s.Table(&delfile{}).Find(&drows); err != nil {
			return err
		}
		dels := make([]*DumpedDelFile, 0, len(drows))
		for _, row := range drows {
			dels = append(dels, &DumpedDelFile{row.Inode, row.Length, row.Expire})
		}
		var crows []counter
		if err = s.Find(&crows); err != nil {
			return err
		}
		counters := &DumpedCounters{}
		for _, row := range crows {
			switch row.Name {
			case "usedSpace":
				counters.UsedSpace = row.Value
			case "totalInodes":
				counters.UsedInodes = row.Value
			case "nextInode":
				counters.NextInode = row.Value
			case "nextChunk":
				counters.NextChunk = row.Value
			case "nextSession":
				counters.NextSession = row.Value
			case "nextTrash":
				counters.NextTrash = row.Value
			}
		}

		var srows []sustained
		if err := s.Find(&srows); err != nil {
			return err
		}
		ss := make(map[uint64][]Ino)
		for _, row := range srows {
			ss[row.Sid] = append(ss[row.Sid], row.Inode)
		}
		sessions := make([]*DumpedSustained, 0, len(ss))
		for k, v := range ss {
			sessions = append(sessions, &DumpedSustained{k, v})
		}

		var qs []dirQuota
		if err := s.Find(&qs); err != nil {
			return err
		}
		// todo Add user/group quota
		dumpedQuotas := make(map[Ino]*DumpedQuota, len(qs))
		for _, q := range qs {
			dumpedQuotas[Ino(q.Inode)] = &DumpedQuota{q.MaxSpace, q.MaxInodes, 0, 0}
		}

		dm := DumpedMeta{
			Setting:   *m.getFormat(),
			Counters:  counters,
			Sustained: sessions,
			DelFiles:  dels,
			Quotas:    dumpedQuotas,
		}
		if !keepSecret && dm.Setting.SecretKey != "" {
			dm.Setting.SecretKey = "removed"
			logger.Warnf("Secret key is removed for the sake of safety")
		}
		if !keepSecret && dm.Setting.SessionToken != "" {
			dm.Setting.SessionToken = "removed"
			logger.Warnf("Session token is removed for the sake of safety")
		}
		bw, err := dm.writeJsonWithOutTree(w)
		if err != nil {
			return err
		}
		useTotal := root == RootInode && !skipTrash
		bar := progress.AddCountBar("Dumped entries", 1) // with root
		if useTotal {
			totalBean := &counter{Name: "totalInodes"}
			if _, err := s.Get(totalBean); err != nil {
				return err
			}
			bar.SetTotal(totalBean.Value)
		}
		bar.Increment()
		if trash != nil {
			trash.Name = "Trash"
			bar.IncrTotal(1)
			bar.Increment()
		}
		showProgress := func(totalIncr, currentIncr int64) {
			if !useTotal {
				bar.IncrTotal(totalIncr)
			}
			bar.IncrInt64(currentIncr)
		}
		if m.snap != nil {
			_ = m.dumpDirFast(root, tree, bw, 1, showProgress)
		} else {
			showProgress(int64(len(tree.Entries)), 0)
			if err = m.dumpDir(s, root, tree, bw, 1, threads, showProgress); err != nil {
				logger.Errorf("dump dir %d failed: %s", root, err)
				return fmt.Errorf("dump dir %d failed", root) // don't retry
			}
		}
		if trash != nil {
			if _, err = bw.WriteString(","); err != nil {
				return err
			}
			if m.snap != nil {
				_ = m.dumpDirFast(TrashInode, trash, bw, 1, showProgress)
			} else {
				showProgress(int64(len(trash.Entries)), 0)
				if err = m.dumpDir(s, TrashInode, trash, bw, 1, threads, showProgress); err != nil {
					logger.Errorf("dump trash failed: %s", err)
					return fmt.Errorf("dump trash failed") // don't retry
				}
			}
		}
		if _, err = bw.WriteString("\n}\n"); err != nil {
			return err
		}
		progress.Done()
		return bw.Flush()
	})
}

func (m *dbMeta) loadEntry(e *DumpedEntry, chs []chan interface{}, aclMaxId *uint32) {
	inode := e.Attr.Inode
	attr := e.Attr
	n := &node{
		Inode:  inode,
		Flags:  attr.Flags,
		Type:   typeFromString(attr.Type),
		Mode:   attr.Mode,
		Uid:    attr.Uid,
		Gid:    attr.Gid,
		Nlink:  attr.Nlink,
		Rdev:   attr.Rdev,
		Parent: e.Parents[0],
	} // Length not set
	n.setAtime(attr.Atime*1e9 + int64(attr.Atimensec))
	n.setMtime(attr.Mtime*1e9 + int64(attr.Mtimensec))
	n.setCtime(attr.Ctime*1e9 + int64(attr.Ctimensec))

	// chs: node, edge, chunk, chunkRef, xattr, others
	if n.Type == TypeFile {
		n.Length = attr.Length
		for _, c := range e.Chunks {
			if len(c.Slices) == 0 {
				continue
			}
			slices := make([]byte, 0, sliceBytes*len(c.Slices))
			for _, s := range c.Slices {
				slices = append(slices, marshalSlice(s.Pos, s.Id, s.Size, s.Off, s.Len)...)
			}
			chs[2] <- &chunk{Inode: inode, Indx: c.Index, Slices: slices}
		}
	} else if n.Type == TypeDirectory {
		n.Length = 4 << 10
		stat := &dirStats{Inode: inode}
		for name, c := range e.Entries {
			length := uint64(0)
			if typeFromString(c.Attr.Type) == TypeFile {
				length = c.Attr.Length
			}
			stat.DataLength += int64(length)
			stat.UsedSpace += align4K(length)
			stat.UsedInodes++

			chs[1] <- &edge{
				Parent: inode,
				Name:   unescape(name),
				Inode:  c.Attr.Inode,
				Type:   typeFromString(c.Attr.Type),
			}
		}
		chs[5] <- stat
	} else if n.Type == TypeSymlink {
		symL := unescape(e.Symlink)
		n.Length = uint64(len(symL))
		chs[5] <- &symlink{inode, symL}
	}
	for _, x := range e.Xattrs {
		chs[4] <- &xattr{Inode: inode, Name: x.Name, Value: unescape(x.Value)}
	}

	n.AccessACLId = m.saveACL(loadACL(e.AccessACL), aclMaxId)
	n.DefaultACLId = m.saveACL(loadACL(e.DefaultACL), aclMaxId)
	chs[0] <- n
}

func (m *dbMeta) getTxnBatchNum() int {
	switch m.Name() {
	case "sqlite3":
		return 999 / MaxFieldsCountOfTable
	case "mysql":
		return 65535 / MaxFieldsCountOfTable
	case "postgres":
		return 1000
	default:
		return 1000
	}
}

func (m *dbMeta) checkAddr() error {
	tables, err := m.db.DBMetas()
	if err != nil {
		return err
	}
	if len(tables) > 0 {
		addr := m.addr
		if !strings.Contains(addr, "://") {
			addr = fmt.Sprintf("%s://%s", m.Name(), addr)
		}
		return fmt.Errorf("database %s is not empty", addr)
	}
	return nil
}

func (m *dbMeta) LoadMeta(r io.Reader) error {
	if err := m.checkAddr(); err != nil {
		return err
	}
	if err := m.syncAllTables(); err != nil {
		return err
	}

	batch := m.getTxnBatchNum()
	chs := make([]chan interface{}, 6) // node, edge, chunk, chunkRef, xattr, others
	insert := func(index int, beans []interface{}) error {
		return m.txn(func(s *xorm.Session) error {
			var n int64
			var err error
			if index == len(chs)-1 { // multiple tables
				n, err = s.Insert(beans...)
			} else { // one table only
				n, err = s.Insert(beans)
			}
			if err == nil && int(n) != len(beans) {
				err = fmt.Errorf("only %d records inserted", n)
			}
			return err
		})
	}
	var wg sync.WaitGroup
	for i := range chs {
		chs[i] = make(chan interface{}, batch*2)
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			buffer := make([]interface{}, 0, batch)
			for bean := range chs[i] {
				buffer = append(buffer, bean)
				if len(buffer) >= batch {
					if err := insert(i, buffer); err != nil {
						logger.Fatalf("Write %d beans in channel %d: %s", len(buffer), i, err)
					}
					buffer = buffer[:0]
				}
			}
			if len(buffer) > 0 {
				if err := insert(i, buffer); err != nil {
					logger.Fatalf("Write %d beans in channel %d: %s", len(buffer), i, err)
				}
			}
		}(i)
	}

	var aclMaxId uint32 = 0
	dm, counters, parents, refs, err := loadEntries(r,
		func(e *DumpedEntry) { m.loadEntry(e, chs, &aclMaxId) },
		func(ck *chunkKey) { chs[3] <- &sliceRef{ck.id, ck.size, 1} })
	if err != nil {
		return err
	}
	m.loadDumpedQuotas(Background(), dm.Quotas)
	if err = m.loadDumpedACLs(Background()); err != nil {
		return err
	}

	format, _ := json.MarshalIndent(dm.Setting, "", "")
	chs[5] <- &setting{"format", string(format)}
	chs[5] <- &counter{usedSpace, counters.UsedSpace}
	chs[5] <- &counter{totalInodes, counters.UsedInodes}
	chs[5] <- &counter{"nextInode", counters.NextInode}
	chs[5] <- &counter{"nextChunk", counters.NextChunk}
	chs[5] <- &counter{"nextSession", counters.NextSession}
	chs[5] <- &counter{"nextTrash", counters.NextTrash}
	for _, d := range dm.DelFiles {
		chs[5] <- &delfile{d.Inode, d.Length, d.Expire}
	}
	for _, c := range chs {
		close(c)
	}
	wg.Wait()

	// update chunkRefs
	if err = m.txn(func(s *xorm.Session) error {
		for k, v := range refs {
			if v > 1 {
				if _, e := s.Cols("refs").Update(&sliceRef{Refs: int(v)}, &sliceRef{Id: k.id}); e != nil {
					return e
				}
			}
		}
		return nil
	}); err != nil {
		return err
	}

	// update nlinks and parents for hardlinks
	return m.txn(func(s *xorm.Session) error {
		for i, ps := range parents {
			if len(ps) > 1 {
				_, err := s.Cols("nlink", "parent").Update(&node{Nlink: uint32(len(ps))}, &node{Inode: i})
				if err != nil {
					return err
				}
			}
		}
		return nil
	})
}

type checkDupError func(error) bool

var dupErrorCheckers []checkDupError

func isDuplicateEntryErr(err error) bool {
	for _, check := range dupErrorCheckers {
		if check(err) {
			return true
		}
	}
	return false
}

func (m *dbMeta) validateCloneTarget(ctx Context, s xorm.Interface, ino Ino) (node, error) {
	pn := node{Inode: ino}
	ok, err := s.Get(&pn)
	if err != nil {
		return pn, err
	}
	if !ok {
		return pn, syscall.ENOENT
	}
	if pn.Type != TypeDirectory {
		return pn, syscall.ENOTDIR
	}
	if (pn.Flags & FlagImmutable) != 0 {
		return pn, syscall.EPERM
	}
	var pattr Attr
	m.parseAttr(&pn, &pattr)
	if st := m.Access(ctx, ino, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
		return pn, st
	}
	return pn, nil
}

func (m *dbMeta) doCloneEntry(ctx Context, srcIno Ino, parent Ino, name string, ino Ino, attr *Attr, cmode uint8, cumask uint16, top bool) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		n := node{Inode: srcIno}
		ok, err := s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		n.Inode = ino
		n.Parent = parent
		now := time.Now()

		m.parseAttr(&n, attr)
		if eno := m.Access(ctx, srcIno, MODE_MASK_R, attr); eno != 0 {
			return eno
		}

		if cmode&CLONE_MODE_PRESERVE_ATTR == 0 {
			n.Uid = ctx.Uid()
			n.Gid = ctx.Gid()
			n.Mode &= ^cumask
			ns := now.UnixNano()
			n.setAtime(ns)
			n.setMtime(ns)
			n.setCtime(ns)
		}
		// TODO: preserve hardlink
		if n.Type == TypeFile && n.Nlink > 1 {
			n.Nlink = 1
		}

		if top {
			pn, err := m.validateCloneTarget(ctx, s, parent)
			if err != nil {
				return err
			}
			if n.Type != TypeDirectory {
				now := time.Now().UnixNano()
				pn.setMtime(now)
				pn.setCtime(now)
				if _, err = s.Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: parent}); err != nil {
					return err
				}
			}
		}
		if top && n.Type == TypeDirectory {
			err = mustInsert(s, &n, &detachedNode{Inode: ino, Added: time.Now().Unix()})
		} else {
			err = mustInsert(s, &n, &edge{Parent: parent, Name: []byte(name), Inode: ino, Type: n.Type})
			if isDuplicateEntryErr(err) {
				return syscall.EEXIST
			}
		}
		if err != nil {
			return err
		}
		var xs []xattr
		if err = s.Where("inode = ?", srcIno).Find(&xs, &xattr{Inode: srcIno}); err != nil {
			return err
		}
		if len(xs) > 0 {
			for i := range xs {
				xs[i].Id = 0
				xs[i].Inode = ino
			}
			if err := mustInsert(s, &xs); err != nil {
				return err
			}
		}
		switch n.Type {
		case TypeDirectory:
			var st = dirStats{Inode: srcIno}
			if exist, err := s.Get(&st); err != nil {
				return err
			} else if exist {
				st.Inode = ino
				if err := mustInsert(s, &st); err != nil {
					return err
				}
			}
		case TypeFile:
			// copy chunks
			if n.Length != 0 {
				var cs []chunk
				if err = s.Where("inode = ?", srcIno).ForUpdate().Find(&cs); err != nil {
					return err
				}
				for i := range cs {
					cs[i].Id = 0
					cs[i].Inode = ino
				}
				if len(cs) != 0 {
					if err := mustInsert(s, cs); err != nil {
						return err
					}
				}
				// TODO: batch?
				for _, c := range cs {
					for _, sli := range readSliceBuf(c.Slices) {
						if sli.id > 0 {
							if _, err := s.Exec(m.sqlConv("update chunk_ref set refs=refs+1 where chunkid = ? AND size = ?"), sli.id, sli.size); err != nil {
								return err
							}
						}
					}
				}
			}
		case TypeSymlink:
			sym := symlink{Inode: srcIno}
			if exists, err := s.Get(&sym); err != nil {
				return err
			} else if !exists {
				return syscall.ENOENT
			}
			sym.Inode = ino
			return mustInsert(s, &sym)
		}
		return nil
	}, srcIno))
}

func (m *dbMeta) doBatchClone(ctx Context, srcParent Ino, dstParent Ino, entries []*Entry, cmode uint8, cumask uint16, result *batchCloneResult) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}
	if _, err := m.validateCloneTarget(ctx, m.db, dstParent); err != nil {
		return errno(err)
	}

	type cloneInfo struct {
		srcIno  Ino
		dstIno  Ino
		name    []byte
		dstNode node
	}

	cloneInfos := make([]*cloneInfo, len(entries))
	srcInodes := make([]Ino, 0, len(entries))
	srcInodeSet := make(map[Ino]struct{}, len(entries))
	for i, e := range entries {
		dstIno, err := m.nextInode()
		if err != nil {
			return errno(err)
		}
		cloneInfos[i] = &cloneInfo{srcIno: e.Inode, dstIno: dstIno, name: e.Name}
		if _, exists := srcInodeSet[e.Inode]; !exists {
			srcInodeSet[e.Inode] = struct{}{}
			srcInodes = append(srcInodes, e.Inode)
		}
	}

	err := m.txn(func(s *xorm.Session) error {
		nowNano := time.Now().UnixNano()
		*result = batchCloneResult{deltas: make(ugQuotaDeltas)}

		pn, err := m.validateCloneTarget(ctx, s, dstParent)
		if err != nil {
			return err
		}

		var srcNodes []node
		if err := s.In("inode", srcInodes).ForUpdate().Find(&srcNodes); err != nil {
			return err
		}
		srcNodeMap := make(map[Ino]*node, len(srcNodes))
		for i := range srcNodes {
			srcNodeMap[srcNodes[i].Inode] = &srcNodes[i]
		}

		nodesIns := make([]interface{}, 0, len(entries))
		edgesIns := make([]interface{}, 0, len(entries))
		fileInodes := make([]Ino, 0)
		symlinkInodes := make([]Ino, 0)
		symlinkClones := make([]*cloneInfo, 0)
		for _, info := range cloneInfos {
			sn, ok := srcNodeMap[info.srcIno]
			if !ok {
				return syscall.ENOENT
			}
			if sn.Type == TypeDirectory {
				return syscall.EINVAL
			}
			var attr Attr
			m.parseAttr(sn, &attr)
			if st := m.Access(ctx, info.srcIno, MODE_MASK_R, &attr); st != 0 {
				return st
			}

			info.dstNode = *sn
			info.dstNode.Inode = info.dstIno
			info.dstNode.Parent = dstParent
			if cmode&CLONE_MODE_PRESERVE_ATTR == 0 {
				info.dstNode.Uid = ctx.Uid()
				info.dstNode.Gid = ctx.Gid()
				info.dstNode.Mode &= ^cumask
				info.dstNode.setAtime(nowNano)
				info.dstNode.setMtime(nowNano)
				info.dstNode.setCtime(nowNano)
			}
			if sn.Type == TypeFile && sn.Nlink > 1 {
				info.dstNode.Nlink = 1
			}

			nodesIns = append(nodesIns, &info.dstNode)
			edgesIns = append(edgesIns, &edge{
				Parent: dstParent,
				Name:   info.name,
				Inode:  info.dstIno,
				Type:   sn.Type,
			})

			switch sn.Type {
			case TypeFile:
				if sn.Length > 0 {
					fileInodes = append(fileInodes, info.srcIno)
				}
			case TypeSymlink:
				symlinkInodes = append(symlinkInodes, info.srcIno)
				symlinkClones = append(symlinkClones, info)
			}

			entrySpace := align4K(sn.Length)
			result.length += int64(sn.Length)
			result.space += entrySpace
			result.inodes++
			result.deltas.add(&ugQuotaDelta{
				Uid:    info.dstNode.Uid,
				Gid:    info.dstNode.Gid,
				Space:  entrySpace,
				Inodes: 1,
			})
		}

		if err := mustInsert(s, nodesIns...); err != nil {
			return err
		}
		if err := mustInsert(s, edgesIns...); err != nil {
			if isDuplicateEntryErr(err) {
				return syscall.EEXIST
			}
			return err
		}

		chunkRefCounts := make(map[uint64]int)
		if len(fileInodes) > 0 {
			var srcChunks []chunk
			if err := s.In("inode", fileInodes).ForUpdate().Find(&srcChunks); err != nil {
				return err
			}
			chunksByInode := make(map[Ino][]chunk, len(fileInodes))
			for _, c := range srcChunks {
				chunksByInode[c.Inode] = append(chunksByInode[c.Inode], c)
			}
			chunksIns := make([]interface{}, 0, len(srcChunks))
			for i := range cloneInfos {
				for _, c := range chunksByInode[cloneInfos[i].srcIno] {
					chunksIns = append(chunksIns, &chunk{
						Inode: cloneInfos[i].dstIno, Indx: c.Indx, Slices: c.Slices,
					})
					for _, sli := range readSliceBuf(c.Slices) {
						if sli.id > 0 {
							chunkRefCounts[sli.id]++
						}
					}
				}
			}
			if err := mustInsert(s, chunksIns...); err != nil {
				return err
			}
		}

		if len(symlinkInodes) > 0 {
			var srcSymlinks []symlink
			if err := s.In("inode", symlinkInodes).Find(&srcSymlinks); err != nil {
				return err
			}
			symlinkMap := make(map[Ino][]byte, len(srcSymlinks))
			for _, sl := range srcSymlinks {
				symlinkMap[sl.Inode] = sl.Target
			}
			symlinksIns := make([]interface{}, 0, len(symlinkClones))
			for i := range symlinkClones {
				if target, ok := symlinkMap[symlinkClones[i].srcIno]; ok {
					symlinksIns = append(symlinksIns, &symlink{Inode: symlinkClones[i].dstIno, Target: target})
				} else {
					return syscall.ENOENT
				}
			}
			if err := mustInsert(s, symlinksIns...); err != nil {
				return err
			}
		}

		var srcXattrs []xattr
		if err := s.In("inode", srcInodes).Find(&srcXattrs); err != nil {
			return err
		}
		if len(srcXattrs) > 0 {
			xattrsByInode := make(map[Ino][]xattr)
			for _, x := range srcXattrs {
				xattrsByInode[x.Inode] = append(xattrsByInode[x.Inode], x)
			}
			xattrsIns := make([]interface{}, 0, len(srcXattrs))
			for i := range cloneInfos {
				for _, x := range xattrsByInode[cloneInfos[i].srcIno] {
					xattrsIns = append(xattrsIns, &xattr{Inode: cloneInfos[i].dstIno, Name: x.Name, Value: x.Value})
				}
			}
			if err := mustInsert(s, xattrsIns...); err != nil {
				return err
			}
		}

		if err := func() error {
			if len(chunkRefCounts) == 0 {
				return nil
			}
			chunkIds := make([]uint64, 0, len(chunkRefCounts))
			for id := range chunkRefCounts {
				chunkIds = append(chunkIds, id)
			}
			slices.Sort(chunkIds)

			batchSize := m.getTxnBatchNum()
			for start := 0; start < len(chunkIds); start += batchSize {
				end := min(start+batchSize, len(chunkIds))
				batch := chunkIds[start:end]
				var sb strings.Builder
				args := make([]interface{}, 0, len(batch)*3)
				fmt.Fprintf(&sb, "UPDATE %schunk_ref SET refs = refs + CASE ", m.tablePrefix)
				for _, id := range batch {
					sb.WriteString("WHEN chunkid = ? THEN ? ")
					args = append(args, id, chunkRefCounts[id])
				}
				sb.WriteString("ELSE 0 END WHERE chunkid IN (")
				for i, id := range batch {
					if i > 0 {
						sb.WriteString(",")
					}
					sb.WriteString("?")
					args = append(args, id)
				}
				sb.WriteString(")")
				if _, err := s.Exec(append([]interface{}{sb.String()}, args...)...); err != nil {
					return err
				}
			}
			return nil
		}(); err != nil {
			return err
		}

		if cmode&CLONE_MODE_PRESERVE_ATTR == 0 {
			pn.setMtime(nowNano)
			pn.setCtime(nowNano)
			if _, err := s.Cols("mtime", "ctime", "mtimensec", "ctimensec").
				Update(&pn, &node{Inode: dstParent}); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		return errno(err)
	}
	return 0
}

func (m *dbMeta) doFindDetachedNodes(t time.Time) []Ino {
	var inodes []Ino
	err := m.roTxn(Background(), func(s *xorm.Session) error {
		var nodes []detachedNode
		err := s.Where("added < ?", t.Unix()).Find(&nodes)
		for _, n := range nodes {
			inodes = append(inodes, n.Inode)
		}
		return err
	})
	if err != nil {
		logger.Errorf("Scan detached nodes error: %s", err)
	}
	return inodes
}

func (m *dbMeta) doCleanupDetachedNode(ctx Context, ino Ino) syscall.Errno {
	exist, err := m.db.Exist(&node{Inode: ino})
	if err != nil || !exist {
		return errno(err)
	}
	rmConcurrent := make(chan int, 10)
	if eno := m.emptyDir(ctx, ino, true, nil, rmConcurrent); eno != 0 {
		return eno
	}
	m.updateStats(-align4K(0), -1)
	return errno(m.txn(func(s *xorm.Session) error {
		if _, err := s.Delete(&node{Inode: ino}); err != nil {
			return err
		}
		if _, err := s.Delete(&dirStats{Inode: ino}); err != nil {
			return err
		}
		if _, err = s.Delete(&xattr{Inode: ino}); err != nil {
			return err
		}
		_, err = s.Delete(&detachedNode{Inode: ino})
		return err
	}, ino))
}

func (m *dbMeta) doAttachDirNode(ctx Context, parent Ino, inode Ino, name string) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		// must lock parent node first to avoid deadlock
		var n = node{Inode: parent}
		ok, err := s.ForUpdate().Get(&n)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		if n.Type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if n.Parent > TrashInode {
			return syscall.ENOENT
		}
		if (n.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		n.Nlink++
		now := time.Now().UnixNano()
		n.setMtime(now)
		n.setCtime(now)
		if _, err = s.Cols("nlink", "mtime", "ctime", "mtimensec", "ctimensec").Update(&n, &node{Inode: parent}); err != nil {
			return err
		}
		if err := mustInsert(s, &edge{Parent: parent, Name: []byte(name), Inode: inode, Type: TypeDirectory}); err != nil {
			if isDuplicateEntryErr(err) {
				return syscall.EEXIST
			}
			return err
		}
		_, err = s.Delete(&detachedNode{Inode: inode})
		return err
	}, parent))
}

func (m *dbMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Time) (bool, error) {
	var updated bool
	err := m.txn(func(s *xorm.Session) error {
		curNode := node{Inode: inode}
		ok, err := s.ForUpdate().Get(&curNode)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		m.parseAttr(&curNode, attr)
		if !m.atimeNeedsUpdate(attr, now) {
			return nil
		}
		curNode.setAtime(now.UnixNano())
		attr.Atime = curNode.Atime / 1e6
		attr.Atimensec = uint32(curNode.Atime%1e6*1000) + uint32(curNode.Atimensec)
		if _, err = s.Cols("atime", "atimensec").Update(&curNode, &node{Inode: inode}); err == nil {
			updated = true
		}
		return err
	}, inode)
	return updated, err
}

func (m *dbMeta) insertACL(s *xorm.Session, rule *aclAPI.Rule) (uint32, error) {
	if rule == nil {
		return aclAPI.None, nil
	}
	if err := m.tryLoadMissACLs(s); err != nil {
		logger.Warnf("Mknode: load miss acls error: %s", err)
	}
	var aclId uint32
	if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None {
		// TODO conflicts from multiple clients are rare and result in only minor duplicates, thus not addressed for now.
		val := newSQLAcl(rule)
		if _, err := s.Insert(val); err != nil {
			return aclAPI.None, err
		}
		aclId = val.Id
		m.aclCache.Put(aclId, rule)
	}
	return aclId, nil
}

func (m *dbMeta) tryLoadMissACLs(s *xorm.Session) error {
	missIds := m.aclCache.GetMissIds()
	if len(missIds) > 0 {
		var acls []acl
		if err := s.In("id", missIds).Find(&acls); err != nil {
			return err
		}

		got := make(map[uint32]struct{}, len(acls))
		for _, data := range acls {
			got[data.Id] = struct{}{}
			m.aclCache.Put(data.Id, data.toRule())
		}
		if len(acls) < len(missIds) {
			for _, id := range missIds {
				if _, ok := got[id]; !ok {
					m.aclCache.Put(id, aclAPI.EmptyRule())
				}
			}
		}
	}
	return nil
}

func (m *dbMeta) getACL(s *xorm.Session, id uint32) (*aclAPI.Rule, error) {
	if id == aclAPI.None {
		return nil, nil
	}
	if cRule := m.aclCache.Get(id); cRule != nil {
		return cRule, nil
	}

	var aclVal = &acl{Id: id}
	if ok, err := s.Get(aclVal); err != nil {
		return nil, err
	} else if !ok {
		return nil, syscall.EIO
	}

	r := aclVal.toRule()
	m.aclCache.Put(id, r)
	return r, nil
}

func (m *dbMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		attr := &Attr{}
		n := &node{Inode: ino}
		if ok, err := s.ForUpdate().Get(n); err != nil {
			return err
		} else if !ok {
			return syscall.ENOENT
		}
		m.parseAttr(n, attr)

		if ctx.Uid() != 0 && ctx.Uid() != attr.Uid {
			return syscall.EPERM
		}

		if attr.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}

		oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode

		// https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/fs/fuse/acl.c#L143-L151
		// TODO: check linux capabilities
		if ctx.Uid() != 0 && !inGroup(ctx, attr.Gid) {
			// clear sgid
			attr.Mode &= 05777
		}

		if rule.IsEmpty() {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
		} else if rule.IsMinimal() && aclType == aclAPI.TypeAccess {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
			// set mode
			attr.Mode &= 07000
			attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7)
		} else {
			// set acl
			rule.InheritPerms(attr.Mode)
			aclId, err := m.insertACL(s, rule)
			if err != nil {
				return err
			}
			setAttrACLId(attr, aclType, aclId)

			// set mode
			if aclType == aclAPI.TypeAccess {
				attr.Mode &= 07000
				attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7)
			}
		}

		// update attr
		var updateCols []string
		if oriACL != getAttrACLId(attr, aclType) {
			updateCols = append(updateCols, getACLIdColName(aclType))
		}
		if oriMode != attr.Mode {
			updateCols = append(updateCols, "mode")
		}
		if len(updateCols) > 0 {
			updateCols = append(updateCols, "ctime", "ctimensec")

			var dirtyNode node
			m.parseNode(attr, &dirtyNode)
			dirtyNode.setCtime(time.Now().UnixNano())
			_, err := s.Cols(updateCols...).Update(&dirtyNode, &node{Inode: ino})
			return err
		}

		return nil
	}, ino))
}

func (m *dbMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, aclId uint32, rule *aclAPI.Rule) syscall.Errno {
	return errno(m.roTxn(ctx, func(s *xorm.Session) error {
		if aclId == aclAPI.None {
			attr := &Attr{}
			n := &node{Inode: ino}
			if ok, err := s.Get(n); err != nil {
				return err
			} else if !ok {
				return syscall.ENOENT
			}
			m.parseAttr(n, attr)
			m.of.Update(ino, attr)
			aclId = getAttrACLId(attr, aclType)
		}

		a, err := m.getACL(s, aclId)
		if err != nil {
			return err
		}
		if a == nil {
			return ENOATTR
		}
		*rule = *a
		return nil
	}))
}

func (m *dbMeta) loadDumpedACLs(ctx Context) error {
	id2Rule := m.aclCache.GetAll()
	if len(id2Rule) == 0 {
		return nil
	}

	acls := make([]*acl, 0, len(id2Rule))
	for id, rule := range id2Rule {
		aclV := newSQLAcl(rule)
		aclV.Id = id
		acls = append(acls, aclV)
	}

	return m.txn(func(s *xorm.Session) error {
		n, err := s.Insert(acls)
		if err != nil {
			return err
		}
		if int(n) != len(acls) {
			return fmt.Errorf("only %d acls inserted, expected %d", n, len(acls))
		}
		return nil
	})
}

func (m *dbMeta) doStoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno) {
	err := m.txn(func(s *xorm.Session) error {
		t := &delegationToken{Token: token}
		_, err := s.Insert(t)
		if err != nil {
			return err
		}
		id = t.Id
		return nil
	})
	return id, errno(err)
}

func (m *dbMeta) doUpdateToken(ctx Context, id uint32, token []byte) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		_, err := s.Cols("token").Update(&delegationToken{Id: id, Token: token}, &delegationToken{Id: id})
		return err
	}))
}

func (m *dbMeta) doLoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno) {
	err := m.simpleTxn(ctx, func(s *xorm.Session) error {
		t := &delegationToken{Id: id}
		ok, err := s.Get(t)
		if err != nil {
			return err
		}
		if !ok {
			return syscall.ENOENT
		}
		token = t.Token
		return nil
	})
	return token, errno(err)
}

func (m *dbMeta) doDeleteTokens(ctx Context, ids []uint32) syscall.Errno {
	return errno(m.txn(func(s *xorm.Session) error {
		_, err := s.In("id", ids).Delete(&delegationToken{})
		return err
	}))
}

func (m *dbMeta) doListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno) {
	err := m.roTxn(ctx, func(s *xorm.Session) error {
		var ts []delegationToken
		err := s.Find(&ts)
		if err != nil {
			return err
		}
		tokens = make(map[uint32][]byte, len(ts))
		for _, t := range ts {
			tokens[t.Id] = t.Token
		}
		return nil
	})
	return tokens, errno(err)
}

type dbDirHandler struct {
	dirHandler
}

func (m *dbMeta) newDirHandler(inode Ino, plus bool, entries []*Entry) DirHandler {
	h := &dbDirHandler{
		dirHandler: dirHandler{
			inode:       inode,
			plus:        plus,
			initEntries: entries,
			fetcher:     m.getDirFetcher(),
			batchNum:    DirBatchNum["db"],
		},
	}
	h.batch, _ = h.fetch(Background(), 0)
	return h
}

func (m *dbMeta) getDirFetcher() dirFetcher {
	return func(ctx Context, inode Ino, cursor interface{}, offset, limit int, plus bool) (interface{}, []*Entry, error) {
		entries := make([]*Entry, 0, limit)
		err := m.roTxn(Background(), func(s *xorm.Session) error {
			var name []byte
			if cursor != nil {
				name = cursor.([]byte)
			} else {
				if offset > 0 {
					var edges []edge
					if err := s.Table(&edge{}).Where("parent = ?", inode).OrderBy("name").Limit(1, offset-1).Find(&edges); err != nil {
						return err
					}
					if len(edges) < 1 {
						return nil
					}
					name = edges[0].Name
				}
			}

			var ids []int64
			var err error
			// sorted by (parent, name) index
			if name == nil {
				err = s.Table(&edge{}).Cols("id").Where("parent = ?", inode).OrderBy("name").Limit(limit).Find(&ids)
			} else {
				err = s.Table(&edge{}).Cols("id").Where("parent = ? and name > ?", inode, name).OrderBy("name").Limit(limit).Find(&ids)
			}
			if err != nil {
				return err
			}

			s = s.Table(&edge{}).In(m.sqlConv("edge.id"), ids).OrderBy(m.sqlConv("edge.name")) // need to sorted by name, otherwise the cursor will be invalid
			if plus {
				s = s.Join("INNER", &node{}, m.sqlConv("edge.inode=node.inode")).Cols(m.sqlConv("edge.name"), m.sqlConv("node.*"))
			} else {
				s = s.Cols(m.sqlConv("edge.id"), m.sqlConv("edge.name"), m.sqlConv("edge.type"))
			}
			var nodes []namedNode
			if err := s.Find(&nodes); err != nil {
				return err
			}

			for _, n := range nodes {
				if len(n.Name) == 0 {
					logger.Errorf("Corrupt entry with empty name: inode %d parent %d", n.Inode, inode)
					continue
				}
				entry := &Entry{
					Inode: n.Inode,
					Name:  n.Name,
					Attr:  &Attr{},
				}
				if plus {
					m.parseAttr(&n.node, entry.Attr)
					m.of.Update(n.Inode, entry.Attr)
				} else {
					entry.Attr.Typ = n.Type
				}
				entries = append(entries, entry)
			}
			return nil
		})
		if err != nil {
			return nil, nil, err
		}
		if len(entries) == 0 {
			return nil, nil, nil
		}
		return entries[len(entries)-1].Name, entries, nil
	}
}


================================================
FILE: pkg/meta/sql_bak.go
================================================
//go:build !nosqlite || !nomysql || !nopg
// +build !nosqlite !nomysql !nopg

/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"database/sql"
	"fmt"
	"strings"
	"sync"
	"sync/atomic"

	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/meta/pb"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
	"xorm.io/xorm"
)

var (
	sqlDumpBatchSize = 100000
)

func (m *dbMeta) dump(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var dumps = []func(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error{
		m.dumpFormat,
		m.dumpCounters,
		m.dumpNodes,
		m.dumpChunks,
		m.dumpEdges,
		m.dumpSymlinks,
		m.dumpSustained,
		m.dumpDelFiles,
		m.dumpSliceRef,
		m.dumpACL,
		m.dumpXattr,
		m.dumpQuota,
		m.dumpDirStat,
	}

	ctx = ctx.WithValue(txMaxRetryKey{}, 3)
	if opt.Threads == 1 {
		// use same txn for all dumps
		sess := m.db.NewSession()
		defer sess.Close()

		opt := sql.TxOptions{
			Isolation: sql.LevelRepeatableRead,
			ReadOnly:  true,
		}
		err := sess.BeginTx(&opt)
		if err != nil && (strings.Contains(err.Error(), "READ") || strings.Contains(err.Error(), "driver does not support read-only transactions")) {
			logger.Warnf("the database does not support read-only transaction")
			opt = sql.TxOptions{} // use default level
			if err = sess.BeginTx(&opt); err != nil {
				return err
			}
		}
		defer sess.Rollback() //nolint:errcheck
		ctx = ctx.WithValue(txSessionKey{}, sess)
	} else {
		logger.Warnf("dump database with %d threads, please make sure that it's readonly, "+
			"otherwise the dumped metadata will be inconsistent", opt.Threads)
	}
	for _, f := range dumps {
		err := f(ctx, opt, ch)
		if err != nil {
			return err
		}
	}
	return nil
}

func (m *dbMeta) execTxn(ctx context.Context, f func(s *xorm.Session) error) error {
	if val := ctx.Value(txSessionKey{}); val != nil {
		return f(val.(*xorm.Session))
	}
	return m.roTxn(ctx, f)
}

func sqlQueryBatch(ctx Context, opt *DumpOption, maxId uint64, query func(ctx context.Context, start, end uint64) (int, error)) error {
	eg, egCtx := errgroup.WithContext(ctx)
	eg.SetLimit(opt.Threads)

	sum := int64(0)
	batch := uint64(sqlDumpBatchSize)
	for id := uint64(0); id <= maxId; id += batch {
		startId := id
		eg.Go(func() error {
			n, err := query(egCtx, startId, startId+batch)
			atomic.AddInt64(&sum, int64(n))
			return err
		})
	}
	logger.Debugf("dump %d rows", sum)
	return eg.Wait()
}

func (m *dbMeta) dumpNodes(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	pool := sync.Pool{New: func() interface{} { return &pb.Node{} }}
	release := func(p proto.Message) {
		for _, s := range p.(*pb.Batch).Nodes {
			pool.Put(s)
		}
	}

	var rows []node
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Where("inode >= ?", TrashInode).Find(&rows)
	}); err != nil {
		return err
	}
	nodes := make([]*pb.Node, 0, len(rows))
	var attr Attr
	for _, n := range rows {
		pn := pool.Get().(*pb.Node)
		pn.Inode = uint64(n.Inode)
		m.parseAttr(&n, &attr)
		pn.Data = m.marshal(&attr)
		nodes = append(nodes, pn)
	}
	if err := dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Nodes: nodes}, release}); err != nil {
		return errors.Wrap(err, "dump trash nodes")
	}

	var maxInode uint64
	err := m.execTxn(ctx, func(s *xorm.Session) error {
		var row node
		ok, err := s.Select("max(inode) as inode").Where("inode < ?", TrashInode).Get(&row)
		if ok {
			maxInode = uint64(row.Inode)
		}
		return err
	})
	if err != nil {
		return errors.Wrap(err, "max inode")
	}

	return sqlQueryBatch(ctx, opt, maxInode, func(ctx context.Context, start, end uint64) (int, error) {
		var rows []node
		if err := m.execTxn(ctx, func(s *xorm.Session) error {
			return s.Where("inode >= ? AND inode < ?", start, end).Find(&rows)
		}); err != nil {
			return 0, err
		}
		nodes := make([]*pb.Node, 0, len(rows))
		var attr Attr
		for _, n := range rows {
			pn := pool.Get().(*pb.Node)
			pn.Inode = uint64(n.Inode)
			m.parseAttr(&n, &attr)
			pn.Data = m.marshal(&attr)
			nodes = append(nodes, pn)
		}
		return len(rows), dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Nodes: nodes}, release})
	})
}

func (m *dbMeta) dumpChunks(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	pool := sync.Pool{New: func() interface{} { return &pb.Chunk{} }}
	release := func(p proto.Message) {
		for _, s := range p.(*pb.Batch).Chunks {
			pool.Put(s)
		}
	}

	var maxId uint64
	err := m.execTxn(ctx, func(s *xorm.Session) error {
		var row chunk
		ok, err := s.Select("MAX(id) as id").Get(&row)
		if ok {
			maxId = uint64(row.Id)
		}
		return err
	})
	if err != nil {
		return err
	}

	return sqlQueryBatch(ctx, opt, maxId, func(ctx context.Context, start, end uint64) (int, error) {
		var rows []chunk
		if err := m.execTxn(ctx, func(s *xorm.Session) error {
			return s.Where("id >= ? AND id < ?", start, end).Find(&rows)
		}); err != nil {
			return 0, err
		}
		chunks := make([]*pb.Chunk, 0, len(rows))
		for _, c := range rows {
			pc := pool.Get().(*pb.Chunk)
			pc.Inode = uint64(c.Inode)
			pc.Index = c.Indx
			pc.Slices = c.Slices
			chunks = append(chunks, pc)
		}
		return len(rows), dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Chunks: chunks}, release})
	})
}

func (m *dbMeta) dumpEdges(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	pool := sync.Pool{New: func() interface{} { return &pb.Edge{} }}
	release := func(p proto.Message) {
		for _, s := range p.(*pb.Batch).Edges {
			pool.Put(s)
		}
	}

	var maxId uint64
	err := m.execTxn(ctx, func(s *xorm.Session) error {
		var row edge
		ok, err := s.Select("MAX(id) as id").Get(&row)
		if ok {
			maxId = uint64(row.Id)
		}
		return err
	})
	if err != nil {
		return err
	}

	var mu sync.Mutex
	dumpParents := make(map[uint64][]uint64)
	err = sqlQueryBatch(ctx, opt, maxId, func(ctx context.Context, start, end uint64) (int, error) {
		var rows []edge
		if err := m.execTxn(ctx, func(s *xorm.Session) error {
			return s.Where("id >= ? AND id < ?", start, end).Find(&rows)
		}); err != nil {
			return 0, err
		}
		edges := make([]*pb.Edge, 0, len(rows))
		for _, e := range rows {
			pe := pool.Get().(*pb.Edge)
			pe.Parent = uint64(e.Parent)
			pe.Inode = uint64(e.Inode)
			pe.Name = e.Name
			pe.Type = uint32(e.Type)
			edges = append(edges, pe)
			mu.Lock()
			dumpParents[uint64(e.Inode)] = append(dumpParents[uint64(e.Inode)], uint64(e.Parent))
			mu.Unlock()
		}
		return len(rows), dumpResult(ctx, ch, &dumpedResult{&pb.Batch{Edges: edges}, release})
	})
	if err != nil {
		return err
	}

	parents := make([]*pb.Parent, 0, sqlDumpBatchSize)
	st := make(map[uint64]int64)
	for inode, ps := range dumpParents {
		if len(ps) > 1 {
			for k := range st {
				delete(st, k)
			}
			for _, p := range ps {
				st[p] = st[p] + 1
			}
			for parent, cnt := range st {
				parents = append(parents, &pb.Parent{Inode: inode, Parent: parent, Cnt: cnt})
			}
		}
		if len(parents) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Parents: parents}}); err != nil {
				return err
			}
			parents = make([]*pb.Parent, 0, sqlDumpBatchSize)
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Parents: parents}})
}

func (m *dbMeta) dumpSymlinks(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []symlink
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}

	symlinks := make([]*pb.Symlink, 0, min(len(rows), sqlDumpBatchSize))
	for i, r := range rows {
		symlinks = append(symlinks, &pb.Symlink{Inode: uint64(r.Inode), Target: r.Target})
		if len(symlinks) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Symlinks: symlinks}}); err != nil {
				return err
			}
			symlinks = make([]*pb.Symlink, 0, min(len(rows)-i-1, sqlDumpBatchSize))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Symlinks: symlinks}})
}

func (m *dbMeta) dumpCounters(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []counter
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	var counters = make([]*pb.Counter, 0, len(rows))
	for _, row := range rows {
		counters = append(counters, &pb.Counter{Key: row.Name, Value: row.Value})
	}
	logger.Debugf("dump counters %+v", counters)
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Counters: counters}})
}

func (m *dbMeta) dumpSustained(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []sustained
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	ss := make(map[uint64][]uint64)
	for _, row := range rows {
		ss[row.Sid] = append(ss[row.Sid], uint64(row.Inode))
	}
	sustained := make([]*pb.Sustained, 0, len(rows))
	for k, v := range ss {
		sustained = append(sustained, &pb.Sustained{Sid: k, Inodes: v})
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Sustained: sustained}})
}

func (m *dbMeta) dumpDelFiles(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []delfile
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	delFiles := make([]*pb.DelFile, 0, min(sqlDumpBatchSize, len(rows)))
	for i, row := range rows {
		delFiles = append(delFiles, &pb.DelFile{Inode: uint64(row.Inode), Length: row.Length, Expire: row.Expire})
		if len(delFiles) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Delfiles: delFiles}}); err != nil {
				return err
			}
			delFiles = make([]*pb.DelFile, 0, min(sqlDumpBatchSize, len(rows)-i-1))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Delfiles: delFiles}})
}

func (m *dbMeta) dumpSliceRef(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []sliceRef
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Where("refs != 1").Find(&rows) // skip default refs
	}); err != nil {
		return err
	}
	sliceRefs := make([]*pb.SliceRef, 0, min(sqlDumpBatchSize, len(rows)))
	for i, sr := range rows {
		sliceRefs = append(sliceRefs, &pb.SliceRef{Id: sr.Id, Size: sr.Size, Refs: int64(sr.Refs)})
		if len(sliceRefs) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{SliceRefs: sliceRefs}}); err != nil {
				return err
			}
			sliceRefs = make([]*pb.SliceRef, 0, min(sqlDumpBatchSize, len(rows)-i-1))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{SliceRefs: sliceRefs}})
}

func (m *dbMeta) dumpACL(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []acl
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	acls := make([]*pb.Acl, 0, len(rows))
	for _, row := range rows {
		acls = append(acls, &pb.Acl{
			Id:   row.Id,
			Data: row.toRule().Encode(),
		})
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Acls: acls}})
}

func (m *dbMeta) dumpXattr(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []xattr
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	xattrs := make([]*pb.Xattr, 0, min(sqlDumpBatchSize, len(rows)))
	for i, x := range rows {
		xattrs = append(xattrs, &pb.Xattr{
			Inode: uint64(x.Inode),
			Name:  x.Name,
			Value: x.Value,
		})
		if len(xattrs) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Xattrs: xattrs}}); err != nil {
				return err
			}
			xattrs = make([]*pb.Xattr, 0, min(sqlDumpBatchSize, len(rows)-i-1))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Xattrs: xattrs}})
}

func (m *dbMeta) dumpQuota(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []dirQuota
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	quotas := make([]*pb.Quota, 0, len(rows))
	for _, q := range rows {
		quotas = append(quotas, &pb.Quota{
			Inode:      uint64(q.Inode),
			MaxSpace:   q.MaxSpace,
			MaxInodes:  q.MaxInodes,
			UsedSpace:  q.UsedSpace,
			UsedInodes: q.UsedInodes,
		})
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Quotas: quotas}})
}

func (m *dbMeta) dumpDirStat(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var rows []dirStats
	if err := m.execTxn(ctx, func(s *xorm.Session) error {
		return s.Find(&rows)
	}); err != nil {
		return err
	}
	dirStats := make([]*pb.Stat, 0, min(sqlDumpBatchSize, len(rows)))
	for i, st := range rows {
		dirStats = append(dirStats, &pb.Stat{
			Inode:      uint64(st.Inode),
			DataLength: st.DataLength,
			UsedInodes: st.UsedInodes,
			UsedSpace:  st.UsedSpace,
		})
		if len(dirStats) >= sqlDumpBatchSize {
			if err := dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Dirstats: dirStats}}); err != nil {
				return err
			}
			dirStats = make([]*pb.Stat, 0, min(sqlDumpBatchSize, len(rows)-i-1))
		}
	}
	return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Dirstats: dirStats}})
}

func (m *dbMeta) load(ctx Context, typ int, opt *LoadOption, val proto.Message) error {
	switch typ {
	case segTypeFormat:
		return m.loadFormat(ctx, val)
	case segTypeCounter:
		return m.loadCounters(ctx, val)
	case segTypeNode:
		return m.loadNodes(ctx, val)
	case segTypeChunk:
		return m.loadChunks(ctx, val)
	case segTypeEdge:
		return m.loadEdges(ctx, val)
	case segTypeSymlink:
		return m.loadSymlinks(ctx, val)
	case segTypeSustained:
		return m.loadSustained(ctx, val)
	case segTypeDelFile:
		return m.loadDelFiles(ctx, val)
	case segTypeSliceRef:
		return m.loadSliceRefs(ctx, val)
	case segTypeAcl:
		return m.loadAcl(ctx, val)
	case segTypeXattr:
		return m.loadXattrs(ctx, val)
	case segTypeQuota:
		return m.loadQuota(ctx, val)
	case segTypeStat:
		return m.loadDirStats(ctx, val)
	case segTypeParent:
		return nil // skip
	default:
		logger.Warnf("skip segment type %d", typ)
		return nil
	}
}

func (m *dbMeta) loadFormat(ctx Context, msg proto.Message) error {
	return m.insertRows([]interface{}{
		&setting{
			Name:  "format",
			Value: string(msg.(*pb.Format).Data),
		},
	})
}

func (m *dbMeta) loadCounters(ctx Context, msg proto.Message) error {
	var rows []interface{}
	for _, c := range msg.(*pb.Batch).Counters {
		rows = append(rows, counter{Name: c.Key, Value: c.Value})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadNodes(ctx Context, msg proto.Message) error {
	nodes := msg.(*pb.Batch).Nodes
	b := m.getBase()
	rows := make([]interface{}, 0, len(nodes))
	ns := make([]node, len(nodes))
	attr := &Attr{}
	for i, n := range nodes {
		pn := &ns[i]
		pn.Inode = Ino(n.Inode)
		attr.reset()
		b.parseAttr(n.Data, attr)
		m.parseNode(attr, pn)
		rows = append(rows, pn)
	}
	return m.insertRows(rows)
}

func genMultiSQL(stmt string, num int) string {
	if num <= 0 {
		return ""
	}
	if num == 1 {
		return stmt
	}
	pattern := "(?,?,?)"
	idx := strings.Index(stmt, pattern)
	if idx == -1 {
		return stmt
	}
	values := strings.Repeat(pattern+",", num)
	values = values[:len(values)-1]
	return stmt[:idx] + values + stmt[idx+len(pattern):]
}

func insertSliceRefs(m *dbMeta, ss []*sliceRef) error {
	driver := m.Name()
	var stmt string
	if driver == "sqlite3" || driver == "postgres" {
		stmt = m.sqlConv(`INSERT INTO chunk_ref (chunkid, size, refs) VALUES (?,?,?) ON CONFLICT DO NOTHING`)
	} else {
		stmt = m.sqlConv(`INSERT IGNORE INTO chunk_ref (chunkid, size, refs) VALUES (?,?,?)`)
	}

	batch := m.getTxnBatchNum()
	for len(ss) > 0 {
		bs := min(batch, len(ss))
		err := m.txn(func(s *xorm.Session) error {
			nStmt := genMultiSQL(stmt, bs)
			rows := make([]interface{}, 0, 1+bs*3)
			rows = append(rows, nStmt)
			for i := 0; i < bs; i++ {
				rows = append(rows, ss[i].Id, ss[i].Size, ss[i].Refs)
			}
			_, err := s.Exec(rows...)
			return err
		})
		if err != nil {
			logger.Errorf("write %d slice ref: %s", bs, err)
			return err
		}
		ss = ss[bs:]
	}
	return nil
}

func (m *dbMeta) loadChunks(ctx Context, msg proto.Message) error {
	chunks := msg.(*pb.Batch).Chunks
	chkRows := make([]interface{}, 0, len(chunks))
	srRows := make([]*sliceRef, 0, len(chunks))
	cs := make([]chunk, len(chunks))
	for i, c := range chunks {
		pc := &cs[i]
		pc.Inode = Ino(c.Inode)
		pc.Indx = c.Index
		pc.Slices = c.Slices
		chkRows = append(chkRows, pc)

		ss := readSliceBuf(c.Slices)
		for _, s := range ss {
			srRows = append(srRows, &sliceRef{Id: s.id, Size: s.size, Refs: 1})
		}
	}
	if err := m.insertRows(chkRows); err != nil {
		return err
	}
	return insertSliceRefs(m, srRows)
}

func (m *dbMeta) loadEdges(ctx Context, msg proto.Message) error {
	edges := msg.(*pb.Batch).Edges
	rows := make([]interface{}, 0, len(edges))
	es := make([]edge, len(edges))
	for i, e := range edges {
		pe := &es[i]
		pe.Parent = Ino(e.Parent)
		pe.Inode = Ino(e.Inode)
		pe.Name = e.Name
		pe.Type = uint8(e.Type)
		rows = append(rows, pe)
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadSymlinks(ctx Context, msg proto.Message) error {
	symlinks := msg.(*pb.Batch).Symlinks
	rows := make([]interface{}, 0, len(symlinks))
	for _, sl := range symlinks {
		rows = append(rows, &symlink{Ino(sl.Inode), sl.Target})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadSustained(ctx Context, msg proto.Message) error {
	sustaineds := msg.(*pb.Batch).Sustained
	rows := make([]interface{}, 0, len(sustaineds))
	for _, s := range sustaineds {
		for _, inode := range s.Inodes {
			rows = append(rows, sustained{Sid: s.Sid, Inode: Ino(inode)})
		}
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadDelFiles(ctx Context, msg proto.Message) error {
	delfiles := msg.(*pb.Batch).Delfiles
	rows := make([]interface{}, 0, len(delfiles))
	for _, f := range delfiles {
		rows = append(rows, &delfile{Inode: Ino(f.Inode), Length: f.Length, Expire: f.Expire})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) upsertSliceRef(s *xorm.Session, id uint64, size uint32, refs int) error {
	var err error
	driver := m.Name()
	if driver == "sqlite3" || driver == "postgres" {
		state := m.sqlConv(`
			 INSERT INTO chunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON CONFLICT (chunkid)
			 DO UPDATE SET size=?, refs=?`)
		_, err = s.Exec(state, id, size, refs, size, refs)
	} else {
		_, err = s.Exec(m.sqlConv(`
			 INSERT INTO chunk_ref (chunkid, size, refs)
			 VALUES (?, ?, ?)
			 ON DUPLICATE KEY UPDATE
			 size=?, refs=?`), id, size, refs, size, refs)
	}
	return err
}

func (m *dbMeta) loadSliceRefs(ctx Context, msg proto.Message) error {
	batch := m.getTxnBatchNum()
	srs := msg.(*pb.Batch).SliceRefs
	for len(srs) > 0 {
		num := min(batch, len(srs))
		err := m.txn(func(s *xorm.Session) error {
			var err error
			for i := 0; i < num; i++ {
				if err = m.upsertSliceRef(s, srs[i].Id, srs[i].Size, int(srs[i].Refs)); err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			logger.Errorf("Write %d beans: %s", num, err)
			return err
		}
		srs = srs[num:]
	}
	return nil
}

func (m *dbMeta) loadAcl(ctx Context, msg proto.Message) error {
	acls := msg.(*pb.Batch).Acls
	rows := make([]interface{}, 0, len(acls))
	for _, pa := range acls {
		rule := &aclAPI.Rule{}
		rule.Decode(pa.Data)
		acl := newSQLAcl(rule)
		acl.Id = pa.Id
		rows = append(rows, acl)
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadXattrs(ctx Context, msg proto.Message) error {
	xattrs := msg.(*pb.Batch).Xattrs
	rows := make([]interface{}, 0, len(xattrs))
	for _, x := range xattrs {
		rows = append(rows, &xattr{Inode: Ino(x.Inode), Name: x.Name, Value: x.Value})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadQuota(ctx Context, msg proto.Message) error {
	quotas := msg.(*pb.Batch).Quotas
	rows := make([]interface{}, 0, len(quotas))
	for _, q := range quotas {
		rows = append(rows, &dirQuota{
			Inode:      Ino(q.Inode),
			MaxSpace:   q.MaxSpace,
			MaxInodes:  q.MaxInodes,
			UsedSpace:  q.UsedSpace,
			UsedInodes: q.UsedInodes,
		})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) loadDirStats(ctx Context, msg proto.Message) error {
	stats := msg.(*pb.Batch).Dirstats
	rows := make([]interface{}, 0, len(stats))
	for _, st := range stats {
		rows = append(rows, &dirStats{
			Inode:      Ino(st.Inode),
			DataLength: st.DataLength,
			UsedInodes: st.UsedInodes,
			UsedSpace:  st.UsedSpace,
		})
	}
	return m.insertRows(rows)
}

func (m *dbMeta) insertRows(beans []interface{}) error {
	batch := m.getTxnBatchNum()
	for len(beans) > 0 {
		bs := min(batch, len(beans))
		err := m.txn(func(s *xorm.Session) error {
			n, err := s.Insert(beans[:bs])
			if err == nil && int(n) != bs {
				err = fmt.Errorf("only %d records inserted", n)
			}
			return err
		})
		if err != nil {
			logger.Errorf("Write %d beans: %s", bs, err)
			return err
		}
		beans = beans[bs:]
	}
	return nil
}

func (m *dbMeta) prepareLoad(ctx Context, opt *LoadOption) error {
	opt.check()
	if err := m.checkAddr(); err != nil {
		return err
	}
	if err := m.syncAllTables(); err != nil {
		return err
	}
	return nil
}


================================================
FILE: pkg/meta/sql_lock.go
================================================
//go:build !nosqlite || !nomysql || !nopg
// +build !nosqlite !nomysql !nopg

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"context"
	"fmt"
	"syscall"
	"time"
	"xorm.io/xorm"
)

func (m *dbMeta) Flock(ctx Context, inode Ino, owner_ uint64, ltype uint32, block bool) syscall.Errno {
	owner := int64(owner_)
	if ltype == F_UNLCK {
		return errno(m.txn(func(s *xorm.Session) error {
			_, err := s.MustCols("inode", "owner", "sid").Delete(&flock{Inode: inode, Owner: owner, Sid: m.sid})
			return err
		}, inode))
	}
	var err syscall.Errno
	for {
		err = errno(m.txn(func(s *xorm.Session) error {
			if exists, err := s.ForUpdate().Get(&node{Inode: inode}); err != nil || !exists {
				if err == nil && !exists {
					err = syscall.ENOENT
				}
				return err
			}
			var fs []flock
			err := s.ForUpdate().Find(&fs, &flock{Inode: inode})
			if err != nil {
				return err
			}
			type key struct {
				sid uint64
				o   int64
			}
			var locks = make(map[key]flock)
			for _, l := range fs {
				locks[key{l.Sid, l.Owner}] = l
			}

			me := key{m.sid, owner}
			flk, ok := locks[me]
			delete(locks, me)
			var typec byte = 'W'
			if ltype == F_RDLCK {
				for _, l := range locks {
					if l.Ltype == 'W' {
						return syscall.EAGAIN
					}
				}
				typec = 'R'
			} else if len(locks) > 0 {
				return syscall.EAGAIN
			}
			var n int64
			if ok {
				if flk.Ltype != typec {
					n, err = s.MustCols("inode", "owner", "sid").Cols("Ltype").Update(&flock{Ltype: typec}, &flock{Inode: inode, Owner: owner, Sid: m.sid})
				} else {
					n = 1
				}
			} else {
				n, err = s.InsertOne(&flock{Inode: inode, Owner: owner, Ltype: typec, Sid: m.sid})
			}
			if err == nil && n == 0 {
				err = fmt.Errorf("insert/update failed")
			}
			return err
		}, inode))

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return err
}

func (m *dbMeta) Getlk(ctx Context, inode Ino, owner_ uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno {
	if *ltype == F_UNLCK {
		*start = 0
		*end = 0
		*pid = 0
		return 0
	}

	owner := int64(owner_)
	rows, err := m.db.Rows(&plock{Inode: inode})
	if err != nil {
		return errno(err)
	}
	type key struct {
		sid uint64
		o   int64
	}
	var locks = make(map[key][]byte)
	var l plock
	for rows.Next() {
		l.Records = nil
		if rows.Scan(&l) == nil && !(l.Sid == m.sid && l.Owner == owner) {
			locks[key{l.Sid, l.Owner}] = dup(l.Records)
		}
	}
	_ = rows.Close()

	for k, d := range locks {
		ls := loadLocks(d)
		for _, l := range ls {
			// find conflicted locks
			if (*ltype == F_WRLCK || l.Type == F_WRLCK) && *end >= l.Start && *start <= l.End {
				*ltype = l.Type
				*start = l.Start
				*end = l.End
				if k.sid == m.sid {
					*pid = l.Pid
				} else {
					*pid = 0
				}
				return 0
			}
		}
	}
	*ltype = F_UNLCK
	*start = 0
	*end = 0
	*pid = 0
	return 0
}

func (m *dbMeta) Setlk(ctx Context, inode Ino, owner_ uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno {
	var err syscall.Errno
	lock := plockRecord{ltype, pid, start, end}
	owner := int64(owner_)
	for {
		err = errno(m.txn(func(s *xorm.Session) error {
			if exists, err := s.ForUpdate().Get(&node{Inode: inode}); err != nil || !exists {
				if err == nil && !exists {
					err = syscall.ENOENT
				}
				return err
			}
			if ltype == F_UNLCK {
				var l = plock{Inode: inode, Owner: owner, Sid: m.sid}
				ok, err := s.ForUpdate().MustCols("inode", "owner", "sid").Get(&l)
				if err != nil {
					return err
				}
				if !ok {
					return nil
				}
				ls := loadLocks(l.Records)
				if len(ls) == 0 {
					return nil
				}
				ls = updateLocks(ls, lock)
				if len(ls) == 0 {
					_, err = s.MustCols("inode", "owner", "sid").Delete(&plock{Inode: inode, Owner: owner, Sid: m.sid})
				} else {
					_, err = s.MustCols("inode", "owner", "sid").Cols("records").Update(plock{Records: dumpLocks(ls)}, l)
				}
				return err
			}
			var ps []plock
			err := s.ForUpdate().Find(&ps, &plock{Inode: inode})
			if err != nil {
				return err
			}
			type key struct {
				sid   uint64
				owner int64
			}
			var locks = make(map[key][]byte)
			for _, l := range ps {
				locks[key{l.Sid, l.Owner}] = l.Records
			}
			lkey := key{m.sid, owner}
			for k, d := range locks {
				if k == lkey {
					continue
				}
				ls := loadLocks(d)
				for _, l := range ls {
					// find conflicted locks
					if (ltype == F_WRLCK || l.Type == F_WRLCK) && end >= l.Start && start <= l.End {
						return syscall.EAGAIN
					}
				}
			}
			ls := updateLocks(loadLocks(locks[lkey]), lock)
			var n int64
			records := dumpLocks(ls)
			if len(locks[lkey]) > 0 {
				if !bytes.Equal(locks[lkey], records) {
					n, err = s.MustCols("inode", "owner", "sid").Cols("records").Update(plock{Records: records},
						&plock{Inode: inode, Sid: m.sid, Owner: owner})
				} else {
					n = 1
				}
			} else {
				n, err = s.InsertOne(&plock{Inode: inode, Sid: m.sid, Owner: owner, Records: records})
			}
			if err == nil && n == 0 {
				err = fmt.Errorf("insert/update failed")
			}
			return err
		}, inode))

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return err
}

func (r *dbMeta) ListLocks(ctx context.Context, inode Ino) ([]PLockItem, []FLockItem, error) {
	var fs []flock
	if err := r.db.Find(&fs, &flock{Inode: inode}); err != nil {
		return nil, nil, err
	}

	flocks := make([]FLockItem, 0, len(fs))
	for _, f := range fs {
		flocks = append(flocks, FLockItem{ownerKey{f.Sid, uint64(f.Owner)}, string(f.Ltype)})
	}

	var ps []plock
	if err := r.db.Find(&ps, &plock{Inode: inode}); err != nil {
		return nil, nil, err
	}

	plocks := make([]PLockItem, 0)
	for _, p := range ps {
		ls := loadLocks(p.Records)
		for _, l := range ls {
			plocks = append(plocks, PLockItem{ownerKey{p.Sid, uint64(p.Owner)}, l})
		}
	}
	return plocks, flocks, nil
}


================================================
FILE: pkg/meta/sql_mysql.go
================================================
//go:build !nomysql
// +build !nomysql

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"github.com/go-sql-driver/mysql"
)

func isMySQLDuplicateEntryErr(err error) bool {
	if e, ok := err.(*mysql.MySQLError); ok {
		return e.Number == 1062
	}
	return false
}

func setMySQLTransactionIsolation(dns string) (string, error) {
	cfg, err := mysql.ParseDSN(dns)
	if err != nil {
		return "", err
	}
	if cfg.Params == nil {
		cfg.Params = make(map[string]string)
	}
	cfg.Params["transaction_isolation"] = "'repeatable-read'"
	return cfg.FormatDSN(), nil
}

func init() {
	dupErrorCheckers = append(dupErrorCheckers, isMySQLDuplicateEntryErr)
	setTransactionIsolation = setMySQLTransactionIsolation
	Register("mysql", newSQLMeta)
}


================================================
FILE: pkg/meta/sql_pg.go
================================================
//go:build !nopg
// +build !nopg

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"github.com/jackc/pgx/v5/pgconn"
	_ "github.com/jackc/pgx/v5/stdlib"
)

func isPGDuplicateEntryErr(err error) bool {
	if e, ok := err.(*pgconn.PgError); ok {
		return e.Code == "23505"
	}
	return false
}

func init() {
	dupErrorCheckers = append(dupErrorCheckers, isPGDuplicateEntryErr)
	Register("postgres", newSQLMeta)
}


================================================
FILE: pkg/meta/sql_sqlite.go
================================================
//go:build !nosqlite
// +build !nosqlite

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"github.com/mattn/go-sqlite3"
)

func isSQLiteDuplicateEntryErr(err error) bool {
	if e, ok := err.(sqlite3.Error); ok {
		return e.Code == sqlite3.ErrConstraint
	}
	return false
}

func init() {
	errBusy = sqlite3.ErrBusy
	dupErrorCheckers = append(dupErrorCheckers, isSQLiteDuplicateEntryErr)
	Register("sqlite3", newSQLMeta)
}


================================================
FILE: pkg/meta/sql_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//nolint:errcheck
package meta

import (
	"net/url"
	"os"
	"path"
	"strings"
	"testing"
)

func TestSQLiteClient(t *testing.T) {
	m, err := newSQLMeta("sqlite3", path.Join(t.TempDir(), "jfs-unit-test.db"), testConfig())
	if err != nil || m.Name() != "sqlite3" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestMySQLClient(t *testing.T) { //skip mutate
	m, err := newSQLMeta("mysql", "root:@/dev", testConfig())
	if err != nil || m.Name() != "mysql" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestPostgreSQLClient(t *testing.T) { //skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	m, err := newSQLMeta("postgres", "localhost:5432/test?sslmode=disable", testConfig())
	if err != nil || m.Name() != "postgres" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestPostgreSQLClientWithSearchPath(t *testing.T) { //skip mutate
	_, err := newSQLMeta("postgres", "localhost:5432/test?sslmode=disable&search_path=juicefs,public", testConfig())
	if !strings.Contains(err.Error(), "currently, only one schema is supported in search_path") {
		t.Fatalf("TestPostgreSQLClientWithSearchPath error: %s", err)
	}
}

func TestRecoveryMysqlPwd(t *testing.T) { //skip mutate
	testCase := []struct {
		addr   string
		expect string
	}{
		// no password
		{"root@(localhost:3306)/db1",
			"root@(localhost:3306)/db1",
		},
		// no password
		{"root:@(localhost:3306)/db1",
			"root:@(localhost:3306)/db1",
		},

		{"root::@@(localhost:3306)/db1",
			"root::@@(localhost:3306)/db1",
		},

		{"root:@:@(localhost:3306)/db1",
			"root:@:@(localhost:3306)/db1",
		},

		// no special char
		{"root:password@(localhost:3306)/db1",
			"root:password@(localhost:3306)/db1",
		},

		// set from env @
		{"root:pass%40word@(localhost:3306)/db1",
			"root:pass@word@(localhost:3306)/db1",
		},

		// direct pass special char @
		{"root:pass@word@(localhost:3306)/db1",
			"root:pass@word@(localhost:3306)/db1",
		},

		// set from env |
		{"root:pass%7Cword@(localhost:3306)/db1",
			"root:pass|word@(localhost:3306)/db1",
		},

		// direct pass special char |
		{"root:pass|word@(localhost:3306)/db1",
			"root:pass|word@(localhost:3306)/db1",
		},

		// set from env :
		{"root:pass%3Aword@(localhost:3306)/db1",
			"root:pass:word@(localhost:3306)/db1",
		},

		// direct pass special char :
		{"root:pass:word@(localhost:3306)/db1",
			"root:pass:word@(localhost:3306)/db1",
		},
	}
	for _, tc := range testCase {
		if got := recoveryMysqlPwd(tc.addr); got != tc.expect {
			t.Fatalf("recoveryMysqlPwd error: expect %s but got %s", tc.expect, got)
		}
	}
}

func TestGetCustomConfig(t *testing.T) {
	u := "mysql://root:password@tcp(localhost:3306)/db1?max_open_conns=100&notDefine=str"
	_, after, _ := strings.Cut(u, "?")
	query, err := url.ParseQuery(after)
	if err != nil {
		t.Fatalf("url parse query error: %s", err)
	}
	maxOpenConns, err := extractCustomConfig(&query, "max_open_conns", 1)
	if err != nil {
		t.Fatalf("getCustomConfig error: %s", err)
	}
	if maxOpenConns != 100 {
		t.Fatalf("getCustomConfig error: expect 100 but got %d", maxOpenConns)
	}
	if query.Has("max_open_conns") {
		t.Fatalf("getCustomConfig error: expect not found but found")
	}

	not, err := extractCustomConfig(&query, "notSetKey", "default")
	if err != nil {
		t.Fatalf("getCustomConfig error: %s", err)
	}
	if not != "default" {
		t.Fatalf("getCustomConfig error: expect default but got %s", not)
	}
	if !query.Has("notDefine") {
		t.Fatalf("getCustomConfig error: expect found but not")
	}

}


================================================
FILE: pkg/meta/status.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"fmt"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
)

// Statistic contains the statistics of the filesystem
type Statistic struct {
	UsedSpace                uint64
	AvailableSpace           uint64
	UsedInodes               uint64
	AvailableInodes          uint64
	TrashFileCount           int64 `json:",omitempty"`
	TrashFileSize            int64 `json:",omitempty"`
	PendingDeletedFileCount  int64 `json:",omitempty"`
	PendingDeletedFileSize   int64 `json:",omitempty"`
	TrashSliceCount          int64 `json:",omitempty"`
	TrashSliceSize           int64 `json:",omitempty"`
	PendingDeletedSliceCount int64 `json:",omitempty"`
	PendingDeletedSliceSize  int64 `json:",omitempty"`
}

type Sections struct {
	Setting  *Format
	Sessions []*Session
	Stat     *Statistic
}

// Status retrieves the status of the filesystem
func Status(ctx context.Context, m Meta, trash bool, sections *Sections) error {
	format, err := m.Load(true)
	if err != nil {
		return fmt.Errorf("load setting: %v", err)
	}
	format.RemoveSecret()

	sessions, err := m.ListSessions()
	if err != nil {
		return fmt.Errorf("list sessions: %v", err)
	}

	stat := &Statistic{}
	var totalSpace uint64
	if err = m.StatFS(Background(), RootInode, &totalSpace, &stat.AvailableSpace, &stat.UsedInodes, &stat.AvailableInodes); err != syscall.Errno(0) {
		return fmt.Errorf("stat fs: %v", err)
	}
	stat.UsedSpace = totalSpace - stat.AvailableSpace

	if trash {
		progress := utils.NewProgress(false)
		trashFileSpinner := progress.AddDoubleSpinner("Trash Files")
		pendingDeletedFileSpinner := progress.AddDoubleSpinner("Pending Deleted Files")
		trashSlicesSpinner := progress.AddDoubleSpinner("Trash Slices")
		pendingDeletedSlicesSpinner := progress.AddDoubleSpinner("Pending Deleted Slices")
		err = m.ScanDeletedObject(
			WrapContext(ctx),
			func(ss []Slice, _ int64) (bool, error) {
				for _, s := range ss {
					trashSlicesSpinner.IncrInt64(int64(s.Size))
				}
				return false, nil
			},
			func(_ uint64, size uint32) (bool, error) {
				pendingDeletedSlicesSpinner.IncrInt64(int64(size))
				return false, nil
			},
			func(_ Ino, size uint64, _ time.Time) (bool, error) {
				trashFileSpinner.IncrInt64(int64(size))
				return false, nil
			},
			func(_ Ino, size uint64, _ int64) (bool, error) {
				pendingDeletedFileSpinner.IncrInt64(int64(size))
				return false, nil
			},
		)
		if err != nil {
			return fmt.Errorf("statistic: %v", err)
		}

		trashSlicesSpinner.Done()
		pendingDeletedSlicesSpinner.Done()
		trashFileSpinner.Done()
		pendingDeletedFileSpinner.Done()
		progress.Done()
		stat.TrashSliceCount, stat.TrashSliceSize = trashSlicesSpinner.Current()
		stat.PendingDeletedSliceCount, stat.PendingDeletedSliceSize = pendingDeletedSlicesSpinner.Current()
		stat.TrashFileCount, stat.TrashFileSize = trashFileSpinner.Current()
		stat.PendingDeletedFileCount, stat.PendingDeletedFileSize = pendingDeletedFileSpinner.Current()
	}

	if sections != nil {
		sections.Setting = format
		sections.Sessions = sessions
		sections.Stat = stat
	}
	return nil
}


================================================
FILE: pkg/meta/tkv.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bufio"
	"bytes"
	"context"
	"encoding/binary"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"math/rand"
	"runtime"
	"runtime/debug"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	aclAPI "github.com/juicedata/juicefs/pkg/acl"
	"github.com/pkg/errors"

	"github.com/juicedata/juicefs/pkg/utils"
)

type kvtxn interface {
	get(key []byte) []byte
	gets(keys ...[]byte) [][]byte
	// scan stops when handler returns false; begin and end must not be nil
	scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool)
	exist(prefix []byte) bool
	set(key, value []byte)
	append(key []byte, value []byte)
	incrBy(key []byte, value int64) int64
	delete(key []byte)
}

type tkvClient interface {
	name() string
	simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) error // should only be used for point get scenarios
	txn(ctx context.Context, f func(*kvTxn) error, retry int) error
	scan(prefix []byte, handler func(key, value []byte) bool) error
	reset(prefix []byte) error
	close() error
	shouldRetry(err error) bool
	gc()
	config(key string) interface{}
}

type kvTxn struct {
	kvtxn
	retry int
}

func (tx *kvTxn) deleteKeys(prefix []byte) {
	tx.scan(prefix, nextKey(prefix), true, func(k, v []byte) bool {
		tx.delete(k)
		return true
	})
}

type kvMeta struct {
	*baseMeta
	client tkvClient
	snap   map[Ino]*DumpedEntry
}

var _ Meta = (*kvMeta)(nil)
var _ engine = (*kvMeta)(nil)

var drivers = make(map[string]func(string) (tkvClient, error))

func newTkvClient(driver, addr string) (tkvClient, error) {
	fn, ok := drivers[driver]
	if !ok {
		return nil, fmt.Errorf("unsupported driver %s", driver)
	}
	return fn(addr)
}

func newKVMeta(driver, addr string, conf *Config) (Meta, error) {
	client, err := newTkvClient(driver, addr)
	if err != nil {
		return nil, fmt.Errorf("connect to addr %s: %s", addr, err)
	}
	// TODO: ping server and check latency > Millisecond
	// logger.Warnf("The latency to database is too high: %s", time.Since(start))
	m := &kvMeta{
		baseMeta: newBaseMeta(addr, conf),
		client:   client,
	}
	m.en = m
	return m, nil
}

func (m *kvMeta) Shutdown() error {
	return m.client.close()
}

func (m *kvMeta) Name() string {
	return m.client.name()
}

func (m *kvMeta) doDeleteSlice(id uint64, size uint32) error {
	return m.deleteKeys(m.sliceKey(id, size))
}

func (m *kvMeta) keyLen(args ...interface{}) int {
	var c int
	for _, a := range args {
		switch a := a.(type) {
		case byte:
			c++
		case uint32:
			c += 4
		case uint64:
			c += 8
		case Ino:
			c += 8
		case string:
			c += len(a)
		default:
			panic(fmt.Sprintf("invalid type %T, value %v", a, a))
		}
	}
	return c
}

func (m *kvMeta) fmtKey(args ...interface{}) []byte {
	b := utils.NewBuffer(uint32(m.keyLen(args...)))
	for _, a := range args {
		switch a := a.(type) {
		case byte:
			b.Put8(a)
		case uint32:
			b.Put32(a)
		case uint64:
			b.Put64(a)
		case Ino:
			m.encodeInode(a, b.Get(8))
		case string:
			b.Put([]byte(a))
		default:
			panic(fmt.Sprintf("invalid type %T, value %v", a, a))
		}
	}
	return b.Bytes()
}

/**
  Ino     iiiiiiii
  Length  llllllll
  Indx    nnnn
  name    ...
  sliceId cccccccc
  session ssssssss
  aclId   aaaa

All keys:
  setting            format
  C...               counter
  AiiiiiiiiI         inode attribute
  AiiiiiiiiD...      dentry
  AiiiiiiiiPiiiiiiii parents // for hard links
  AiiiiiiiiCnnnn     file chunks
  AiiiiiiiiS         symlink target
  AiiiiiiiiX...      extented attribute
  Diiiiiiiillllllll  delete inodes
  Fiiiiiiii          Flocks
  Piiiiiiii          POSIX locks
  Kccccccccnnnn      slice refs
  Lttttttttcccccccc  delayed slices
  SEssssssss         session expire time
  SHssssssss         session heartbeat // for legacy client
  SIssssssss         session info
  SSssssssssiiiiiiii sustained inode
  Uiiiiiiii          data length, space and inodes usage in directory
  Niiiiiiii          detached inde
  QDiiiiiiii         directory quota
  Raaaa			     POSIX acl
  KDaaaa			 delegation token
*/

func (m *kvMeta) inodeKey(inode Ino) []byte {
	return m.fmtKey("A", inode, "I")
}

func (m *kvMeta) entryKey(parent Ino, name string) []byte {
	return m.fmtKey("A", parent, "D", name)
}

func (m *kvMeta) parentKey(inode, parent Ino) []byte {
	return m.fmtKey("A", inode, "P", parent)
}

func (m *kvMeta) chunkKey(inode Ino, indx uint32) []byte {
	return m.fmtKey("A", inode, "C", indx)
}

func (m *kvMeta) sliceKey(id uint64, size uint32) []byte {
	return m.fmtKey("K", id, size)
}

func (m *kvMeta) delSliceKey(ts int64, id uint64) []byte {
	return m.fmtKey("L", uint64(ts), id)
}

func (m *kvMeta) symKey(inode Ino) []byte {
	return m.fmtKey("A", inode, "S")
}

func (m *kvMeta) xattrKey(inode Ino, name string) []byte {
	return m.fmtKey("A", inode, "X", name)
}

func (m *kvMeta) flockKey(inode Ino) []byte {
	return m.fmtKey("F", inode)
}

func (m *kvMeta) plockKey(inode Ino) []byte {
	return m.fmtKey("P", inode)
}

func (m *kvMeta) sessionKey(sid uint64) []byte {
	return m.fmtKey("SE", sid)
}

func (m *kvMeta) legacySessionKey(sid uint64) []byte {
	return m.fmtKey("SH", sid)
}

func (m *kvMeta) dirStatKey(inode Ino) []byte {
	return m.fmtKey("U", inode)
}

func (m *kvMeta) detachedKey(inode Ino) []byte {
	return m.fmtKey("N", inode)
}

func (m *kvMeta) dirQuotaKey(inode Ino) []byte {
	return m.fmtKey("QD", inode)
}

func (m *kvMeta) userQuotaKey(uid uint64) []byte {
	return m.fmtKey("QU", uid)
}

func (m *kvMeta) groupQuotaKey(gid uint64) []byte {
	return m.fmtKey("QG", gid)
}

func (m *kvMeta) aclKey(id uint32) []byte {
	return m.fmtKey("R", id)
}

func (m *kvMeta) krbTokenKey(id uint32) []byte {
	return m.fmtKey("KD", id)
}

func (m *kvMeta) parseACLId(key string) uint32 {
	// trim "R"
	rb := utils.ReadBuffer([]byte(key[1:]))
	return rb.Get32()
}

func (m *kvMeta) parseSid(key string) uint64 {
	buf := []byte(key[2:]) // "SE" or "SH"
	if len(buf) != 8 {
		panic("invalid sid value")
	}
	return binary.BigEndian.Uint64(buf)
}

func (m *kvMeta) sessionInfoKey(sid uint64) []byte {
	return m.fmtKey("SI", sid)
}

func (m *kvMeta) sustainedKey(sid uint64, inode Ino) []byte {
	return m.fmtKey("SS", sid, inode)
}

func (m *kvMeta) encodeInode(ino Ino, buf []byte) {
	binary.LittleEndian.PutUint64(buf, uint64(ino))
}

func (m *kvMeta) decodeInode(buf []byte) Ino {
	return Ino(binary.LittleEndian.Uint64(buf))
}

func (m *kvMeta) delfileKey(inode Ino, length uint64) []byte {
	return m.fmtKey("D", inode, length)
}

func (m *kvMeta) counterKey(key string) []byte {
	return m.fmtKey("C", key)
}

// Used for values that are modified by directly set; mostly timestamps
func (m *kvMeta) packInt64(value int64) []byte {
	b := make([]byte, 8)
	binary.BigEndian.PutUint64(b, uint64(value))
	return b
}

func (m *kvMeta) parseInt64(buf []byte) int64 {
	if len(buf) == 0 {
		return 0
	}
	if len(buf) != 8 {
		panic("invalid value")
	}
	return int64(binary.BigEndian.Uint64(buf))
}

// Used for most counter values that are modified by incrBy
func packCounter(value int64) []byte {
	b := make([]byte, 8)
	binary.LittleEndian.PutUint64(b, uint64(value))
	return b
}

func parseCounter(buf []byte) int64 {
	if len(buf) == 0 {
		return 0
	}
	if len(buf) != 8 {
		panic("invalid counter value")
	}
	return int64(binary.LittleEndian.Uint64(buf))
}

func (m *kvMeta) packEntry(_type uint8, inode Ino) []byte {
	b := utils.NewBuffer(9)
	b.Put8(_type)
	b.Put64(uint64(inode))
	return b.Bytes()
}

func (m *kvMeta) parseEntry(buf []byte) (uint8, Ino) {
	b := utils.FromBuffer(buf)
	return b.Get8(), Ino(b.Get64())
}

func (m *kvMeta) packDirStat(st *dirStat) []byte {
	b := utils.NewBuffer(24)
	b.Put64(uint64(st.length))
	b.Put64(uint64(st.space))
	b.Put64(uint64(st.inodes))
	return b.Bytes()
}

func (m *kvMeta) parseDirStat(buf []byte) *dirStat {
	b := utils.FromBuffer(buf)
	return &dirStat{int64(b.Get64()), int64(b.Get64()), int64(b.Get64())}
}

func (m *kvMeta) packQuota(q *Quota) []byte {
	b := utils.NewBuffer(32)
	b.Put64(uint64(q.MaxSpace))
	b.Put64(uint64(q.MaxInodes))
	b.Put64(uint64(q.UsedSpace))
	b.Put64(uint64(q.UsedInodes))
	return b.Bytes()
}

func (m *kvMeta) parseQuota(buf []byte) *Quota {
	b := utils.FromBuffer(buf)
	return &Quota{
		MaxSpace:   int64(b.Get64()),
		MaxInodes:  int64(b.Get64()),
		UsedSpace:  int64(b.Get64()),
		UsedInodes: int64(b.Get64()),
	}
}

func (m *kvMeta) get(key []byte) ([]byte, error) {
	var value []byte
	err := m.client.simpleTxn(Background(), func(tx *kvTxn) error {
		value = tx.get(key)
		return nil
	}, 0)
	return value, err
}

func (m *kvMeta) scanKeys(ctx context.Context, prefix []byte) ([][]byte, error) {
	var keys [][]byte
	err := m.client.txn(ctx, func(tx *kvTxn) error {
		tx.scan(prefix, nextKey(prefix), true, func(k, v []byte) bool {
			keys = append(keys, k)
			return true
		})
		return nil
	}, 0)
	return keys, err
}

func (m *kvMeta) scanValues(ctx context.Context, prefix []byte, limit int, filter func(k, v []byte) bool) (map[string][]byte, error) {
	if limit == 0 {
		return nil, nil
	}
	values := make(map[string][]byte)
	err := m.client.txn(ctx, func(tx *kvTxn) error {
		var c int
		tx.scan(prefix, nextKey(prefix), false, func(k, v []byte) bool {
			if filter == nil || filter(k, v) {
				values[string(k)] = v
				c++
			}
			return limit < 0 || c < limit
		})
		return nil
	}, 0)
	return values, err
}

func (m *kvMeta) scan(startKey, endKey []byte, limit int, filter func(k, v []byte) bool) ([][]byte, [][]byte, error) {
	if limit == 0 {
		return nil, nil, nil
	}
	var keys, vals [][]byte
	err := m.client.txn(Background(), func(tx *kvTxn) error {
		var c int
		tx.scan(startKey, endKey, false, func(k, v []byte) bool {
			if filter == nil || filter(k, v) {
				keys = append(keys, k)
				vals = append(vals, v)
				c++
			}
			return limit < 0 || c < limit
		})
		return nil
	}, 0)
	return keys, vals, err
}

func (m *kvMeta) doInit(format *Format, force bool) error {
	body, err := m.get(m.fmtKey("setting"))
	if err != nil {
		return err
	}

	if body != nil {
		var old Format
		err = json.Unmarshal(body, &old)
		if err != nil {
			return fmt.Errorf("json: %s", err)
		}
		if !old.DirStats && format.DirStats {
			// remove dir stats as they are outdated
			var keys [][]byte
			prefix := m.fmtKey("U")
			err := m.client.txn(Background(), func(tx *kvTxn) error {
				tx.scan(prefix, nextKey(prefix), true, func(k, v []byte) bool {
					if len(k) == 9 {
						keys = append(keys, k)
					}
					return true
				})
				return nil
			}, 0)
			if err != nil {
				return errors.Wrap(err, "scan dir stats")
			}
			err = m.deleteKeys(keys...)
			if err != nil {
				return errors.Wrap(err, "delete dir stats")
			}
		}
		if !old.UserGroupQuota && format.UserGroupQuota {
			// remove user group quota as they are outdated
			userPrefix := m.fmtKey("QU")
			groupPrefix := m.fmtKey("QG")
			err := m.client.txn(Background(), func(tx *kvTxn) error {
				tx.deleteKeys(userPrefix)
				tx.deleteKeys(groupPrefix)
				return nil
			}, 0)
			if err != nil {
				return errors.Wrap(err, "delete user group quota")
			}
		}
		if err = format.update(&old, force); err != nil {
			return errors.Wrap(err, "update format")
		}
	}

	data, err := json.MarshalIndent(format, "", "")
	if err != nil {
		return fmt.Errorf("json: %s", err)
	}

	m.fmt = format
	ts := time.Now().Unix()
	attr := &Attr{
		Typ:    TypeDirectory,
		Atime:  ts,
		Mtime:  ts,
		Ctime:  ts,
		Nlink:  2,
		Length: 4 << 10,
		Parent: RootInode,
	}
	return m.txn(Background(), func(tx *kvTxn) error {
		if format.TrashDays > 0 {
			buf := tx.get(m.inodeKey(TrashInode))
			if buf == nil {
				attr.Mode = 0555
				tx.set(m.inodeKey(TrashInode), m.marshal(attr))
			}
		}
		tx.set(m.fmtKey("setting"), data)
		if body == nil || m.client.name() == "memkv" {
			attr.Mode = 0777
			tx.set(m.inodeKey(RootInode), m.marshal(attr))
			tx.incrBy(m.counterKey("nextInode"), 2)
			tx.incrBy(m.counterKey("nextChunk"), 1)
		}
		return nil
	})
}

func (m *kvMeta) cacheACLs(ctx Context) error {
	if !m.getFormat().EnableACL {
		return nil
	}

	acls, err := m.scanValues(ctx, m.fmtKey("R"), -1, nil)
	if err != nil {
		return err
	}
	for key, val := range acls {
		tmpRule := &aclAPI.Rule{}
		tmpRule.Decode(val)
		m.aclCache.Put(m.parseACLId(key), tmpRule)
	}
	return nil
}

func (m *kvMeta) Reset() error {
	return m.client.reset(nil)
}

func (m *kvMeta) doLoad() ([]byte, error) {
	return m.get(m.fmtKey("setting"))
}

func (m *kvMeta) updateStats(space int64, inodes int64) {
	atomic.AddInt64(&m.newSpace, space)
	atomic.AddInt64(&m.newInodes, inodes)
}

func (m *kvMeta) doFlushStats() {
	if space := atomic.LoadInt64(&m.newSpace); space != 0 {
		if v, err := m.incrCounter(usedSpace, space); err == nil {
			atomic.AddInt64(&m.newSpace, -space)
			atomic.StoreInt64(&m.usedSpace, v)
		} else {
			logger.Warnf("Update space stats: %s", err)
		}
	}
	if inodes := atomic.LoadInt64(&m.newInodes); inodes != 0 {
		if v, err := m.incrCounter(totalInodes, inodes); err == nil {
			atomic.AddInt64(&m.newInodes, -inodes)
			atomic.StoreInt64(&m.usedInodes, v)
		} else {
			logger.Warnf("Update inodes stats: %s", err)
		}
	}
}

func (m *kvMeta) doNewSession(sinfo []byte, update bool) error {
	if err := m.setValue(m.sessionKey(m.sid), m.packInt64(m.expireTime())); err != nil {
		return fmt.Errorf("set session ID %d: %s", m.sid, err)
	}
	if err := m.setValue(m.sessionInfoKey(m.sid), sinfo); err != nil {
		return fmt.Errorf("set session info: %s", err)
	}
	return nil
}

func (m *kvMeta) doRefreshSession() error {
	return m.txn(Background(), func(tx *kvTxn) error {
		buf := tx.get(m.sessionKey(m.sid))
		if buf == nil {
			logger.Warnf("Session %d was stale and cleaned up, but now it comes back again", m.sid)
			tx.set(m.sessionInfoKey(m.sid), m.newSessionInfo())
		}
		tx.set(m.sessionKey(m.sid), m.packInt64(m.expireTime()))
		return nil
	})
}

func (m *kvMeta) doCleanStaleSession(sid uint64) error {
	var fail bool
	// release locks
	ctx := Background()
	if flocks, err := m.scanValues(ctx, m.fmtKey("F"), -1, nil); err == nil {
		for k, v := range flocks {
			ls := unmarshalFlock(v)
			for o := range ls {
				if o.sid == sid {
					if err = m.txn(ctx, func(tx *kvTxn) error {
						v := tx.get([]byte(k))
						ls := unmarshalFlock(v)
						delete(ls, o)
						if len(ls) > 0 {
							tx.set([]byte(k), marshalFlock(ls))
						} else {
							tx.delete([]byte(k))
						}
						return nil
					}); err != nil {
						logger.Warnf("Delete flock with sid %d: %s", sid, err)
						fail = true
					}
				}
			}
		}
	} else {
		logger.Warnf("Scan flock with sid %d: %s", sid, err)
		fail = true
	}

	if plocks, err := m.scanValues(ctx, m.fmtKey("P"), -1, nil); err == nil {
		for k, v := range plocks {
			ls := unmarshalPlock(v)
			for o := range ls {
				if o.sid == sid {
					if err = m.txn(ctx, func(tx *kvTxn) error {
						v := tx.get([]byte(k))
						ls := unmarshalPlock(v)
						delete(ls, o)
						if len(ls) > 0 {
							tx.set([]byte(k), marshalPlock(ls))
						} else {
							tx.delete([]byte(k))
						}
						return nil
					}); err != nil {
						logger.Warnf("Delete plock with sid %d: %s", sid, err)
						fail = true
					}
				}
			}
		}
	} else {
		logger.Warnf("Scan plock with sid %d: %s", sid, err)
		fail = true
	}

	if keys, err := m.scanKeys(ctx, m.fmtKey("SS", sid)); err == nil {
		for _, key := range keys {
			inode := m.decodeInode(key[10:]) // "SS" + sid
			if err = m.doDeleteSustainedInode(sid, inode); err != nil {
				logger.Warnf("Delete sustained inode %d of sid %d: %s", inode, sid, err)
				fail = true
			}
		}
	} else {
		logger.Warnf("Scan sustained with sid %d: %s", sid, err)
		fail = true
	}

	if fail {
		return fmt.Errorf("failed to clean up sid %d", sid)
	} else {
		return m.deleteKeys(m.sessionKey(sid), m.legacySessionKey(sid), m.sessionInfoKey(sid))
	}
}

func (m *kvMeta) doFindStaleSessions(limit int) ([]uint64, error) {
	ctx := Background()
	vals, err := m.scanValues(ctx, m.fmtKey("SE"), limit, func(k, v []byte) bool {
		return m.parseInt64(v) < time.Now().Unix()
	})
	if err != nil {
		return nil, err
	}
	sids := make([]uint64, 0, len(vals))
	for k := range vals {
		sids = append(sids, m.parseSid(k))
	}
	limit -= len(sids)
	if limit <= 0 {
		return sids, nil
	}

	// check clients with version before 1.0-beta3 as well
	vals, err = m.scanValues(ctx, m.fmtKey("SH"), limit, func(k, v []byte) bool {
		return m.parseInt64(v) < time.Now().Add(time.Minute*-5).Unix()
	})
	if err != nil {
		logger.Errorf("Scan stale legacy sessions: %s", err)
		return sids, nil
	}
	for k := range vals {
		sids = append(sids, m.parseSid(k))
	}
	return sids, nil
}

func (m *kvMeta) getSession(sid uint64, detail bool) (*Session, error) {
	info, err := m.get(m.sessionInfoKey(sid))
	if err != nil {
		return nil, err
	}
	if info == nil {
		info = []byte("{}")
	}
	var s Session
	if err = json.Unmarshal(info, &s); err != nil {
		return nil, fmt.Errorf("corrupted session info; json error: %s", err)
	}
	s.Sid = sid
	if detail {
		ctx := Background()
		inodes, err := m.scanKeys(ctx, m.fmtKey("SS", sid))
		if err != nil {
			return nil, err
		}
		s.Sustained = make([]Ino, 0, len(inodes))
		for _, sinode := range inodes {
			inode := m.decodeInode(sinode[10:]) // "SS" + sid
			s.Sustained = append(s.Sustained, inode)
		}
		flocks, err := m.scanValues(ctx, m.fmtKey("F"), -1, nil)
		if err != nil {
			return nil, err
		}
		for k, v := range flocks {
			inode := m.decodeInode([]byte(k[1:])) // "F"
			ls := unmarshalFlock(v)
			for o, l := range ls {
				if o.sid == sid {
					s.Flocks = append(s.Flocks, Flock{inode, o.sid, string(l)})
				}
			}
		}
		plocks, err := m.scanValues(ctx, m.fmtKey("P"), -1, nil)
		if err != nil {
			return nil, err
		}
		for k, v := range plocks {
			inode := m.decodeInode([]byte(k[1:])) // "P"
			ls := unmarshalPlock(v)
			for o, l := range ls {
				if o.sid == sid {
					s.Plocks = append(s.Plocks, Plock{inode, o.sid, loadLocks(l)})
				}
			}
		}
	}
	return &s, nil
}

func (m *kvMeta) GetSession(sid uint64, detail bool) (*Session, error) {
	var legacy bool
	value, err := m.get(m.sessionKey(sid))
	if err == nil && value == nil {
		legacy = true
		value, err = m.get(m.legacySessionKey(sid))
	}
	if err != nil {
		return nil, err
	}
	if value == nil {
		return nil, fmt.Errorf("session not found: %d", sid)
	}
	s, err := m.getSession(sid, detail)
	if err != nil {
		return nil, err
	}
	s.Expire = time.Unix(m.parseInt64(value), 0)
	if legacy {
		s.Expire = s.Expire.Add(time.Minute * 5)
	}
	return s, nil
}

func (m *kvMeta) ListSessions() ([]*Session, error) {
	ctx := Background()
	vals, err := m.scanValues(ctx, m.fmtKey("SE"), -1, nil)
	if err != nil {
		return nil, err
	}
	sessions := make([]*Session, 0, len(vals))
	for k, v := range vals {
		s, err := m.getSession(m.parseSid(k), false)
		if err != nil {
			logger.Errorf("get session: %s", err)
			continue
		}
		s.Expire = time.Unix(m.parseInt64(v), 0)
		sessions = append(sessions, s)
	}

	// add clients with version before 1.0-beta3 as well
	vals, err = m.scanValues(ctx, m.fmtKey("SH"), -1, nil)
	if err != nil {
		logger.Errorf("Scan legacy sessions: %s", err)
		return sessions, nil
	}
	for k, v := range vals {
		s, err := m.getSession(m.parseSid(k), false)
		if err != nil {
			logger.Errorf("Get legacy session: %s", err)
			continue
		}
		s.Expire = time.Unix(m.parseInt64(v), 0).Add(time.Minute * 5)
		sessions = append(sessions, s)
	}
	return sessions, nil
}

func (m *kvMeta) shouldRetry(err error) bool {
	return m.client.shouldRetry(err)
}

func (m *kvMeta) txn(ctx Context, f func(tx *kvTxn) error, inodes ...Ino) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	start := time.Now()
	defer func() { m.txDist.Observe(time.Since(start).Seconds()) }()
	defer m.txBatchLock(inodes...)()
	var (
		lastErr error
		method  txMethod
	)

	for i := 0; i < 50; i++ {
		if ctx.Canceled() {
			logger.Warnf("Transaction %s interrupted after %s, tried %d, inodes: %v", method.name(ctx), time.Since(start), i+1, inodes)
			return syscall.EINTR
		}
		err := m.client.txn(ctx, f, i)
		if eno, ok := err.(syscall.Errno); ok && eno == 0 {
			err = nil
		}
		if err != nil && m.shouldRetry(err) {
			m.txRestart.WithLabelValues(method.name(ctx)).Add(1)
			logger.Debugf("Transaction failed, restart it (tried %d): %s", i+1, err)
			lastErr = err
			time.Sleep(time.Millisecond * time.Duration(rand.Int()%((i+1)*(i+1))))
			continue
		} else if err == nil && i > 1 {
			logger.Warnf("Transaction succeeded after %d tries (%s), inodes: %v, method: %s, error: %s", i+1, time.Since(start), inodes, method.name(ctx), lastErr)
		}
		return err
	}
	logger.Warnf("Already tried 50 times, returning: %s", lastErr)
	return lastErr
}

func (m *kvMeta) setValue(key, value []byte) error {
	return m.txn(Background(), func(tx *kvTxn) error {
		tx.set(key, value)
		return nil
	})
}

func (m *kvMeta) getCounter(name string) (int64, error) {
	buf, err := m.get(m.counterKey(name))
	return parseCounter(buf), err
}

func (m *kvMeta) incrCounter(name string, value int64) (int64, error) {
	var new int64
	key := m.counterKey(name)
	err := m.txn(Background().WithValue(txMethodKey{}, "incrCounter:"+name), func(tx *kvTxn) error {
		new = tx.incrBy(key, value)
		return nil
	})
	return new, err
}

func (m *kvMeta) setIfSmall(name string, value, diff int64) (bool, error) {
	var changed bool
	key := m.counterKey(name)
	err := m.txn(Background().WithValue(txMethodKey{}, "setIfSmall:"+name), func(tx *kvTxn) error {
		changed = false
		if m.parseInt64(tx.get(key)) > value-diff {
			return nil
		} else {
			changed = true
			tx.set(key, m.packInt64(value))
			return nil
		}
	})

	return changed, err
}

func (m *kvMeta) deleteKeys(keys ...[]byte) error {
	if len(keys) == 0 {
		return nil
	}
	return m.txn(Background(), func(tx *kvTxn) error {
		for _, key := range keys {
			tx.delete(key)
		}
		return nil
	})
}

func (m *kvMeta) doLookup(ctx Context, parent Ino, name string, inode *Ino, attr *Attr) syscall.Errno {
	buf, err := m.get(m.entryKey(parent, name))
	if err != nil {
		return errno(err)
	}
	if buf == nil {
		return syscall.ENOENT
	}
	foundType, foundIno := m.parseEntry(buf)
	a, err := m.get(m.inodeKey(foundIno))
	if a != nil {
		m.parseAttr(a, attr)
		m.of.Update(foundIno, attr)
	} else if err == nil {
		logger.Warnf("no attribute for inode %d (%d, %s)", foundIno, parent, name)
		*attr = Attr{Typ: foundType}
	}
	*inode = foundIno
	return errno(err)
}

func (m *kvMeta) doGetAttr(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	return errno(m.client.simpleTxn(ctx, func(tx *kvTxn) error {
		val := tx.get(m.inodeKey(inode))
		if val == nil {
			return syscall.ENOENT
		}
		m.parseAttr(val, attr)
		return nil
	}, 0))
}

func (m *kvMeta) doSetAttr(ctx Context, inode Ino, set uint16, sugidclearmode uint8, attr *Attr, oldAttr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		var cur Attr
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			return syscall.ENOENT
		}
		m.parseAttr(a, &cur)
		if oldAttr != nil {
			*oldAttr = cur
		}
		if cur.Parent > TrashInode {
			return syscall.EPERM
		}
		now := time.Now()

		rule, err := m.getACL(tx, cur.AccessACL)
		if err != nil {
			return err
		}

		rule = rule.Dup()
		dirtyAttr, st := m.mergeAttr(ctx, inode, set, &cur, attr, now, rule)
		if st != 0 {
			return st
		}
		if dirtyAttr == nil {
			return nil
		}

		dirtyAttr.AccessACL, err = m.insertACL(tx, rule)
		if err != nil {
			return err
		}

		dirtyAttr.Ctime = now.Unix()
		dirtyAttr.Ctimensec = uint32(now.Nanosecond())
		tx.set(m.inodeKey(inode), m.marshal(dirtyAttr))
		*attr = *dirtyAttr
		return nil
	}, inode))
}

func (m *kvMeta) doTruncate(ctx Context, inode Ino, flags uint8, length uint64, delta *dirStat, attr *Attr, skipPermCheck bool) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		*delta = dirStat{}
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			return syscall.ENOENT
		}
		t := Attr{}
		m.parseAttr(a, &t)
		if t.Typ != TypeFile || t.Flags&(FlagImmutable|t.Flags&FlagAppend) != 0 || (flags == 0 && t.Parent > TrashInode) {
			return syscall.EPERM
		}
		if !skipPermCheck {
			if st := m.Access(ctx, inode, MODE_MASK_W, &t); st != 0 {
				return st
			}
		}
		if length == t.Length {
			*attr = t
			return nil
		}
		delta.length = int64(length) - int64(t.Length)
		delta.space = align4K(length) - align4K(t.Length)
		if err := m.checkQuota(ctx, delta.space, 0, t.Uid, t.Gid, m.getParents(tx, inode, t.Parent)...); err != 0 {
			return err
		}
		var left, right = t.Length, length
		if left > right {
			right, left = left, right
		}
		if right/ChunkSize-left/ChunkSize > 1 {
			buf := marshalSlice(0, 0, 0, 0, ChunkSize)
			tx.scan(m.chunkKey(inode, uint32(left/ChunkSize)+1), m.chunkKey(inode, uint32(right/ChunkSize)),
				false, func(k, v []byte) bool {
					tx.set(k, append(v, buf...))
					return true
				})
		}
		l := uint32(right - left)
		if right > (left/ChunkSize+1)*ChunkSize {
			l = ChunkSize - uint32(left%ChunkSize)
		}
		tx.append(m.chunkKey(inode, uint32(left/ChunkSize)), marshalSlice(uint32(left%ChunkSize), 0, 0, 0, l))
		if right > (left/ChunkSize+1)*ChunkSize && right%ChunkSize > 0 {
			tx.append(m.chunkKey(inode, uint32(right/ChunkSize)), marshalSlice(0, 0, 0, 0, uint32(right%ChunkSize)))
		}
		t.Length = length
		now := time.Now()
		t.Mtime = now.Unix()
		t.Mtimensec = uint32(now.Nanosecond())
		t.Ctime = now.Unix()
		t.Ctimensec = uint32(now.Nanosecond())
		tx.set(m.inodeKey(inode), m.marshal(&t))
		*attr = t
		return nil
	}, inode))
}

func (m *kvMeta) doFallocate(ctx Context, inode Ino, mode uint8, off uint64, size uint64, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		*delta = dirStat{}
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			return syscall.ENOENT
		}
		t := Attr{}
		m.parseAttr(a, &t)
		if t.Typ == TypeFIFO {
			return syscall.EPIPE
		}
		if t.Typ != TypeFile || (t.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if st := m.Access(ctx, inode, MODE_MASK_W, &t); st != 0 {
			return st
		}
		if (t.Flags&FlagAppend) != 0 && (mode&^fallocKeepSize) != 0 {
			return syscall.EPERM
		}
		length := t.Length
		if off+size > t.Length {
			if mode&fallocKeepSize == 0 {
				length = off + size
			}
		}

		old := t.Length
		delta.length = int64(length) - int64(t.Length)
		delta.space = align4K(length) - align4K(t.Length)
		if err := m.checkQuota(ctx, delta.space, 0, t.Uid, t.Gid, m.getParents(tx, inode, t.Parent)...); err != 0 {
			return err
		}
		t.Length = length
		now := time.Now()
		t.Mtime = now.Unix()
		t.Mtimensec = uint32(now.Nanosecond())
		t.Ctime = now.Unix()
		t.Ctimensec = uint32(now.Nanosecond())
		tx.set(m.inodeKey(inode), m.marshal(&t))
		if mode&(fallocZeroRange|fallocPunchHole) != 0 && off < old {
			off, size := off, size
			if off+size > old {
				size = old - off
			}
			for size > 0 {
				indx := uint32(off / ChunkSize)
				coff := off % ChunkSize
				l := size
				if coff+size > ChunkSize {
					l = ChunkSize - coff
				}
				tx.append(m.chunkKey(inode, indx), marshalSlice(uint32(coff), 0, 0, 0, uint32(l)))
				off += l
				size -= l
			}
		}
		*attr = t
		return nil
	}, inode))
}

func (m *kvMeta) doReadlink(ctx Context, inode Ino, noatime bool) (atime int64, target []byte, err error) {
	if noatime {
		target, err = m.get(m.symKey(inode))
		return
	}

	attr := &Attr{}
	now := time.Now()
	err = m.txn(ctx, func(tx *kvTxn) error {
		rs := tx.gets(m.inodeKey(inode), m.symKey(inode))
		if rs[0] == nil {
			return syscall.ENOENT
		}
		m.parseAttr(rs[0], attr)
		if attr.Typ != TypeSymlink {
			return syscall.EINVAL
		}
		if rs[1] == nil {
			return syscall.EIO
		}
		target = rs[1]
		if !m.atimeNeedsUpdate(attr, now) {
			atime = attr.Atime*int64(time.Second) + int64(attr.Atimensec)
			return nil
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		atime = now.UnixNano()
		tx.set(m.inodeKey(inode), m.marshal(attr))
		return nil
	}, inode)
	return
}

func (m *kvMeta) doMknod(ctx Context, parent Ino, name string, _type uint8, mode, cumask uint16, path string, inode *Ino, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		var pattr Attr
		rs := tx.gets(m.inodeKey(parent), m.entryKey(parent, name))
		if rs[0] == nil {
			return syscall.ENOENT
		}
		m.parseAttr(rs[0], &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if (pattr.Flags & FlagSkipTrash) != 0 {
			attr.Flags |= FlagSkipTrash
		}

		buf := rs[1]
		var foundIno Ino
		var foundType uint8
		if buf != nil {
			foundType, foundIno = m.parseEntry(buf)
		} else if m.conf.CaseInsensi {
			if entry := m.resolveCase(ctx, parent, name); entry != nil {
				foundType, foundIno = entry.Attr.Typ, entry.Inode
			}
		}
		if foundIno != 0 {
			if _type == TypeFile || _type == TypeDirectory {
				a := tx.get(m.inodeKey(foundIno))
				if a != nil {
					m.parseAttr(a, attr)
				} else {
					*attr = Attr{Typ: foundType, Parent: parent} // corrupt entry
				}
				*inode = foundIno
			}
			return syscall.EEXIST
		} else if parent == TrashInode { // user's inode is allocated by prefetch, trash inode is allocated on demand
			key := m.counterKey("nextTrash")
			next := tx.incrBy(key, 1)
			*inode = TrashInode + Ino(next)
		}

		mode &= 07777
		if pattr.DefaultACL != aclAPI.None && _type != TypeSymlink {
			// inherit default acl
			if _type == TypeDirectory {
				attr.DefaultACL = pattr.DefaultACL
			}

			// set access acl by parent's default acl
			rule, err := m.getACL(tx, pattr.DefaultACL)
			if err != nil {
				return err
			}

			if rule.IsMinimal() {
				// simple acl as default
				attr.Mode = mode & (0xFE00 | rule.GetMode())
			} else {
				cRule := rule.ChildAccessACL(mode)
				id, err := m.insertACL(tx, cRule)
				if err != nil {
					return err
				}

				attr.AccessACL = id
				attr.Mode = (mode & 0xFE00) | cRule.GetMode()
			}
		} else {
			attr.Mode = mode & ^cumask
		}

		var updateParent bool
		now := time.Now()
		if parent != TrashInode {
			if _type == TypeDirectory {
				pattr.Nlink++
				if m.conf.SkipDirNlink <= 0 || tx.retry < m.conf.SkipDirNlink {
					updateParent = true
				} else {
					logger.Warnf("Skip updating nlink of directory %d to reduce conflict", parent)
				}
			}
			if updateParent || now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
				pattr.Mtime = now.Unix()
				pattr.Mtimensec = uint32(now.Nanosecond())
				pattr.Ctime = now.Unix()
				pattr.Ctimensec = uint32(now.Nanosecond())
				updateParent = true
			}
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		attr.Mtime = now.Unix()
		attr.Mtimensec = uint32(now.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())
		if ctx.Value(CtxKey("behavior")) == "Hadoop" || runtime.GOOS == "darwin" {
			attr.Gid = pattr.Gid
		} else if runtime.GOOS == "linux" && pattr.Mode&02000 != 0 {
			attr.Gid = pattr.Gid
			if _type == TypeDirectory {
				attr.Mode |= 02000
			} else if attr.Mode&02010 == 02010 && ctx.Uid() != 0 {
				var found bool
				for _, gid := range ctx.Gids() {
					if gid == pattr.Gid {
						found = true
					}
				}
				if !found {
					attr.Mode &= ^uint16(02000)
				}
			}
		}

		tx.set(m.entryKey(parent, name), m.packEntry(_type, *inode))
		if updateParent {
			tx.set(m.inodeKey(parent), m.marshal(&pattr))
		}
		tx.set(m.inodeKey(*inode), m.marshal(attr))
		if _type == TypeSymlink {
			tx.set(m.symKey(*inode), []byte(path))
		}
		if _type == TypeDirectory {
			tx.set(m.dirStatKey(*inode), m.packDirStat(&dirStat{}))
		}
		return nil
	}, parent))
}

func (m *kvMeta) doUnlink(ctx Context, parent Ino, name string, attr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash Ino
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}

	if attr == nil {
		attr = &Attr{}
	}
	var _type uint8
	var inode Ino
	var opened bool
	var newSpace, newInode int64
	err := m.txn(ctx, func(tx *kvTxn) error {
		opened = false
		*attr = Attr{}
		newSpace, newInode = 0, 0
		buf := tx.get(m.entryKey(parent, name))
		if buf == nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parent, name); e != nil {
				name = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
			}
		}
		if buf == nil {
			return syscall.ENOENT
		}
		_type, inode = m.parseEntry(buf)
		if _type == TypeDirectory {
			return syscall.EPERM
		}
		keys := [][]byte{m.inodeKey(parent), m.inodeKey(inode)}
		if trash > 0 {
			keys = append(keys, m.entryKey(trash, m.trashEntry(parent, inode, name)))
		}
		rs := tx.gets(keys...)
		if rs[0] == nil {
			return syscall.ENOENT
		}
		var pattr Attr
		m.parseAttr(rs[0], &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		opened = false
		now := time.Now()
		if rs[1] != nil {
			m.parseAttr(rs[1], attr)
			if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
				return syscall.EACCES
			}
			if (attr.Flags&FlagAppend) != 0 || (attr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (attr.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			if trash > 0 && attr.Nlink > 1 && rs[2] != nil {
				trash = 0
			}
			attr.Ctime = now.Unix()
			attr.Ctimensec = uint32(now.Nanosecond())
			if trash == 0 {
				attr.Nlink--
				if _type == TypeFile && attr.Nlink == 0 && m.sid > 0 {
					opened = m.of.IsOpen(inode)
				}
			} else if attr.Parent > 0 {
				attr.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name)
			trash = 0
		}

		defer func() { m.of.InvalidateChunk(inode, invalidateAttrOnly) }()
		var updateParent bool
		if !parent.IsTrash() && now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			updateParent = true
		}

		tx.delete(m.entryKey(parent, name))
		if updateParent {
			tx.set(m.inodeKey(parent), m.marshal(&pattr))
		}
		if attr.Nlink > 0 {
			tx.set(m.inodeKey(inode), m.marshal(attr))
			if trash > 0 {
				tx.set(m.entryKey(trash, m.trashEntry(parent, inode, name)), buf)
				if attr.Parent == 0 {
					tx.incrBy(m.parentKey(inode, trash), 1)
				}
			}
			if attr.Parent == 0 {
				tx.incrBy(m.parentKey(inode, parent), -1)
			}
		} else {
			switch _type {
			case TypeFile:
				if opened {
					tx.set(m.inodeKey(inode), m.marshal(attr))
					tx.set(m.sustainedKey(m.sid, inode), []byte{1})
				} else {
					tx.set(m.delfileKey(inode, attr.Length), m.packInt64(now.Unix()))
					tx.delete(m.inodeKey(inode))
					newSpace, newInode = -align4K(attr.Length), -1
				}
			case TypeSymlink:
				tx.delete(m.symKey(inode))
				fallthrough
			default:
				tx.delete(m.inodeKey(inode))
				newSpace, newInode = -align4K(0), -1
			}
			tx.deleteKeys(m.xattrKey(inode, ""))
			if attr.Parent == 0 {
				tx.deleteKeys(m.fmtKey("A", inode, "P"))
			}
		}
		return nil
	}, parent)
	if err == nil && trash == 0 {
		if _type == TypeFile && attr.Nlink == 0 {
			m.fileDeleted(opened, parent.IsTrash(), inode, attr.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, newSpace, newInode)
	}
	return errno(err)
}

func (m *kvMeta) doBatchUnlink(ctx Context, parent Ino, entries []*Entry, delta *dirStat, skipCheckTrash ...bool) syscall.Errno {
	if len(entries) == 0 {
		return 0
	}

	// Each entry averages ~6 tx operations, so batch size should be 10000/6
	maxOps := 10000
	if m.Name() == "etcd" {
		maxOps = 128
	}
	batchNum := maxOps / 6

	type entryInfo struct {
		name      string
		inode     Ino
		typ       uint8
		trash     Ino
		attr      *Attr
		trashName string
		buf       []byte
	}
	type dNode struct {
		opened bool
		length uint64
	}

	for len(entries) > 0 {
		batchSize := batchNum
		if batchSize > len(entries) {
			batchSize = len(entries)
		}
		batch := entries[:batchSize]
		entries = entries[batchSize:]

		var trash Ino
		if len(skipCheckTrash) == 0 || !skipCheckTrash[0] {
			if st := m.checkTrash(parent, &trash); st != 0 {
				return st
			}
		}

		var entryInfos []*entryInfo
		var batchDirLength, batchDirSpace, batchDirInodes int64
		var batchFsSpace, batchFsInodes int64
		var deltas ugQuotaDeltas
		var delNodes map[Ino]*dNode

		err := m.txn(ctx, func(tx *kvTxn) error {
			batchDirLength, batchDirSpace, batchDirInodes = 0, 0, 0
			batchFsSpace, batchFsInodes = 0, 0
			deltas = make(ugQuotaDeltas)
			delNodes = make(map[Ino]*dNode)
			pbuf := tx.get(m.inodeKey(parent))
			if pbuf == nil {
				return syscall.ENOENT
			}
			var pattr Attr
			m.parseAttr(pbuf, &pattr)
			if pattr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
				return st
			}
			if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}

			entryInfos = make([]*entryInfo, 0, len(batch))
			now := time.Now()
			keys := make([][]byte, 0, len(batch))
			for _, entry := range batch {
				keys = append(keys, m.entryKey(parent, string(entry.Name)))
			}
			vals := tx.gets(keys...)
			for idx, entry := range batch {
				if vals[idx] == nil {
					continue
				}
				typ, ino := m.parseEntry(vals[idx])
				if ino != entry.Inode || typ == TypeDirectory || (entry.Attr != nil && typ != entry.Attr.Typ) {
					continue
				}
				info := entryInfo{
					name:  string(entry.Name),
					inode: ino,
					typ:   typ,
					trash: trash,
					buf:   vals[idx],
				}
				entryInfos = append(entryInfos, &info)
			}

			// Collect unique inodes
			inodesSet := make(map[Ino]struct{}, len(entryInfos))
			for _, info := range entryInfos {
				if _, ok := inodesSet[info.inode]; !ok {
					inodesSet[info.inode] = struct{}{}
				}
			}

			// Load inode attrs for all distinct inodes
			if len(inodesSet) > 0 {
				inodesList := make([]Ino, 0, len(inodesSet))
				keys := make([][]byte, 0, len(inodesSet))
				for ino := range inodesSet {
					inodesList = append(inodesList, ino)
					keys = append(keys, m.inodeKey(ino))
				}
				rs := tx.gets(keys...)
				nodeMap := make(map[Ino]*Attr, len(inodesList))
				for i, v := range rs {
					if v == nil {
						continue
					}
					var a Attr
					m.parseAttr(v, &a)
					nodeMap[inodesList[i]] = &a
				}

				// Iterate all target entries, apply basic checks and build info
				for _, info := range entryInfos {
					attr, ok := nodeMap[info.inode]
					if !ok {
						info.trash = 0
						info.attr = nil
						continue
					}
					if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
						return syscall.EACCES
					}
					if (attr.Flags&FlagAppend) != 0 || (attr.Flags&FlagImmutable) != 0 {
						return syscall.EPERM
					}
					if (attr.Flags & FlagSkipTrash) != 0 {
						info.trash = 0
					}
					info.attr = attr
				}
			}

			// Check trash entries for hard links
			for _, info := range entryInfos {
				if info.attr == nil {
					continue
				}
				if info.trash > 0 && info.attr.Nlink > 1 {
					info.trashName = m.trashEntry(parent, info.inode, info.name)
					trashEntryKey := m.entryKey(info.trash, info.trashName)
					if tx.get(trashEntryKey) != nil {
						info.trash = 0
					}
				}
				// Update ctime
				info.attr.Ctime = now.Unix()
				info.attr.Ctimensec = uint32(now.Nanosecond())
				if info.trash > 0 && info.attr.Parent > 0 {
					info.attr.Parent = info.trash
				}
				if info.trash == 0 && info.attr.Nlink > 0 {
					info.attr.Nlink--
				}
			}

			// Check opened status for all inodes with Nlink == 0 after all decrements
			for _, info := range entryInfos {
				if info.attr != nil && info.trash == 0 && info.attr.Nlink == 0 && info.typ == TypeFile {
					opened := false
					if m.sid > 0 {
						opened = m.of.IsOpen(info.inode)
					}
					delNodes[info.inode] = &dNode{opened, info.attr.Length}
				}
			}

			var updateParent bool
			if !parent.IsTrash() && now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
				pattr.Mtime = now.Unix()
				pattr.Mtimensec = uint32(now.Nanosecond())
				pattr.Ctime = now.Unix()
				pattr.Ctimensec = uint32(now.Nanosecond())
				updateParent = true
			}

			nowUnix := now.Unix()
			visited := make(map[Ino]bool)
			visited[0] = true // skip dummyNode

			for _, info := range entryInfos {
				tx.delete(m.entryKey(parent, info.name))
				if info.attr == nil {
					continue
				}
				if info.typ == TypeFile {
					batchDirLength -= int64(info.attr.Length)
					batchDirSpace -= align4K(info.attr.Length)
				} else {
					batchDirSpace -= align4K(0)
				}
				batchDirInodes--
				if !visited[info.inode] {
					if info.attr.Nlink > 0 {
						tx.set(m.inodeKey(info.inode), m.marshal(info.attr))
					} else {
						switch info.typ {
						case TypeFile:
							if dnode, ok := delNodes[info.inode]; ok && dnode.opened {
								tx.set(m.inodeKey(info.inode), m.marshal(info.attr))
								tx.set(m.sustainedKey(m.sid, info.inode), []byte{1})
							} else {
								tx.set(m.delfileKey(info.inode, info.attr.Length), m.packInt64(nowUnix))
								tx.delete(m.inodeKey(info.inode))
								batchFsSpace -= align4K(info.attr.Length)
								batchFsInodes--
								deltas.add(&ugQuotaDelta{
									Uid:    info.attr.Uid,
									Gid:    info.attr.Gid,
									Space:  -align4K(info.attr.Length),
									Inodes: -1,
								})
							}
						case TypeSymlink:
							tx.delete(m.symKey(info.inode))
							fallthrough
						default:
							tx.delete(m.inodeKey(info.inode))
							batchFsSpace -= align4K(0)
							batchFsInodes--
							deltas.add(&ugQuotaDelta{
								Uid:    info.attr.Uid,
								Gid:    info.attr.Gid,
								Space:  -align4K(0),
								Inodes: -1,
							})
						}
						// Delete xattrs and parent keys
						tx.deleteKeys(m.xattrKey(info.inode, ""))
						if info.attr.Parent == 0 {
							tx.deleteKeys(m.fmtKey("A", info.inode, "P"))
						}
					}
					m.of.InvalidateChunk(info.inode, invalidateAttrOnly)
				}
				visited[info.inode] = true

				if info.trash > 0 {
					if info.trashName == "" {
						info.trashName = m.trashEntry(parent, info.inode, info.name)
					}
					tx.set(m.entryKey(info.trash, info.trashName), info.buf)
					if info.attr.Parent == 0 {
						tx.incrBy(m.parentKey(info.inode, info.trash), 1)
					}
				}
				if info.attr.Parent == 0 && info.attr.Nlink > 0 {
					tx.incrBy(m.parentKey(info.inode, parent), -1)
				}
			}

			// Update parent directory if needed
			if updateParent {
				tx.set(m.inodeKey(parent), m.marshal(&pattr))
			}

			return nil
		}, parent)

		if err != nil {
			return errno(err)
		}

		// Outside of transaction: trigger data deletion callbacks
		for inode, info := range delNodes {
			m.fileDeleted(info.opened, parent.IsTrash(), inode, info.length)
		}

		delta.length += batchDirLength
		delta.space += batchDirSpace
		delta.inodes += batchDirInodes
		m.updateStats(batchFsSpace, batchFsInodes)
		for _, q := range deltas {
			m.updateUserGroupStat(ctx, q.Uid, q.Gid, q.Space, q.Inodes)
		}
	}
	return 0
}

func (m *kvMeta) doRmdir(ctx Context, parent Ino, name string, pinode *Ino, oldAttr *Attr, skipCheckTrash ...bool) syscall.Errno {
	var trash Ino
	var attr Attr
	if !(len(skipCheckTrash) == 1 && skipCheckTrash[0]) {
		if st := m.checkTrash(parent, &trash); st != 0 {
			return st
		}
	}
	err := m.txn(ctx, func(tx *kvTxn) error {
		buf := tx.get(m.entryKey(parent, name))
		if buf == nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parent, name); e != nil {
				name = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
			}
		}
		if buf == nil {
			return syscall.ENOENT
		}
		_type, inode := m.parseEntry(buf)
		if _type != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pinode != nil {
			*pinode = inode
		}
		rs := tx.gets(m.inodeKey(parent), m.inodeKey(inode))
		if rs[0] == nil {
			return syscall.ENOENT
		}
		var pattr Attr
		m.parseAttr(rs[0], &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if (pattr.Flags&FlagAppend) != 0 || (pattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if tx.exist(m.entryKey(inode, "")) {
			return syscall.ENOTEMPTY
		}

		now := time.Now()
		if rs[1] != nil {
			m.parseAttr(rs[1], &attr)
			if oldAttr != nil {
				*oldAttr = attr
			}
			if ctx.Uid() != 0 && pattr.Mode&01000 != 0 && ctx.Uid() != pattr.Uid && ctx.Uid() != attr.Uid {
				return syscall.EACCES
			}
			if (attr.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			if trash > 0 {
				attr.Ctime = now.Unix()
				attr.Ctimensec = uint32(now.Nanosecond())
				attr.Parent = trash
			}
		} else {
			logger.Warnf("no attribute for inode %d (%d, %s)", inode, parent, name)
			trash = 0
		}
		pattr.Nlink--
		var updateParent bool
		if m.conf.SkipDirNlink <= 0 || tx.retry < m.conf.SkipDirNlink {
			updateParent = true
		} else {
			logger.Warnf("Skip updating nlink of directory %d to reduce conflict", parent)
		}
		if updateParent || now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			updateParent = true
		}

		if !parent.IsTrash() && updateParent {
			tx.set(m.inodeKey(parent), m.marshal(&pattr))
		}
		tx.delete(m.entryKey(parent, name))
		tx.delete(m.dirStatKey(inode))
		tx.delete(m.dirQuotaKey(inode))
		if trash > 0 {
			tx.set(m.inodeKey(inode), m.marshal(&attr))
			tx.set(m.entryKey(trash, m.trashEntry(parent, inode, name)), buf)
		} else {
			tx.delete(m.inodeKey(inode))
			tx.deleteKeys(m.xattrKey(inode, ""))
		}
		return nil
	}, parent)
	if err == nil && trash == 0 {
		m.updateStats(-align4K(0), -1)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, -align4K(0), -1)
	}
	return errno(err)
}

func (m *kvMeta) doRename(ctx Context, parentSrc Ino, nameSrc string, parentDst Ino, nameDst string, flags uint32, inode, tInode *Ino, attr, tAttr *Attr) syscall.Errno {
	var trash Ino
	if st := m.checkTrash(parentDst, &trash); st != 0 {
		return st
	}
	exchange := flags == RenameExchange
	var opened bool
	var dino Ino
	var dtyp uint8
	var tattr Attr
	var newSpace, newInode int64
	parentLocks := []Ino{parentDst}
	if !parentSrc.IsTrash() { // there should be no conflict if parentSrc is in trash, relax lock to accelerate `restore` subcommand
		parentLocks = append(parentLocks, parentSrc)
	}
	err := m.txn(ctx, func(tx *kvTxn) error {
		opened = false
		dino, dtyp = 0, 0
		tattr = Attr{}
		newSpace, newInode = 0, 0
		buf := tx.get(m.entryKey(parentSrc, nameSrc))
		if buf == nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentSrc, nameSrc); e != nil {
				nameSrc = string(e.Name)
				buf = m.packEntry(e.Attr.Typ, e.Inode)
			}
		}
		if buf == nil {
			return syscall.ENOENT
		}
		typ, ino := m.parseEntry(buf)
		if parentSrc == parentDst && nameSrc == nameDst {
			if inode != nil {
				*inode = ino
			}
			return nil
		}
		rs := tx.gets(m.inodeKey(parentSrc), m.inodeKey(parentDst), m.inodeKey(ino))
		if rs[0] == nil || rs[1] == nil || rs[2] == nil {
			return syscall.ENOENT
		}
		var sattr, dattr, iattr Attr
		m.parseAttr(rs[0], &sattr)
		if sattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if st := m.Access(ctx, parentSrc, MODE_MASK_W|MODE_MASK_X, &sattr); st != 0 {
			return st
		}
		m.parseAttr(rs[1], &dattr)
		if dattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if flags&RenameRestore == 0 && dattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parentDst, MODE_MASK_W|MODE_MASK_X, &dattr); st != 0 {
			return st
		}
		// TODO: check parentDst is a subdir of source node
		if ino == parentDst || ino == dattr.Parent {
			return syscall.EPERM
		}
		m.parseAttr(rs[2], &iattr)
		if (sattr.Flags&FlagAppend) != 0 || (sattr.Flags&FlagImmutable) != 0 || (dattr.Flags&FlagImmutable) != 0 || (iattr.Flags&FlagAppend) != 0 || (iattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if parentSrc != parentDst && sattr.Mode&0o1000 != 0 && ctx.Uid() != 0 &&
			ctx.Uid() != iattr.Uid && (ctx.Uid() != sattr.Uid || iattr.Typ == TypeDirectory) {
			return syscall.EACCES
		}

		dbuf := tx.get(m.entryKey(parentDst, nameDst))
		if dbuf == nil && m.conf.CaseInsensi {
			if e := m.resolveCase(ctx, parentDst, nameDst); e != nil {
				if string(e.Name) != nameSrc || parentDst != parentSrc {
					nameDst = string(e.Name)
					dbuf = m.packEntry(e.Attr.Typ, e.Inode)
				}
			}
		}
		var supdate, dupdate bool
		now := time.Now()
		if dbuf != nil {
			if flags&RenameNoReplace != 0 {
				return syscall.EEXIST
			}
			dtyp, dino = m.parseEntry(dbuf)
			a := tx.get(m.inodeKey(dino))
			if a == nil { // corrupt entry
				logger.Warnf("no attribute for inode %d (%d, %s)", dino, parentDst, nameDst)
				trash = 0
			}
			m.parseAttr(a, &tattr)
			if (tattr.Flags&FlagAppend) != 0 || (tattr.Flags&FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if (tattr.Flags & FlagSkipTrash) != 0 {
				trash = 0
			}
			tattr.Ctime = now.Unix()
			tattr.Ctimensec = uint32(now.Nanosecond())
			if exchange {
				if parentSrc != parentDst {
					if dtyp == TypeDirectory {
						tattr.Parent = parentSrc
						dattr.Nlink--
						sattr.Nlink++
						if m.conf.SkipDirNlink <= 0 || tx.retry < m.conf.SkipDirNlink {
							supdate, dupdate = true, true
						} else {
							logger.Warnf("Skip updating nlink of directory %d,%d to reduce conflict", parentSrc, parentDst)
						}
					} else if tattr.Parent > 0 {
						tattr.Parent = parentSrc
					}
				}
			} else if dino == ino {
				return nil
			} else if typ == TypeDirectory && dtyp != TypeDirectory {
				return syscall.ENOTDIR
			} else if typ != TypeDirectory && dtyp == TypeDirectory {
				return syscall.EISDIR
			} else {
				if dtyp == TypeDirectory {
					if tx.exist(m.entryKey(dino, "")) {
						return syscall.ENOTEMPTY
					}
					dattr.Nlink--
					dupdate = true
					if trash > 0 {
						tattr.Parent = trash
					}
				} else {
					if trash == 0 {
						tattr.Nlink--
						if dtyp == TypeFile && tattr.Nlink == 0 && m.sid > 0 {
							opened = m.of.IsOpen(dino)
						}
						defer func() { m.of.InvalidateChunk(dino, invalidateAttrOnly) }()
					} else if tattr.Parent > 0 {
						tattr.Parent = trash
					}
				}
			}
			if ctx.Uid() != 0 && dattr.Mode&01000 != 0 && ctx.Uid() != dattr.Uid && ctx.Uid() != tattr.Uid {
				return syscall.EACCES
			}
		} else {
			if exchange {
				return syscall.ENOENT
			}
		}
		if ctx.Uid() != 0 && sattr.Mode&01000 != 0 && ctx.Uid() != sattr.Uid && ctx.Uid() != iattr.Uid {
			return syscall.EACCES
		}

		if parentSrc != parentDst {
			if typ == TypeDirectory {
				iattr.Parent = parentDst
				sattr.Nlink--
				dattr.Nlink++
				if m.conf.SkipDirNlink <= 0 || tx.retry < m.conf.SkipDirNlink {
					supdate, dupdate = true, true
				} else {
					logger.Warnf("Skip updating nlink of directory %d,%d to reduce conflict", parentSrc, parentDst)
				}
			} else if iattr.Parent > 0 {
				iattr.Parent = parentDst
			}
		}
		if supdate || now.Sub(time.Unix(sattr.Mtime, int64(sattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
			sattr.Mtime = now.Unix()
			sattr.Mtimensec = uint32(now.Nanosecond())
			sattr.Ctime = now.Unix()
			sattr.Ctimensec = uint32(now.Nanosecond())
			supdate = true
		}
		if dupdate || now.Sub(time.Unix(dattr.Mtime, int64(dattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
			dattr.Mtime = now.Unix()
			dattr.Mtimensec = uint32(now.Nanosecond())
			dattr.Ctime = now.Unix()
			dattr.Ctimensec = uint32(now.Nanosecond())
			dupdate = true
		}
		iattr.Ctime = now.Unix()
		iattr.Ctimensec = uint32(now.Nanosecond())
		if inode != nil {
			*inode = ino
		}
		if attr != nil {
			*attr = iattr
		}
		if dino > 0 {
			*tInode = dino
			*tAttr = tattr
		}

		if exchange { // dino > 0
			tx.set(m.entryKey(parentSrc, nameSrc), dbuf)
			tx.set(m.inodeKey(dino), m.marshal(&tattr))
			if parentSrc != parentDst && tattr.Parent == 0 {
				tx.incrBy(m.parentKey(dino, parentSrc), 1)
				tx.incrBy(m.parentKey(dino, parentDst), -1)
			}
		} else {
			tx.delete(m.entryKey(parentSrc, nameSrc))
			if dino > 0 {
				if trash > 0 {
					tx.set(m.inodeKey(dino), m.marshal(&tattr))
					tx.set(m.entryKey(trash, m.trashEntry(parentDst, dino, nameDst)), dbuf)
					if tattr.Parent == 0 {
						tx.incrBy(m.parentKey(dino, trash), 1)
						tx.incrBy(m.parentKey(dino, parentDst), -1)
					}
				} else if dtyp != TypeDirectory && tattr.Nlink > 0 {
					tx.set(m.inodeKey(dino), m.marshal(&tattr))
					if tattr.Parent == 0 {
						tx.incrBy(m.parentKey(dino, parentDst), -1)
					}
				} else {
					if dtyp == TypeFile {
						if opened {
							tx.set(m.inodeKey(dino), m.marshal(&tattr))
							tx.set(m.sustainedKey(m.sid, dino), []byte{1})
						} else {
							tx.set(m.delfileKey(dino, tattr.Length), m.packInt64(now.Unix()))
							tx.delete(m.inodeKey(dino))
							newSpace, newInode = -align4K(tattr.Length), -1
						}
					} else {
						if dtyp == TypeSymlink {
							tx.delete(m.symKey(dino))
						}
						tx.delete(m.inodeKey(dino))
						newSpace, newInode = -align4K(0), -1
					}
					tx.deleteKeys(m.xattrKey(dino, ""))
					if tattr.Parent == 0 {
						tx.deleteKeys(m.fmtKey("A", dino, "P"))
					}
				}
				if dtyp == TypeDirectory {
					tx.delete(m.dirQuotaKey(dino))
				}
			}
		}
		if parentDst != parentSrc {
			if !parentSrc.IsTrash() && supdate {
				tx.set(m.inodeKey(parentSrc), m.marshal(&sattr))
			}
			if iattr.Parent == 0 {
				tx.incrBy(m.parentKey(ino, parentDst), 1)
				tx.incrBy(m.parentKey(ino, parentSrc), -1)
			}
		}
		tx.set(m.inodeKey(ino), m.marshal(&iattr))
		tx.set(m.entryKey(parentDst, nameDst), buf)
		if dupdate {
			tx.set(m.inodeKey(parentDst), m.marshal(&dattr))
		}
		return nil
	}, parentLocks...)
	if err == nil && !exchange && trash == 0 {
		if dino > 0 && dtyp == TypeFile && tattr.Nlink == 0 {
			m.fileDeleted(opened, false, dino, tattr.Length)
		}
		m.updateStats(newSpace, newInode)
		m.updateUserGroupStat(ctx, tattr.Uid, tattr.Gid, newSpace, newInode)
	}
	return errno(err)
}

func (m *kvMeta) doLink(ctx Context, inode, parent Ino, name string, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		rs := tx.gets(m.inodeKey(parent), m.inodeKey(inode))
		if rs[0] == nil || rs[1] == nil {
			return syscall.ENOENT
		}
		var pattr, iattr Attr
		m.parseAttr(rs[0], &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); st != 0 {
			return st
		}
		if pattr.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}
		m.parseAttr(rs[1], &iattr)
		if iattr.Typ == TypeDirectory {
			return syscall.EPERM
		}
		if (iattr.Flags&FlagAppend) != 0 || (iattr.Flags&FlagImmutable) != 0 {
			return syscall.EPERM
		}
		buf := tx.get(m.entryKey(parent, name))
		if buf != nil || m.conf.CaseInsensi && m.resolveCase(ctx, parent, name) != nil {
			return syscall.EEXIST
		}

		var updateParent bool
		now := time.Now()
		if now.Sub(time.Unix(pattr.Mtime, int64(pattr.Mtimensec))) >= m.conf.SkipDirMtime*time.Duration(tx.retry+1) {
			pattr.Mtime = now.Unix()
			pattr.Mtimensec = uint32(now.Nanosecond())
			pattr.Ctime = now.Unix()
			pattr.Ctimensec = uint32(now.Nanosecond())
			updateParent = true
		}
		oldParent := iattr.Parent
		iattr.Parent = 0
		iattr.Ctime = now.Unix()
		iattr.Ctimensec = uint32(now.Nanosecond())
		iattr.Nlink++
		tx.set(m.entryKey(parent, name), m.packEntry(iattr.Typ, inode))
		if updateParent {
			tx.set(m.inodeKey(parent), m.marshal(&pattr))
		}
		tx.set(m.inodeKey(inode), m.marshal(&iattr))
		if oldParent > 0 {
			tx.incrBy(m.parentKey(inode, oldParent), 1)
		}
		tx.incrBy(m.parentKey(inode, parent), 1)
		if attr != nil {
			*attr = iattr
		}
		return nil
	}, parent))
}

func (m *kvMeta) fillAttr(entries []*Entry) (err error) {
	if len(entries) == 0 {
		return nil
	}
	var keys = make([][]byte, len(entries))
	for i, e := range entries {
		keys[i] = m.inodeKey(e.Inode)
	}
	var rs [][]byte
	err = m.client.simpleTxn(Background(), func(tx *kvTxn) error {
		rs = tx.gets(keys...)
		return nil
	}, 0)
	if err != nil {
		return err
	}
	for j, re := range rs {
		if re != nil {
			m.parseAttr(re, entries[j].Attr)
			// If `readdirplus` returns complete attributes, kernel may not invoke `GetAttr`. Therefore, we must also validate chunk cache here to prevent stale cache, which may lead to data corruption.
			m.of.Update(entries[j].Inode, entries[j].Attr)
		}
	}
	return err
}

func (m *kvMeta) doReaddir(ctx Context, inode Ino, plus uint8, entries *[]*Entry, limit int) syscall.Errno {
	// TODO: handle big directory
	vals, err := m.scanValues(ctx, m.entryKey(inode, ""), limit, nil)
	if err != nil {
		return errno(err)
	}
	prefix := len(m.entryKey(inode, ""))
	for name, buf := range vals {
		typ, ino := m.parseEntry(buf)
		if len(name) == prefix {
			logger.Errorf("Corrupt entry with empty name: inode %d parent %d", ino, inode)
			continue
		}
		*entries = append(*entries, &Entry{
			Inode: ino,
			Name:  []byte(name)[prefix:],
			Attr:  &Attr{Typ: typ},
		})
	}

	if plus != 0 && len(*entries) != 0 {
		if ctx.Canceled() {
			return errno(ctx.Err())
		}
		batchSize := 4096
		nEntries := len(*entries)
		if nEntries <= batchSize {
			err = m.fillAttr(*entries)
		} else {
			indexCh := make(chan []*Entry, 10)
			var wg sync.WaitGroup
			for i := 0; i < 2; i++ {
				wg.Add(1)
				go func() {
					defer wg.Done()
					for es := range indexCh {
						if e := m.fillAttr(es); e != nil {
							err = e
							break
						}
					}
				}()
			}
			for i := 0; i < nEntries; i += batchSize {
				if i+batchSize > nEntries {
					indexCh <- (*entries)[i:]
				} else {
					indexCh <- (*entries)[i : i+batchSize]
				}
			}
			close(indexCh)
			wg.Wait()
		}
		if err != nil {
			return errno(err)
		}
	}
	return 0
}

func (m *kvMeta) doDeleteSustainedInode(sid uint64, inode Ino) error {
	var attr Attr
	var newSpace int64
	err := m.txn(Background(), func(tx *kvTxn) error {
		newSpace = 0
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			return nil
		}
		m.parseAttr(a, &attr)
		newSpace = -align4K(attr.Length)
		tx.set(m.delfileKey(inode, attr.Length), m.packInt64(time.Now().Unix()))
		tx.delete(m.inodeKey(inode))
		tx.delete(m.sustainedKey(sid, inode))
		return nil
	}, inode)
	if err == nil && newSpace < 0 {
		m.updateStats(newSpace, -1)
		m.tryDeleteFileData(inode, attr.Length, false)
		m.updateUserGroupStat(Background(), attr.Uid, attr.Gid, newSpace, 0)
	}
	return err
}

func (m *kvMeta) doRead(ctx Context, inode Ino, indx uint32) ([]*slice, syscall.Errno) {
	val, err := m.get(m.chunkKey(inode, indx))
	if err != nil {
		return nil, errno(err)
	}
	return readSliceBuf(val), 0
}

func (m *kvMeta) doList(ctx Context, inode Ino) ([]*slice, syscall.Errno) {
	vals, err := m.scanValues(ctx, m.fmtKey("A", inode, "C"), -1, nil)
	if err != nil {
		logger.Warnf("list of inode %d: %s", inode, err)
		return nil, errno(err)
	}
	var slices []*slice
	for _, v := range vals {
		ss := readSliceBuf(v)
		if ss == nil {
			continue
		}
		slices = append(slices, ss...)
	}

	return slices, 0
}

func (m *kvMeta) doWrite(ctx Context, inode Ino, indx uint32, off uint32, slice Slice, mtime time.Time, numSlices *int, delta *dirStat, attr *Attr) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		*delta = dirStat{}
		*attr = Attr{}
		rs := tx.gets(m.inodeKey(inode), m.chunkKey(inode, indx))
		if rs[0] == nil {
			return syscall.ENOENT
		}
		m.parseAttr(rs[0], attr)
		if attr.Typ != TypeFile {
			return syscall.EPERM
		}
		if len(rs[1])%sliceBytes != 0 {
			logger.Errorf("Invalid chunk value for inode %d indx %d: %d", inode, indx, len(rs[1]))
			return syscall.EIO
		}
		newleng := uint64(indx)*ChunkSize + uint64(off) + uint64(slice.Len)
		if newleng > attr.Length {
			delta.length = int64(newleng - attr.Length)
			delta.space = align4K(newleng) - align4K(attr.Length)
			attr.Length = newleng
		}
		if err := m.checkQuota(ctx, delta.space, 0, attr.Uid, attr.Gid, m.getParents(tx, inode, attr.Parent)...); err != 0 {
			return err
		}
		now := time.Now()
		attr.Mtime = mtime.Unix()
		attr.Mtimensec = uint32(mtime.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())
		val := marshalSlice(off, slice.Id, slice.Size, slice.Off, slice.Len)
		for i := 0; i < len(rs[1]); i += sliceBytes {
			if bytes.Equal(rs[1][i:i+sliceBytes], val) {
				logger.Warnf("Write same slice for inode %d indx %d sliceId %d", inode, indx, slice.Id)
				return nil
			}
		}
		val = append(rs[1], val...)
		tx.set(m.inodeKey(inode), m.marshal(attr))
		tx.set(m.chunkKey(inode, indx), val)
		*numSlices = len(val) / sliceBytes
		return nil
	}, inode))
}

func (m *kvMeta) CopyFileRange(ctx Context, fin Ino, offIn uint64, fout Ino, offOut uint64, size uint64, flags uint32, copied, outLength *uint64) syscall.Errno {
	defer m.timeit("CopyFileRange", time.Now())
	var newLength, newSpace int64
	f := m.of.find(fout)
	if f != nil {
		f.Lock()
		defer f.Unlock()
	}
	defer func() { m.of.InvalidateChunk(fout, invalidateAllChunks) }()
	var sattr, attr Attr
	err := m.txn(ctx, func(tx *kvTxn) error {
		newLength, newSpace = 0, 0
		rs := tx.gets(m.inodeKey(fin), m.inodeKey(fout))
		if rs[0] == nil || rs[1] == nil {
			return syscall.ENOENT
		}
		sattr = Attr{}
		m.parseAttr(rs[0], &sattr)
		if sattr.Typ != TypeFile {
			return syscall.EINVAL
		}
		if offIn >= sattr.Length {
			if copied != nil {
				*copied = 0
			}
			return nil
		}
		size := size
		if offIn+size > sattr.Length {
			size = sattr.Length - offIn
		}
		attr = Attr{}
		m.parseAttr(rs[1], &attr)
		if attr.Typ != TypeFile {
			return syscall.EINVAL
		}
		if (attr.Flags&FlagImmutable) != 0 || (attr.Flags&FlagAppend) != 0 {
			return syscall.EPERM
		}

		newleng := offOut + size
		if newleng > attr.Length {
			newLength = int64(newleng - attr.Length)
			newSpace = align4K(newleng) - align4K(attr.Length)
			attr.Length = newleng
		}
		if err := m.checkQuota(ctx, newSpace, 0, attr.Uid, attr.Gid, m.getParents(tx, fout, attr.Parent)...); err != 0 {
			return err
		}
		now := time.Now()
		attr.Mtime = now.Unix()
		attr.Mtimensec = uint32(now.Nanosecond())
		attr.Ctime = now.Unix()
		attr.Ctimensec = uint32(now.Nanosecond())
		if outLength != nil {
			*outLength = attr.Length
		}

		vals := make(map[string][]byte)
		tx.scan(m.chunkKey(fin, uint32(offIn/ChunkSize)), m.chunkKey(fin, uint32((offIn+size)/ChunkSize)+1),
			false, func(k, v []byte) bool {
				vals[string(k)] = v
				return true
			})
		chunks := make(map[uint32][]*slice)
		for indx := uint32(offIn / ChunkSize); indx <= uint32((offIn+size)/ChunkSize); indx++ {
			if v, ok := vals[string(m.chunkKey(fin, indx))]; ok {
				chunks[indx] = readSliceBuf(v)
				if chunks[indx] == nil {
					return syscall.EIO
				}
			}
		}

		coff := offIn / ChunkSize * ChunkSize
		chunksMap := make(map[string][]byte)
		for coff < offIn+size {
			if coff%ChunkSize != 0 {
				panic("coff")
			}
			// Add a zero chunk for hole
			ss := append([]*slice{{len: ChunkSize}}, chunks[uint32(coff/ChunkSize)]...)
			cs := buildSlice(ss)
			for _, s := range cs {
				pos := coff
				coff += uint64(s.Len)
				if pos < offIn+size && pos+uint64(s.Len) > offIn {
					if pos < offIn {
						dec := offIn - pos
						s.Off += uint32(dec)
						pos += dec
						s.Len -= uint32(dec)
					}
					if pos+uint64(s.Len) > offIn+size {
						dec := pos + uint64(s.Len) - (offIn + size)
						s.Len -= uint32(dec)
					}
					doff := pos - offIn + offOut
					indx := uint32(doff / ChunkSize)
					dpos := uint32(doff % ChunkSize)
					if dpos+s.Len > ChunkSize {
						chunksMap[string(m.chunkKey(fout, indx))] = append(chunksMap[string(m.chunkKey(fout, indx))], marshalSlice(dpos, s.Id, s.Size, s.Off, ChunkSize-dpos)...)
						if s.Id > 0 {
							tx.incrBy(m.sliceKey(s.Id, s.Size), 1)
						}
						skip := ChunkSize - dpos
						chunksMap[string(m.chunkKey(fout, indx+1))] = append(chunksMap[string(m.chunkKey(fout, indx+1))], marshalSlice(0, s.Id, s.Size, s.Off+skip, s.Len-skip)...)
						if s.Id > 0 {
							tx.incrBy(m.sliceKey(s.Id, s.Size), 1)
						}
					} else {
						chunksMap[string(m.chunkKey(fout, indx))] = append(chunksMap[string(m.chunkKey(fout, indx))], marshalSlice(dpos, s.Id, s.Size, s.Off, s.Len)...)
						if s.Id > 0 {
							tx.incrBy(m.sliceKey(s.Id, s.Size), 1)
						}
					}
				}
			}
		}
		for k, v := range chunksMap {
			tx.append([]byte(k), v)
		}
		tx.set(m.inodeKey(fout), m.marshal(&attr))
		if copied != nil {
			*copied = size
		}
		return nil
	}, fout)
	if err == nil {
		m.updateParentStat(ctx, fout, attr.Parent, newLength, newSpace)
		m.updateUserGroupStat(ctx, attr.Uid, attr.Gid, newSpace, 0)
	}
	return errno(err)
}

func (m *kvMeta) getParents(tx *kvTxn, inode, parent Ino) []Ino {
	if parent > 0 {
		return []Ino{parent}
	}
	var ps []Ino
	prefix := m.fmtKey("A", inode, "P")
	tx.scan(prefix, nextKey(prefix), false, func(k, v []byte) bool {
		if len(k) == 1+8+1+8 && parseCounter(v) > 0 {
			ps = append(ps, m.decodeInode([]byte(k[10:])))
		}
		return true
	})
	return ps
}

func (m *kvMeta) doGetParents(ctx Context, inode Ino) map[Ino]int {
	vals, err := m.scanValues(ctx, m.fmtKey("A", inode, "P"), -1, func(k, v []byte) bool {
		// parents: AiiiiiiiiPiiiiiiii
		return len(k) == 1+8+1+8 && parseCounter(v) > 0
	})
	if err != nil {
		logger.Warnf("Scan parent key of inode %d: %s", inode, err)
		return nil
	}
	ps := make(map[Ino]int)
	for k, v := range vals {
		ps[m.decodeInode([]byte(k[10:]))] = int(parseCounter(v))
	}
	return ps
}

func (m *kvMeta) doSyncDirStat(ctx Context, ino Ino) (*dirStat, syscall.Errno) {
	if m.conf.ReadOnly {
		return nil, syscall.EROFS
	}
	stat, st := m.calcDirStat(ctx, ino)
	if st != 0 {
		return nil, st
	}
	err := m.client.txn(ctx, func(tx *kvTxn) error {
		if tx.get(m.inodeKey(ino)) == nil {
			return syscall.ENOENT
		}
		tx.set(m.dirStatKey(ino), m.packDirStat(stat))
		return nil
	}, 0)
	if err != nil && m.shouldRetry(err) {
		// other clients have synced
		err = nil
	}
	return stat, errno(err)
}

func (m *kvMeta) doUpdateDirStat(ctx Context, batch map[Ino]dirStat) error {
	syncMap := make(map[Ino]bool, 0)
	for _, group := range m.groupBatch(batch, 20) {
		err := m.txn(ctx, func(tx *kvTxn) error {
			keys := make([][]byte, 0, len(group))
			for _, ino := range group {
				keys = append(keys, m.dirStatKey(ino))
			}
			for i, rawStat := range tx.gets(keys...) {
				ino := group[i]
				if rawStat == nil {
					syncMap[ino] = true
					continue
				}
				st := m.parseDirStat(rawStat)
				stat := batch[ino]
				st.length += stat.length
				st.space += stat.space
				st.inodes += stat.inodes
				if st.length < 0 || st.space < 0 || st.inodes < 0 {
					logger.Warnf("dir stat of inode %d is invalid: %+v, try to sync", ino, st)
					syncMap[ino] = true
					continue
				}
				tx.set(keys[i], m.packDirStat(st))
			}
			return nil
		})
		if err != nil {
			return err
		}
	}

	if len(syncMap) > 0 {
		m.parallelSyncDirStat(ctx, syncMap).Wait()
	}
	return nil
}

func (m *kvMeta) doGetDirStat(ctx Context, ino Ino, trySync bool) (*dirStat, syscall.Errno) {
	rawStat, err := m.get(m.dirStatKey(ino))
	if err != nil {
		return nil, errno(err)
	}
	if rawStat != nil {
		return m.parseDirStat(rawStat), 0
	}
	if trySync {
		return m.doSyncDirStat(ctx, ino)
	}
	return nil, 0
}

func (m *kvMeta) doFindDeletedFiles(ts int64, limit int) (map[Ino]uint64, error) {
	if limit == 0 {
		return nil, nil
	}
	klen := 1 + 8 + 8
	files := make(map[Ino]uint64)
	var count int
	err := m.client.scan(m.fmtKey("D"), func(k, v []byte) bool {
		if len(k) == klen && len(v) == 8 && m.parseInt64(v) < ts {
			rb := utils.FromBuffer([]byte(k)[1:])
			files[m.decodeInode(rb.Get(8))] = rb.Get64()
			count++
		}
		return limit < 0 || count < limit
	})
	return files, err
}

func (m *kvMeta) doCleanupSlices(ctx Context, count *uint64) error {
	if m.Name() == "tikv" {
		m.client.gc()
	}
	klen := 1 + 8 + 4
	var sErr, cErr error
	if sErr = m.client.scan(m.fmtKey("K"), func(k, v []byte) bool {
		if len(k) == klen && len(v) == 8 && parseCounter(v) <= 0 {
			rb := utils.FromBuffer(k[1:])
			id := rb.Get64()
			size := rb.Get32()
			refs := parseCounter(v)
			if refs < 0 {
				m.deleteSlice(id, size)
				if count != nil {
					*count++
				}
			} else {
				m.cleanupZeroRef(id, size)
			}
			if ctx.Canceled() {
				cErr = ctx.Err()
				return false
			}
		}
		return true
	}); sErr != nil {
		return sErr
	}
	return cErr
}

func (m *kvMeta) deleteChunk(inode Ino, indx uint32) error {
	key := m.chunkKey(inode, indx)
	var todel []*slice
	err := m.txn(Background(), func(tx *kvTxn) error {
		todel = todel[:0]
		buf := tx.get(key)
		slices := readSliceBuf(buf)
		if slices == nil {
			logger.Errorf("Corrupt value for inode %d chunk index %d, use `gc` to clean up leaked slices", inode, indx)
		}
		tx.delete(key)
		for _, s := range slices {
			if s.id > 0 && tx.incrBy(m.sliceKey(s.id, s.size), -1) < 0 {
				todel = append(todel, s)
			}
		}
		return nil
	}, inode)
	if err != nil {
		return err
	}
	for _, s := range todel {
		m.deleteSlice(s.id, s.size)
	}
	return nil
}

func (m *kvMeta) cleanupZeroRef(id uint64, size uint32) {
	_ = m.txn(Background(), func(tx *kvTxn) error {
		v := tx.incrBy(m.sliceKey(id, size), 0)
		if v != 0 {
			return syscall.EINVAL
		}
		tx.delete(m.sliceKey(id, size))
		return nil
	})
}

func (m *kvMeta) doDeleteFileData(inode Ino, length uint64) {
	keys, err := m.scanKeys(Background(), m.fmtKey("A", inode, "C"))
	if err != nil {
		logger.Warnf("delete chunks of inode %d: %s", inode, err)
		return
	}
	for i := range keys {
		idx := binary.BigEndian.Uint32(keys[i][10:])
		err := m.deleteChunk(inode, idx)
		if err != nil {
			logger.Warnf("delete chunk %d:%d: %s", inode, idx, err)
			return
		}
	}
	_ = m.deleteKeys(m.delfileKey(inode, length))
}

func (m *kvMeta) doCleanupDelayedSlices(ctx Context, edge int64) (int, error) {
	var count int
	var ss []Slice
	var rs []int64
	var keys [][]byte
	var batch int = 1e5
	for {
		if err := m.client.txn(ctx, func(tx *kvTxn) error {
			keys = keys[:0]
			var c int
			tx.scan(m.delSliceKey(0, 0), m.delSliceKey(edge, 0),
				true, func(k, v []byte) bool {
					if len(k) == 1+8+8 { // delayed slices: Lttttttttcccccccc
						keys = append(keys, k)
						c++
					}
					return c < batch
				})
			return nil
		}, 0); err != nil {
			logger.Warnf("Scan delayed slices: %s", err)
			return count, err
		}

		for _, key := range keys {
			if ctx.Canceled() {
				return count, ctx.Err()
			}
			if err := m.txn(ctx, func(tx *kvTxn) error {
				ss, rs = ss[:0], rs[:0]
				buf := tx.get(key)
				if len(buf) == 0 {
					return nil
				}
				m.decodeDelayedSlices(buf, &ss)
				if len(ss) == 0 {
					return fmt.Errorf("invalid value for delayed slices %q: %v", key, buf)
				}
				for _, s := range ss {
					rs = append(rs, tx.incrBy(m.sliceKey(s.Id, s.Size), -1))
				}
				tx.delete(key)
				return nil
			}); err != nil {
				logger.Warnf("Cleanup delayed slices %q: %s", key, err)
				continue
			}
			for i, s := range ss {
				if rs[i] < 0 {
					m.deleteSlice(s.Id, s.Size)
					count++
				}
				if ctx.Canceled() {
					return count, ctx.Err()
				}
			}
		}
		if len(keys) < batch {
			break
		}
	}
	return count, nil
}

func (m *kvMeta) doCompactChunk(inode Ino, indx uint32, buf []byte, ss []*slice, skipped int, pos uint32, id uint64, size uint32, delayed []byte) syscall.Errno {
	st := errno(m.txn(Background(), func(tx *kvTxn) error {
		buf2 := tx.get(m.chunkKey(inode, indx))
		if len(buf2) < len(buf) || !bytes.Equal(buf, buf2[:len(buf)]) {
			logger.Infof("chunk %d:%d was changed %d -> %d", inode, indx, len(buf), len(buf2))
			return syscall.EINVAL
		}

		buf2 = append(append(buf2[:skipped*sliceBytes], marshalSlice(pos, id, size, 0, size)...), buf2[len(buf):]...)
		tx.set(m.chunkKey(inode, indx), buf2)
		// create the key to tracking it
		tx.set(m.sliceKey(id, size), make([]byte, 8))
		if delayed != nil {
			if len(delayed) > 0 {
				tx.set(m.delSliceKey(time.Now().Unix(), id), delayed)
			}
		} else {
			for _, s := range ss {
				if s.id > 0 {
					tx.incrBy(m.sliceKey(s.id, s.size), -1)
				}
			}
		}
		return nil
	}, inode)) // less conflicts with `write`
	// there could be false-negative that the compaction is successful, double-check
	if st != 0 && st != syscall.EINVAL {
		refs, e := m.get(m.sliceKey(id, size))
		if e == nil {
			if len(refs) > 0 {
				st = 0
			} else {
				logger.Infof("compacted chunk %d was not used", id)
				st = syscall.EINVAL
			}
		}
	}

	if st == syscall.EINVAL {
		_ = m.txn(Background(), func(tx *kvTxn) error {
			tx.incrBy(m.sliceKey(id, size), -1)
			return nil
		})
	} else if st == 0 {
		m.cleanupZeroRef(id, size)
		if delayed == nil {
			var refs int64
			for _, s := range ss {
				if s.id > 0 && m.client.txn(Background(), func(tx *kvTxn) error {
					refs = tx.incrBy(m.sliceKey(s.id, s.size), 0)
					return nil
				}, 0) == nil && refs < 0 {
					m.deleteSlice(s.id, s.size)
				}
			}
		}
	}
	return st
}

func (m *kvMeta) scanAllChunks(ctx Context, ch chan<- cchunk, bar *utils.Bar) error {
	// AiiiiiiiiCnnnn     file chunks
	klen := 1 + 8 + 1 + 4
	return m.client.scan(m.fmtKey("A"), func(k, v []byte) bool {
		if len(k) == klen && k[1+8] == 'C' && len(v) > sliceBytes {
			bar.IncrTotal(1)
			ch <- cchunk{
				inode:  m.decodeInode(k[1:9]),
				indx:   binary.BigEndian.Uint32(k[10:]),
				slices: len(v) / sliceBytes,
			}
		}
		return true
	})
}

func (m *kvMeta) ListSlices(ctx Context, slices map[Ino][]Slice, scanPending, delete bool, showProgress func()) syscall.Errno {
	if delete {
		_ = m.doCleanupSlices(ctx, nil)
	}
	// AiiiiiiiiCnnnn     file chunks
	klen := 1 + 8 + 1 + 4
	if err := m.client.scan(m.fmtKey("A"), func(key, value []byte) bool {
		if len(key) != klen || key[1+8] != 'C' {
			return true
		}
		inode := m.decodeInode([]byte(key)[1:9])
		ss := readSliceBuf(value)
		if ss == nil {
			logger.Errorf("Corrupt value for inode %d chunk key %s", inode, key)
			return true
		}
		for _, s := range ss {
			if s.id > 0 {
				slices[inode] = append(slices[inode], Slice{Id: s.id, Size: s.size})
				if showProgress != nil {
					showProgress()
				}
			}
		}
		return true
	}); err != nil {
		return errno(err)
	}

	if scanPending {
		// slice refs: Kccccccccnnnn
		klen = 1 + 8 + 4
		_ = m.client.scan(m.fmtKey("K"), func(k, v []byte) bool {
			if len(k) == klen && len(v) == 8 && parseCounter(v) < 0 {
				rb := utils.FromBuffer([]byte(k)[1:])
				slices[0] = append(slices[0], Slice{Id: rb.Get64(), Size: rb.Get32()})
			}
			return true

		})
	}

	if m.getFormat().TrashDays == 0 {
		return 0
	}
	return errno(m.scanTrashSlices(ctx, func(ss []Slice, _ int64) (bool, error) {
		slices[1] = append(slices[1], ss...)
		if showProgress != nil {
			for range ss {
				showProgress()
			}
		}
		return false, nil
	}))
}

func (m *kvMeta) scanTrashSlices(ctx Context, scan trashSliceScan) error {
	if scan == nil {
		return nil
	}

	// delayed slices: Lttttttttcccccccc
	klen := 1 + 8 + 8
	var ss []Slice
	var rs []int64
	return m.client.scan(m.fmtKey("L"), func(key, value []byte) bool {
		if len(key) != klen || len(value) == 0 {
			return true
		}
		var clean bool
		var err error
		err = m.txn(ctx, func(tx *kvTxn) error {
			ss, rs = ss[:0], rs[:0]
			v := tx.get(key)
			if len(v) == 0 {
				return nil
			}
			b := utils.ReadBuffer(key[1:])
			ts := b.Get64()
			m.decodeDelayedSlices(v, &ss)
			clean, err = scan(ss, int64(ts))
			if err != nil {
				return err
			}
			if clean {
				for _, s := range ss {
					rs = append(rs, tx.incrBy(m.sliceKey(s.Id, s.Size), -1))
				}
				tx.delete(key)
			}
			return nil
		})
		if err != nil {
			logger.Warnf("scan trash slices %s: %s", key, err)
			return true
		}
		if clean && len(rs) == len(ss) {
			for i, s := range ss {
				if rs[i] < 0 {
					m.deleteSlice(s.Id, s.Size)
				}
			}
		}
		return true
	})
}

func (m *kvMeta) scanPendingSlices(ctx Context, scan pendingSliceScan) error {
	if scan == nil {
		return nil
	}

	// slice refs: Kiiiiiiiissss
	klen := 1 + 8 + 4
	return m.client.scan(m.fmtKey("K"), func(key, v []byte) bool {
		refs := parseCounter(v)
		if len(key) == klen && refs < 0 {
			b := utils.ReadBuffer([]byte(key)[1:])
			id := b.Get64()
			size := b.Get32()
			clean, err := scan(id, size)
			if err != nil {
				logger.Warnf("scan pending deleted slices %d %d: %s", id, size, err)
				return true
			}
			if clean {
				// TODO: m.deleteSlice(id, size)
				// avoid lint warning
				_ = clean
			}
		}
		return true
	})
}

func (m *kvMeta) scanPendingFiles(ctx Context, scan pendingFileScan) error {
	if scan == nil {
		return nil
	}
	// deleted files: Diiiiiiiissssssss
	klen := 1 + 8 + 8

	var scanErr error
	if err := m.client.scan(m.fmtKey("D"), func(key, val []byte) bool {
		if scanErr != nil {
			return true
		}
		if len(key) != klen {
			scanErr = fmt.Errorf("invalid key %x", key)
			return true
		}
		ino := m.decodeInode(key[1:9])
		size := binary.BigEndian.Uint64(key[9:])
		ts := m.parseInt64(val)
		_, scanErr = scan(ino, size, ts)
		return true
	}); err != nil {
		return err
	}

	return scanErr
}

func (m *kvMeta) doRepair(ctx Context, inode Ino, attr *Attr) syscall.Errno {
	prefix := m.entryKey(inode, "")
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		attr.Nlink = 2
		tx.scan(prefix, nextKey(prefix), false, func(k, v []byte) bool {
			typ, _ := m.parseEntry(v)
			if typ == TypeDirectory {
				attr.Nlink++
			}
			return true
		})
		tx.set(m.inodeKey(inode), m.marshal(attr))
		return nil
	}, inode))
}

func (m *kvMeta) GetXattr(ctx Context, inode Ino, name string, vbuff *[]byte) syscall.Errno {
	defer m.timeit("GetXattr", time.Now())
	inode = m.checkRoot(inode)
	buf, err := m.get(m.xattrKey(inode, name))
	if err != nil {
		return errno(err)
	}
	if buf == nil {
		return ENOATTR
	}
	*vbuff = buf
	return 0
}

func (m *kvMeta) ListXattr(ctx Context, inode Ino, names *[]byte) syscall.Errno {
	defer m.timeit("ListXattr", time.Now())
	inode = m.checkRoot(inode)
	keys, err := m.scanKeys(ctx, m.xattrKey(inode, ""))
	if err != nil {
		return errno(err)
	}
	*names = nil
	prefix := len(m.xattrKey(inode, ""))
	for _, name := range keys {
		*names = append(*names, name[prefix:]...)
		*names = append(*names, 0)
	}

	val, err := m.get(m.inodeKey(inode))
	if err != nil {
		return errno(err)
	}
	if val == nil {
		return syscall.ENOENT
	}
	attr := &Attr{}
	m.parseAttr(val, attr)
	setXAttrACL(names, attr.AccessACL, attr.DefaultACL)
	return 0
}

func (m *kvMeta) doSetXattr(ctx Context, inode Ino, name string, value []byte, flags uint32) syscall.Errno {
	if len(value) == 0 && m.Name() == "tikv" {
		return syscall.EINVAL
	}
	key := m.xattrKey(inode, name)
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		v := tx.get(key)
		switch flags {
		case XattrCreate:
			if v != nil {
				return syscall.EEXIST
			}
		case XattrReplace:
			if v == nil {
				return ENOATTR
			}
		}
		if v == nil || !bytes.Equal(v, value) {
			tx.set(key, value)
		}
		return nil
	}))
}

func (m *kvMeta) doRemoveXattr(ctx Context, inode Ino, name string) syscall.Errno {
	key := m.xattrKey(inode, name)
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		value := tx.get(key)
		if value == nil {
			return ENOATTR
		}
		tx.delete(key)
		return nil
	}))
}

func (m *kvMeta) getQuotaKey(qtype uint32, key uint64) ([]byte, error) {
	switch qtype {
	case DirQuotaType:
		return m.dirQuotaKey(Ino(key)), nil
	case UserQuotaType:
		return m.userQuotaKey(key), nil
	case GroupQuotaType:
		return m.groupQuotaKey(key), nil
	default:
		return nil, fmt.Errorf("invalid quota type: %d", qtype)
	}
}

func (m *kvMeta) doGetQuota(ctx Context, qtype uint32, key uint64) (*Quota, error) {
	quotaKey, err := m.getQuotaKey(qtype, key)
	if err != nil {
		return nil, err
	}

	buf, err := m.get(quotaKey)
	if err != nil {
		return nil, err
	}
	if buf == nil {
		return nil, nil
	}
	if len(buf) != 32 {
		return nil, fmt.Errorf("invalid quota value: %v", buf)
	}

	return m.parseQuota(buf), nil
}

func (m *kvMeta) doSetQuota(ctx Context, qtype uint32, key uint64, quota *Quota) (bool, error) {
	quotaKey, err := m.getQuotaKey(qtype, key)
	if err != nil {
		return false, err
	}

	var created bool
	err = m.txn(ctx, func(tx *kvTxn) error {
		buf := tx.get(quotaKey)
		var origin *Quota
		var exists bool
		if len(buf) == 32 {
			origin = m.parseQuota(buf)
			exists = true
		} else if len(buf) != 0 {
			return fmt.Errorf("invalid quota value: %v", buf)
		}

		if !exists {
			created = true
			origin = new(Quota)
			origin.MaxInodes, origin.MaxSpace = -1, -1
		} else {
			created = false
		}

		if quota.MaxSpace >= 0 {
			origin.MaxSpace = quota.MaxSpace
		}
		if quota.MaxInodes >= 0 {
			origin.MaxInodes = quota.MaxInodes
		}
		if quota.UsedSpace >= 0 {
			origin.UsedSpace = quota.UsedSpace
		}
		if quota.UsedInodes >= 0 {
			origin.UsedInodes = quota.UsedInodes
		}
		tx.set(quotaKey, m.packQuota(origin))
		return nil
	})
	return created, err
}

func (m *kvMeta) doDelQuota(ctx Context, qtype uint32, key uint64) error {
	quotaKey, err := m.getQuotaKey(qtype, key)
	if err != nil {
		return err
	}

	if qtype == UserQuotaType || qtype == GroupQuotaType {
		quota := &Quota{}
		val, err := m.get(quotaKey)
		if err != nil {
			return err
		}
		if len(val) > 0 {
			quota = m.parseQuota(val)
		}
		quota.MaxSpace = -1
		quota.MaxInodes = -1
		return m.txn(ctx, func(tx *kvTxn) error {
			tx.set(quotaKey, m.packQuota(quota))
			return nil
		})
	} else {
		// For dir quotas, remove all data
		return m.deleteKeys(quotaKey)
	}
}

func (m *kvMeta) doLoadQuotas(ctx Context) (map[uint64]*Quota, map[uint64]*Quota, map[uint64]*Quota, error) {
	quotaTypes := []struct {
		prefix string
		name   string
	}{
		{"QD", "dir"},
		{"QU", "user"},
		{"QG", "group"},
	}

	quotaMaps := make([]map[uint64]*Quota, 3)
	for i, qt := range quotaTypes {
		pairs, err := m.scanValues(ctx, m.fmtKey(qt.prefix), -1, nil)
		if err != nil {
			return nil, nil, nil, fmt.Errorf("failed to load %s quotas: %w", qt.name, err)
		}
		var quotas map[uint64]*Quota
		if len(pairs) == 0 {
			quotas = make(map[uint64]*Quota)
		} else {
			quotas = make(map[uint64]*Quota, len(pairs))
			for k, v := range pairs {
				var id uint64
				if qt.prefix == "QD" {
					id = uint64(m.decodeInode([]byte(k[2:]))) // skip prefix
				} else {
					id = binary.BigEndian.Uint64([]byte(k[2:])) // skip prefix
				}
				quotas[id] =  m.parseQuota(v)
			}
		}
		quotaMaps[i] = quotas
	}

	return quotaMaps[0], quotaMaps[1], quotaMaps[2], nil
}

func (m *kvMeta) doSyncVolumeStat(ctx Context) error {
	if m.conf.ReadOnly {
		return syscall.EROFS
	}
	var used, inodes int64
	if err := m.client.txn(ctx, func(tx *kvTxn) error {
		prefix := m.fmtKey("U")
		tx.scan(prefix, nextKey(prefix), false, func(k, v []byte) bool {
			stat := m.parseDirStat(v)
			used += stat.space
			inodes += stat.inodes
			return true
		})
		return nil
	}, 0); err != nil {
		return err
	}
	// need add sustained file size
	vals, err := m.scanKeys(ctx, m.fmtKey("SS"))
	if err != nil {
		return err
	}
	var attr Attr
	for _, k := range vals {
		b := utils.FromBuffer(k[2:])
		if b.Len() != 16 {
			logger.Warnf("Invalid sustainedKey: %v", k)
			continue
		}
		_ = b.Get64()
		inode := m.decodeInode(b.Get(8))
		if eno := m.doGetAttr(ctx, inode, &attr); eno != 0 {
			logger.Warnf("Get attr of inode %d: %s", inode, eno)
			continue
		}
		used += align4K(attr.Length)
		inodes += 1
	}

	if err := m.scanTrashEntry(ctx, func(_ Ino, length uint64) {
		used += align4K(length)
		inodes += 1
	}); err != nil {
		return err
	}
	logger.Debugf("Used space: %s, inodes: %d", humanize.IBytes(uint64(used)), inodes)
	err = m.setValue(m.counterKey(totalInodes), packCounter(inodes))
	if err != nil {
		return fmt.Errorf("set total inodes: %w", err)
	}
	return m.setValue(m.counterKey(usedSpace), packCounter(used))
}

func (m *kvMeta) doFlushQuotas(ctx Context, quotas []*iQuota) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		keys := make([][]byte, 0, len(quotas))
		qs := make([]*iQuota, 0, len(quotas))
		for _, q := range quotas {
			key, err := m.getQuotaKey(q.qtype, q.qkey)
			if err != nil {
				return err
			}
			keys = append(keys, key)
			qs = append(qs, q)
		}
		for i, v := range tx.gets(keys...) {
			if len(v) == 0 {
				if qs[i].qtype == UserQuotaType || qs[i].qtype == GroupQuotaType {
					quota := &Quota{
						MaxSpace:   -1,
						MaxInodes:  -1,
						UsedSpace:  qs[i].quota.newSpace,
						UsedInodes: qs[i].quota.newInodes,
					}
					tx.set(keys[i], m.packQuota(quota))
				}
				continue
			}
			if len(v) != 32 {
				logger.Errorf("Invalid quota value: %v", v)
				continue
			}
			q := m.parseQuota(v)
			q.UsedSpace += qs[i].quota.newSpace
			q.UsedInodes += qs[i].quota.newInodes
			tx.set(keys[i], m.packQuota(q))
		}
		return nil
	})
}

func (m *kvMeta) dumpEntry(inode Ino, e *DumpedEntry, showProgress func(totalIncr, currentIncr int64)) error {
	ctx := Background()
	return m.client.txn(ctx, func(tx *kvTxn) error {
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			logger.Warnf("inode %d not found", inode)
		}

		attr := &Attr{Nlink: 1}
		m.parseAttr(a, attr)
		if a == nil && e.Attr != nil {
			attr.Typ = typeFromString(e.Attr.Type)
			if attr.Typ == TypeDirectory {
				attr.Nlink = 2
			}
		}
		dumpAttr(attr, e.Attr)
		e.Attr.Inode = inode

		var xattrs []*DumpedXattr
		tx.scan(m.xattrKey(inode, ""), nextKey(m.xattrKey(inode, "")), false, func(k, v []byte) bool {
			xattrs = append(xattrs, &DumpedXattr{string(k[10:]), string(v)}) // "A" + inode + "X"
			return true
		})
		if len(xattrs) > 0 {
			sort.Slice(xattrs, func(i, j int) bool { return xattrs[i].Name < xattrs[j].Name })
			e.Xattrs = xattrs
		}

		accessACl, err := m.getACL(tx, attr.AccessACL)
		if err != nil {
			return err
		}
		e.AccessACL = dumpACL(accessACl)
		defaultACL, err := m.getACL(tx, attr.DefaultACL)
		if err != nil {
			return err
		}
		e.DefaultACL = dumpACL(defaultACL)

		if attr.Typ == TypeFile {
			e.Chunks = e.Chunks[:0]
			vals := make(map[string][]byte)
			tx.scan(m.chunkKey(inode, 0), m.chunkKey(inode, uint32(attr.Length/ChunkSize)+1),
				false, func(k, v []byte) bool {
					vals[string(k)] = v
					return true
				})
			for indx := uint32(0); uint64(indx)*ChunkSize < attr.Length; indx++ {
				v, ok := vals[string(m.chunkKey(inode, indx))]
				if !ok {
					continue
				}
				ss := readSliceBuf(v)
				if ss == nil {
					logger.Errorf("Corrupt value for inode %d chunk index %d", inode, indx)
				}
				slices := make([]*DumpedSlice, 0, len(ss))
				for _, s := range ss {
					slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
				}
				e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices})
			}
		} else if attr.Typ == TypeSymlink {
			l := tx.get(m.symKey(inode))
			if l == nil {
				logger.Warnf("no link target for inode %d", inode)
			}
			e.Symlink = string(l)
		} else if attr.Typ == TypeDirectory {
			vals, err := m.scanValues(ctx, m.entryKey(inode, ""), 10000, nil)
			if err != nil {
				return err
			}
			if showProgress != nil {
				showProgress(int64(len(e.Entries)), 0)
			}
			if len(vals) < 10000 {
				e.Entries = make(map[string]*DumpedEntry, len(vals))
				for k, value := range vals {
					name := k[10:]
					ce := entryPool.Get()
					ce.Name = name
					typ, inode := m.parseEntry(value)
					ce.Attr.Inode = inode
					ce.Attr.Type = typeToString(typ)
					e.Entries[name] = ce
				}
			}
		}
		return nil
	}, 0)
}

func (m *kvMeta) dumpDir(ctx Context, inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth, threads int, showProgress func(totalIncr, currentIncr int64)) error {
	bwWrite := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	if tree.Entries == nil {
		// retry for large directory
		vals, err := m.scanValues(ctx, m.entryKey(inode, ""), -1, nil)
		if err != nil {
			return err
		}
		tree.Entries = make(map[string]*DumpedEntry, len(vals))
		for k, value := range vals {
			name := k[10:]
			ce := entryPool.Get()
			ce.Name = name
			typ, inode := m.parseEntry(value)
			ce.Attr.Inode = inode
			ce.Attr.Type = typeToString(typ)
			tree.Entries[name] = ce
		}
		if showProgress != nil {
			showProgress(int64(len(tree.Entries))-10000, 0)
		}
	}
	var entries []*DumpedEntry
	for _, e := range tree.Entries {
		entries = append(entries, e)
	}
	sort.Slice(entries, func(i, j int) bool { return entries[i].Name < entries[j].Name })
	_ = tree.writeJsonWithOutEntry(bw, depth)

	ms := make([]sync.Mutex, threads)
	conds := make([]*sync.Cond, threads)
	ready := make([]bool, threads)
	var err error
	for c := 0; c < threads; c++ {
		conds[c] = sync.NewCond(&ms[c])
		if c < len(entries) {
			go func(c int) {
				for i := c; i < len(entries) && err == nil; i += threads {
					e := entries[i]
					er := m.dumpEntry(e.Attr.Inode, e, showProgress)
					ms[c].Lock()
					ready[c] = true
					if er != nil {
						err = er
					}
					conds[c].Signal()
					for ready[c] && err == nil {
						conds[c].Wait()
					}
					ms[c].Unlock()
				}
			}(c)
		}
	}

	for i, e := range entries {
		c := i % threads
		ms[c].Lock()
		for !ready[c] && err == nil {
			conds[c].Wait()
		}
		ready[c] = false
		conds[c].Signal()
		ms[c].Unlock()
		if err != nil {
			return err
		}
		if e.Attr.Type == "directory" {
			err = m.dumpDir(ctx, e.Attr.Inode, e, bw, depth+2, threads, showProgress)
		} else {
			err = e.writeJSON(bw, depth+2)
		}
		if err != nil {
			return err
		}
		entries[i] = nil
		entryPool.Put(e)
		if i != len(entries)-1 {
			bwWrite(",")
		}
		if showProgress != nil {
			showProgress(0, 1)
		}
	}
	bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth)))
	return nil
}

func (m *kvMeta) dumpDirFast(inode Ino, tree *DumpedEntry, bw *bufio.Writer, depth int, showProgress func(totalIncr, currentIncr int64)) error {
	bwWrite := func(s string) {
		if _, err := bw.WriteString(s); err != nil {
			panic(err)
		}
	}
	var names []string
	entries := tree.Entries
	for n, de := range entries {
		if !de.Attr.full && de.Attr.Inode != TrashInode {
			logger.Warnf("Corrupt inode: %d, missing attribute", inode)
		}
		names = append(names, n)
	}
	sort.Slice(names, func(i, j int) bool { return names[i] < names[j] })
	_ = tree.writeJsonWithOutEntry(bw, depth)
	for i, name := range names {
		e := entries[name]
		e.Name = name
		inode := e.Attr.Inode
		if e.Attr.Type == "directory" {
			_ = m.dumpDirFast(inode, e, bw, depth+2, showProgress)
		} else {
			_ = e.writeJSON(bw, depth+2)
		}
		if i != len(entries)-1 {
			bwWrite(",")
		}
		if showProgress != nil {
			showProgress(0, 1)
		}
	}
	bwWrite(fmt.Sprintf("\n%s}\n%s}", strings.Repeat(jsonIndent, depth+1), strings.Repeat(jsonIndent, depth)))
	return nil
}

func (m *kvMeta) DumpMeta(w io.Writer, root Ino, threads int, keepSecret, fast, skipTrash bool) (err error) {
	defer func() {
		if p := recover(); p != nil {
			debug.PrintStack()
			if e, ok := p.(error); ok {
				err = e
			} else {
				err = errors.Errorf("DumpMeta error: %v", p)
			}
		}
	}()
	ctx := Background()
	vals, err := m.scanValues(ctx, m.fmtKey("D"), -1, nil)
	if err != nil {
		return err
	}
	dels := make([]*DumpedDelFile, 0, len(vals))
	for k, v := range vals {
		b := utils.FromBuffer([]byte(k[1:])) // "D"
		if b.Len() != 16 {
			logger.Warnf("invalid delfileKey: %s", k)
			continue
		}
		inode := m.decodeInode(b.Get(8))
		dels = append(dels, &DumpedDelFile{inode, b.Get64(), m.parseInt64(v)})
	}

	progress := utils.NewProgress(false)
	var tree, trash *DumpedEntry
	root = m.checkRoot(root)

	bInodes, _ := m.get(m.counterKey(totalInodes))
	inodeTotal := parseCounter(bInodes)
	if root == RootInode && fast { // make snap
		m.snap = make(map[Ino]*DumpedEntry)
		defer func() {
			m.snap = nil
		}()
		bar := progress.AddCountBar("Scan keys", 0)
		bUsed, _ := m.get(m.counterKey(usedSpace))
		used := parseCounter(bUsed)
		var guessKeyTotal int64 = 3 // setting, nextInode, nextChunk
		if inodeTotal > 0 {
			guessKeyTotal += int64(math.Ceil((float64(used/inodeTotal/(64*1024*1024)) + float64(3)) * float64(inodeTotal)))
		}
		bar.SetCurrent(0) // Reset
		bar.SetTotal(guessKeyTotal)
		threshold := 0.1
		var cnt int

		if err = m.cacheACLs(Background()); err != nil {
			return err
		}

		err := m.client.scan(nil, func(key, value []byte) bool {
			if len(key) > 9 && key[0] == 'A' {
				ino := m.decodeInode(key[1:9])
				e := m.snap[ino]
				if e == nil {
					e = &DumpedEntry{Attr: &DumpedAttr{Inode: ino}}
					m.snap[ino] = e
				}
				switch key[9] {
				case 'I':
					attr := &Attr{Nlink: 1}
					m.parseAttr(value, attr)
					dumpAttr(attr, e.Attr)
					e.Attr.Inode = ino
					e.AccessACL = dumpACL(m.aclCache.Get(attr.AccessACL))
					e.DefaultACL = dumpACL(m.aclCache.Get(attr.DefaultACL))
				case 'C':
					indx := binary.BigEndian.Uint32(key[10:])
					ss := readSliceBuf(value)
					if ss == nil {
						logger.Errorf("Corrupt value for inode %d chunk index %d", ino, indx)
					}
					slices := make([]*DumpedSlice, 0, len(ss))
					for _, s := range ss {
						slices = append(slices, &DumpedSlice{Id: s.id, Pos: s.pos, Size: s.size, Off: s.off, Len: s.len})
					}
					e.Chunks = append(e.Chunks, &DumpedChunk{indx, slices})
				case 'D':
					name := string(key[10:])
					typ, inode := m.parseEntry(value)
					child := m.snap[inode]
					if child == nil {
						child = &DumpedEntry{Attr: &DumpedAttr{Inode: inode, Type: typeToString(typ)}}
						m.snap[inode] = child
					} else if child.Attr.Type == "" {
						child.Attr.Type = typeToString(typ)
					}
					if e.Entries == nil {
						e.Entries = map[string]*DumpedEntry{}
					}
					e.Entries[name] = child
				case 'X':
					e.Xattrs = append(e.Xattrs, &DumpedXattr{string(key[10:]), string(value)})
				case 'S':
					e.Symlink = string(value)
				}
			}
			cnt++
			if cnt%100 == 0 && bar.Current() > int64(math.Ceil(float64(guessKeyTotal)*(1-threshold))) {
				guessKeyTotal += int64(math.Ceil(float64(guessKeyTotal) * threshold))
				bar.SetTotal(guessKeyTotal)
			}
			bar.Increment()
			return true
		})
		if err != nil {
			return err
		}
		bar.Done()
		tree = m.snap[root]
		if !skipTrash {
			trash = m.snap[TrashInode]
			if trash == nil {
				trash = &DumpedEntry{
					Attr: &DumpedAttr{
						Inode: TrashInode,
						Type:  "directory",
						Nlink: 2,
					},
				}
				m.snap[TrashInode] = trash
			}
		}
	} else {
		tree = &DumpedEntry{
			Attr: &DumpedAttr{
				Inode: root,
				Type:  "directory",
			},
		}
		if err = m.dumpEntry(root, tree, nil); err != nil {
			return err
		}
		if root == RootInode && !skipTrash {
			trash = &DumpedEntry{
				Attr: &DumpedAttr{
					Inode: TrashInode,
					Type:  "directory",
				},
			}
			if err = m.dumpEntry(TrashInode, trash, nil); err != nil {
				return err
			}
		}
	}

	if tree == nil || tree.Attr == nil {
		return errors.New("The entry of the root inode was not found")
	}
	tree.Name = "FSTree"

	var rs [][]byte
	err = m.txn(Background(), func(tx *kvTxn) error {
		rs = tx.gets(m.counterKey(usedSpace),
			m.counterKey(totalInodes),
			m.counterKey("nextInode"),
			m.counterKey("nextChunk"),
			m.counterKey("nextSession"),
			m.counterKey("nextTrash"))
		return nil
	})
	if err != nil {
		return err
	}
	cs := make([]int64, len(rs))
	for i, r := range rs {
		if r != nil {
			cs[i] = parseCounter(r)
		}
	}

	vals, err = m.scanValues(ctx, m.fmtKey("SS"), -1, nil)
	if err != nil {
		return err
	}
	ss := make(map[uint64][]Ino)
	for k := range vals {
		b := utils.FromBuffer([]byte(k[2:])) // "SS"
		if b.Len() != 16 {
			return fmt.Errorf("invalid sustainedKey: %s", k)
		}
		sid := b.Get64()
		inode := m.decodeInode(b.Get(8))
		ss[sid] = append(ss[sid], inode)
	}
	sessions := make([]*DumpedSustained, 0, len(ss))
	for k, v := range ss {
		sessions = append(sessions, &DumpedSustained{k, v})
	}

	pairs, err := m.scanValues(ctx, m.fmtKey("QD"), -1, func(k, v []byte) bool {
		return len(k) == 10 && len(v) == 32
	})
	if err != nil {
		return err
	}
	quotas := make(map[Ino]*DumpedQuota, len(pairs))
	for k, v := range pairs {
		inode := m.decodeInode([]byte(k[2:]))
		quota := m.parseQuota(v)
		quotas[inode] = &DumpedQuota{quota.MaxSpace, quota.MaxInodes, 0, 0}
	}

	dm := DumpedMeta{
		Setting: *m.getFormat(),
		Counters: &DumpedCounters{
			UsedSpace:   cs[0],
			UsedInodes:  cs[1],
			NextInode:   cs[2],
			NextChunk:   cs[3],
			NextSession: cs[4],
			NextTrash:   cs[5],
		},
		Sustained: sessions,
		DelFiles:  dels,
		Quotas:    quotas,
	}
	if !keepSecret && dm.Setting.SecretKey != "" {
		dm.Setting.SecretKey = "removed"
		logger.Warnf("Secret key is removed for the sake of safety")
	}
	if !keepSecret && dm.Setting.SessionToken != "" {
		dm.Setting.SessionToken = "removed"
		logger.Warnf("Session token is removed for the sake of safety")
	}
	bw, err := dm.writeJsonWithOutTree(w)
	if err != nil {
		return err
	}
	useTotal := root == RootInode && !skipTrash
	bar := progress.AddCountBar("Dumped entries", 1) // with root
	if useTotal {
		bar.SetTotal(inodeTotal)
	}
	bar.Increment()
	if trash != nil {
		trash.Name = "Trash"
		bar.IncrTotal(1)
		bar.Increment()
	}
	showProgress := func(totalIncr, currentIncr int64) {
		if !useTotal {
			bar.IncrTotal(totalIncr)
		}
		bar.IncrInt64(currentIncr)
	}
	if m.snap != nil {
		if err = m.dumpDirFast(root, tree, bw, 1, showProgress); err != nil {
			return err
		}
	} else {
		showProgress(int64(len(tree.Entries)), 0)
		if err = m.dumpDir(ctx, root, tree, bw, 1, threads, showProgress); err != nil {
			return err
		}
	}
	if trash != nil {
		if _, err = bw.WriteString(","); err != nil {
			return err
		}
		if m.snap != nil {
			if err = m.dumpDirFast(TrashInode, trash, bw, 1, showProgress); err != nil {
				return err
			}
		} else {
			showProgress(int64(len(tree.Entries)), 0)
			if err = m.dumpDir(ctx, TrashInode, trash, bw, 1, threads, showProgress); err != nil {
				return err
			}
		}
	}
	if _, err = bw.WriteString("\n}\n"); err != nil {
		return err
	}
	progress.Done()

	return bw.Flush()
}

type pair struct {
	key   []byte
	value []byte
}

func (m *kvMeta) loadEntry(e *DumpedEntry, kv chan *pair, aclMaxId *uint32) {
	inode := e.Attr.Inode
	attr := loadAttr(e.Attr)
	attr.Parent = e.Parents[0]
	if attr.Typ == TypeFile {
		attr.Length = e.Attr.Length
		for _, c := range e.Chunks {
			if len(c.Slices) == 0 {
				continue
			}
			slices := make([]byte, 0, sliceBytes*len(c.Slices))
			for _, s := range c.Slices {
				slices = append(slices, marshalSlice(s.Pos, s.Id, s.Size, s.Off, s.Len)...)
			}
			kv <- &pair{m.chunkKey(inode, c.Index), slices}
		}
	} else if attr.Typ == TypeDirectory {
		attr.Length = 4 << 10
		var stat dirStat
		for name, c := range e.Entries {
			length := uint64(0)
			if typeFromString(c.Attr.Type) == TypeFile {
				length = c.Attr.Length
			}
			stat.length += int64(length)
			stat.space += align4K(length)
			stat.inodes++

			kv <- &pair{m.entryKey(inode, string(unescape(name))), m.packEntry(typeFromString(c.Attr.Type), c.Attr.Inode)}
		}
		kv <- &pair{m.dirStatKey(inode), m.packDirStat(&stat)}
	} else if attr.Typ == TypeSymlink {
		symL := unescape(e.Symlink)
		attr.Length = uint64(len(symL))
		kv <- &pair{m.symKey(inode), []byte(symL)}
	}
	for _, x := range e.Xattrs {
		kv <- &pair{m.xattrKey(inode, x.Name), []byte(unescape(x.Value))}
	}

	attr.AccessACL = m.saveACL(loadACL(e.AccessACL), aclMaxId)
	attr.DefaultACL = m.saveACL(loadACL(e.DefaultACL), aclMaxId)
	kv <- &pair{m.inodeKey(inode), m.marshal(attr)}
}

func (m *kvMeta) LoadMeta(r io.Reader) error {
	var exist bool
	err := m.txn(Background(), func(tx *kvTxn) error {
		exist = tx.exist(m.fmtKey())
		return nil
	})
	if err != nil {
		return err
	}
	if exist {
		return fmt.Errorf("Database %s://%s is not empty", m.Name(), m.addr)
	}

	kv := make(chan *pair, 10000)
	batch := 10000
	if m.Name() == "etcd" {
		batch = 128
	}
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			var buffer []*pair
			var total int
			for p := range kv {
				buffer = append(buffer, p)
				total += len(p.key) + len(p.value)
				if len(buffer) >= batch || total > 5<<20 {
					err := m.txn(Background(), func(tx *kvTxn) error {
						for _, p := range buffer {
							tx.set(p.key, p.value)
						}
						return nil
					})
					if err != nil {
						logger.Fatalf("write %d pairs: %s", len(buffer), err)
					}
					buffer = buffer[:0]
					total = 0
				}
			}
			if len(buffer) > 0 {
				err := m.txn(Background(), func(tx *kvTxn) error {
					for _, p := range buffer {
						tx.set(p.key, p.value)
					}
					return nil
				})
				if err != nil {
					logger.Fatalf("write %d pairs: %s", len(buffer), err)
				}
			}
		}()
	}

	var aclMaxId uint32
	dm, counters, parents, refs, err := loadEntries(r, func(e *DumpedEntry) { m.loadEntry(e, kv, &aclMaxId) }, nil)
	if err != nil {
		return err
	}

	if err = m.loadDumpedACLs(Background()); err != nil {
		return err
	}

	format, _ := json.MarshalIndent(dm.Setting, "", "")
	kv <- &pair{m.fmtKey("setting"), format}
	kv <- &pair{m.counterKey(usedSpace), packCounter(counters.UsedSpace)}
	kv <- &pair{m.counterKey(totalInodes), packCounter(counters.UsedInodes)}
	kv <- &pair{m.counterKey("nextInode"), packCounter(counters.NextInode)}
	kv <- &pair{m.counterKey("nextChunk"), packCounter(counters.NextChunk)}
	kv <- &pair{m.counterKey("nextSession"), packCounter(counters.NextSession)}
	kv <- &pair{m.counterKey("nextTrash"), packCounter(counters.NextTrash)}
	for _, d := range dm.DelFiles {
		kv <- &pair{m.delfileKey(d.Inode, d.Length), m.packInt64(d.Expire)}
	}
	for k, v := range refs {
		if v > 1 {
			kv <- &pair{m.sliceKey(k.id, k.size), packCounter(v - 1)}
		}
	}
	close(kv)
	wg.Wait()

	// update nlinks and parents for hardlinks
	st := make(map[Ino]int64)
	defer m.loadDumpedQuotas(Background(), dm.Quotas)
	return m.txn(Background(), func(tx *kvTxn) error {
		for i, ps := range parents {
			if len(ps) > 1 {
				a := tx.get(m.inodeKey(i))
				// reset nlink and parent
				binary.BigEndian.PutUint32(a[47:51], uint32(len(ps))) // nlink
				binary.BigEndian.PutUint64(a[63:71], 0)
				tx.set(m.inodeKey(i), a)
				for k := range st {
					delete(st, k)
				}
				for _, p := range ps {
					st[p] = st[p] + 1
				}
				for p, c := range st {
					tx.set(m.parentKey(i, p), packCounter(c))
				}
			}
		}
		return nil
	})
}

func (m *kvMeta) doCloneEntry(ctx Context, srcIno Ino, parent Ino, name string, ino Ino, originAttr *Attr, cmode uint8, cumask uint16, top bool) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		a := tx.get(m.inodeKey(srcIno))
		if a == nil {
			return syscall.ENOENT
		}
		m.parseAttr(a, originAttr)
		attr := *originAttr
		if eno := m.Access(ctx, srcIno, MODE_MASK_R, &attr); eno != 0 {
			return eno
		}
		attr.Parent = parent
		now := time.Now()
		if cmode&CLONE_MODE_PRESERVE_ATTR == 0 {
			attr.Uid = ctx.Uid()
			attr.Gid = ctx.Gid()
			attr.Mode &= ^cumask
			attr.Atime = now.Unix()
			attr.Mtime = now.Unix()
			attr.Ctime = now.Unix()
			attr.Atimensec = uint32(now.Nanosecond())
			attr.Mtimensec = uint32(now.Nanosecond())
			attr.Ctimensec = uint32(now.Nanosecond())
		}
		// TODO: preserve hardlink
		if attr.Typ == TypeFile && attr.Nlink > 1 {
			attr.Nlink = 1
		}

		if top {
			var pattr Attr
			a = tx.get(m.inodeKey(parent))
			if a == nil {
				return syscall.ENOENT
			}
			m.parseAttr(a, &pattr)
			if pattr.Typ != TypeDirectory {
				return syscall.ENOTDIR
			}
			if (pattr.Flags & FlagImmutable) != 0 {
				return syscall.EPERM
			}
			if tx.get(m.entryKey(parent, name)) != nil {
				return syscall.EEXIST
			}
			if eno := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, &pattr); eno != 0 {
				return eno
			}
			if attr.Typ != TypeDirectory {
				now := time.Now()
				pattr.Mtime = now.Unix()
				pattr.Mtimensec = uint32(now.Nanosecond())
				pattr.Ctime = now.Unix()
				pattr.Ctimensec = uint32(now.Nanosecond())
				tx.set(m.inodeKey(parent), m.marshal(&pattr))
			}
		}

		tx.set(m.inodeKey(ino), m.marshal(&attr))
		prefix := m.xattrKey(srcIno, "")
		tx.scan(prefix, nextKey(prefix), false, func(k, v []byte) bool {
			tx.set(m.xattrKey(ino, string(k[len(prefix):])), v)
			return true
		})
		if top && attr.Typ == TypeDirectory {
			tx.set(m.detachedKey(ino), m.packInt64(time.Now().Unix()))
		} else {
			tx.set(m.entryKey(parent, name), m.packEntry(attr.Typ, ino))
		}

		switch attr.Typ {
		case TypeDirectory:
			tx.set(m.dirStatKey(ino), tx.get(m.dirStatKey(srcIno)))
		case TypeFile:
			if attr.Length != 0 {
				vals := make(map[string][]byte)
				tx.scan(m.chunkKey(srcIno, 0), m.chunkKey(srcIno, uint32(attr.Length/ChunkSize)+1),
					false, func(k, v []byte) bool {
						vals[string(k)] = v
						return true
					})

				refKeys := make([][]byte, 0, len(vals))
				for indx := uint32(0); indx <= uint32(attr.Length/ChunkSize); indx++ {
					if v, ok := vals[string(m.chunkKey(srcIno, indx))]; ok {
						tx.set(m.chunkKey(ino, indx), v)
						ss := readSliceBuf(v)
						for _, s := range ss {
							if s.id > 0 {
								refKeys = append(refKeys, m.sliceKey(s.id, s.size))
							}
						}
					}
				}
				refs := tx.gets(refKeys...)
				for i := range refKeys {
					tx.set(refKeys[i], packCounter(parseCounter(refs[i])+1))
				}
			}
		case TypeSymlink:
			tx.set(m.symKey(ino), tx.get(m.symKey(srcIno)))
		}
		return nil
	}, srcIno))
}

func (m *kvMeta) doFindDetachedNodes(t time.Time) []Ino {
	vals, err := m.scanValues(Background(), m.fmtKey("N"), -1, func(k, v []byte) bool {
		return len(k) == 9 && m.parseInt64(v) < t.Unix()
	})
	if err != nil {
		logger.Errorf("Scan detached nodes error: %s", err)
		return nil
	}
	var inodes []Ino
	for k := range vals {
		inodes = append(inodes, m.decodeInode([]byte(k)[1:]))
	}
	return inodes
}

func (m *kvMeta) doCleanupDetachedNode(ctx Context, ino Ino) syscall.Errno {
	buf, err := m.get(m.inodeKey(ino))
	if err != nil || buf == nil {
		return errno(err)
	}
	rmConcurrent := make(chan int, 10)
	if eno := m.emptyDir(ctx, ino, true, nil, rmConcurrent); eno != 0 {
		return eno
	}
	m.updateStats(-align4K(0), -1)
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		tx.delete(m.inodeKey(ino))
		tx.deleteKeys(m.xattrKey(ino, ""))
		tx.delete(m.dirStatKey(ino))
		tx.delete(m.detachedKey(ino))
		return nil
	}, ino))
}

func (m *kvMeta) doBatchClone(ctx Context, srcParent Ino, dstParent Ino, entries []*Entry, cmode uint8, cumask uint16, result *batchCloneResult) syscall.Errno {
	// TODO: Implement batch clone for TKV backend
	return syscall.ENOTSUP
}

func (m *kvMeta) doAttachDirNode(ctx Context, parent Ino, inode Ino, name string) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		a := tx.get(m.inodeKey(parent))
		if a == nil {
			return syscall.ENOENT
		}
		var pattr Attr
		m.parseAttr(a, &pattr)
		if pattr.Typ != TypeDirectory {
			return syscall.ENOTDIR
		}
		if pattr.Parent > TrashInode {
			return syscall.ENOENT
		}
		if (pattr.Flags & FlagImmutable) != 0 {
			return syscall.EPERM
		}
		if tx.get(m.entryKey(parent, name)) != nil {
			return syscall.EEXIST
		}

		pattr.Nlink++
		now := time.Now()
		pattr.Mtime = now.Unix()
		pattr.Mtimensec = uint32(now.Nanosecond())
		pattr.Ctime = now.Unix()
		pattr.Ctimensec = uint32(now.Nanosecond())
		tx.set(m.inodeKey(parent), m.marshal(&pattr))
		tx.set(m.entryKey(parent, name), m.packEntry(TypeDirectory, inode))
		tx.delete(m.detachedKey(inode))
		return nil
	}, parent))
}

func (m *kvMeta) doTouchAtime(ctx Context, inode Ino, attr *Attr, now time.Time) (bool, error) {
	var updated bool
	err := m.txn(ctx, func(tx *kvTxn) error {
		a := tx.get(m.inodeKey(inode))
		if a == nil {
			return syscall.ENOENT
		}
		m.parseAttr(a, attr)
		if !m.atimeNeedsUpdate(attr, now) {
			return nil
		}
		attr.Atime = now.Unix()
		attr.Atimensec = uint32(now.Nanosecond())
		tx.set(m.inodeKey(inode), m.marshal(attr))
		updated = true
		return nil
	}, inode)
	return updated, err
}

func (m *kvMeta) doSetFacl(ctx Context, ino Ino, aclType uint8, rule *aclAPI.Rule) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		val := tx.get(m.inodeKey(ino))
		if val == nil {
			return syscall.ENOENT
		}
		attr := &Attr{}
		m.parseAttr(val, attr)

		if ctx.Uid() != 0 && ctx.Uid() != attr.Uid {
			return syscall.EPERM
		}

		if attr.Flags&FlagImmutable != 0 {
			return syscall.EPERM
		}

		oriACL, oriMode := getAttrACLId(attr, aclType), attr.Mode

		// https://github.com/torvalds/linux/blob/480e035fc4c714fb5536e64ab9db04fedc89e910/fs/fuse/acl.c#L143-L151
		// TODO: check linux capabilities
		if ctx.Uid() != 0 && !inGroup(ctx, attr.Gid) {
			// clear sgid
			attr.Mode &= 05777
		}

		if rule.IsEmpty() {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
		} else if rule.IsMinimal() && aclType == aclAPI.TypeAccess {
			// remove acl
			setAttrACLId(attr, aclType, aclAPI.None)
			// set mode
			attr.Mode &= 07000
			attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Group & 7) << 3) | (rule.Other & 7)
		} else {
			// set acl
			rule.InheritPerms(attr.Mode)
			aclId, err := m.insertACL(tx, rule)
			if err != nil {
				return err
			}
			setAttrACLId(attr, aclType, aclId)

			// set mode
			if aclType == aclAPI.TypeAccess {
				attr.Mode &= 07000
				attr.Mode |= ((rule.Owner & 7) << 6) | ((rule.Mask & 7) << 3) | (rule.Other & 7)
			}
		}

		// update attr
		if oriACL != getAttrACLId(attr, aclType) || oriMode != attr.Mode {
			now := time.Now()
			attr.Ctime = now.Unix()
			attr.Ctimensec = uint32(now.Nanosecond())
			tx.set(m.inodeKey(ino), m.marshal(attr))
		}
		return nil
	}, ino))
}

func (m *kvMeta) doGetFacl(ctx Context, ino Ino, aclType uint8, aclId uint32, rule *aclAPI.Rule) syscall.Errno {
	return errno(m.client.txn(ctx, func(tx *kvTxn) error {
		if aclId == aclAPI.None {
			val := tx.get(m.inodeKey(ino))
			if val == nil {
				return syscall.ENOENT
			}
			attr := &Attr{}
			m.parseAttr(val, attr)
			m.of.Update(ino, attr)

			aclId = getAttrACLId(attr, aclType)
		}

		a, err := m.getACL(tx, aclId)
		if err != nil {
			return err
		}
		if a == nil {
			return ENOATTR
		}
		*rule = *a
		return nil
	}, 0))
}

func (m *kvMeta) insertACL(tx *kvTxn, rule *aclAPI.Rule) (uint32, error) {
	if rule == nil || rule.IsEmpty() {
		return aclAPI.None, nil
	}

	if err := m.tryLoadMissACLs(tx); err != nil {
		logger.Warnf("load miss acls error: %s", err)
	}

	var aclId uint32
	if aclId = m.aclCache.GetId(rule); aclId == aclAPI.None {
		newId, err := m.incrCounter(aclCounter, 1)
		if err != nil {
			return aclAPI.None, err
		}
		aclId = uint32(newId)

		tx.set(m.aclKey(aclId), rule.Encode())
		m.aclCache.Put(aclId, rule)
	}
	return aclId, nil
}

func (m *kvMeta) tryLoadMissACLs(tx *kvTxn) error {
	missIds := m.aclCache.GetMissIds()
	if len(missIds) > 0 {
		missKeys := make([][]byte, len(missIds))
		for i, id := range missIds {
			missKeys[i] = m.aclKey(id)
		}

		acls := tx.gets(missKeys...)
		for i, data := range acls {
			var rule aclAPI.Rule
			if len(data) > 0 {
				rule.Decode(data)
			}
			m.aclCache.Put(missIds[i], &rule)
		}
	}
	return nil
}

func (m *kvMeta) getACL(tx *kvTxn, id uint32) (*aclAPI.Rule, error) {
	if id == aclAPI.None {
		return nil, nil
	}
	if cRule := m.aclCache.Get(id); cRule != nil {
		return cRule, nil
	}

	val := tx.get(m.aclKey(id))
	if val == nil {
		return nil, syscall.EIO
	}

	rule := &aclAPI.Rule{}
	rule.Decode(val)
	m.aclCache.Put(id, rule)
	return rule, nil
}

func (m *kvMeta) loadDumpedACLs(ctx Context) error {
	id2Rule := m.aclCache.GetAll()
	if len(id2Rule) == 0 {
		return nil
	}

	return m.txn(ctx, func(tx *kvTxn) error {
		maxId := uint32(0)
		for id, rule := range id2Rule {
			if id > maxId {
				maxId = id
			}
			tx.set(m.aclKey(id), rule.Encode())
		}
		tx.set(m.counterKey(aclCounter), packCounter(int64(maxId)))
		return nil
	})
}

func (m *kvMeta) doStoreToken(ctx Context, token []byte) (id uint32, st syscall.Errno) {
	err := m.txn(ctx, func(tx *kvTxn) error {
		newId, err := m.incrCounter(krbTokenCounter, 1)
		if err != nil {
			return err
		}
		tx.set(m.krbTokenKey(uint32(newId)), token)
		id = uint32(newId)
		return nil
	})
	return id, errno(err)
}

func (m *kvMeta) doUpdateToken(ctx Context, id uint32, token []byte) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		if tx.get(m.krbTokenKey(id)) == nil {
			return syscall.ENOENT
		}
		tx.set(m.krbTokenKey(id), token)
		return nil
	}))
}

func (m *kvMeta) doLoadToken(ctx Context, id uint32) (token []byte, st syscall.Errno) {
	err := m.txn(ctx, func(tx *kvTxn) error {
		token = tx.get(m.krbTokenKey(id))
		if token == nil {
			return syscall.ENOENT
		}
		return nil
	})
	return token, errno(err)
}

func (m *kvMeta) doDeleteTokens(ctx Context, ids []uint32) syscall.Errno {
	return errno(m.txn(ctx, func(tx *kvTxn) error {
		for _, id := range ids {
			tx.delete(m.krbTokenKey(id))
		}
		return nil
	}))
}

func (m *kvMeta) doListTokens(ctx Context) (tokens map[uint32][]byte, st syscall.Errno) {
	tokens = make(map[uint32][]byte)
	err := m.client.scan(m.fmtKey("KD"), func(k, v []byte) bool {
		rb := utils.FromBuffer(k[2:])
		id := rb.Get32()
		tokens[id] = v
		return true
	})
	return tokens, errno(err)
}

type kvDirHandler struct {
	dirHandler
}

func (m *kvMeta) newDirHandler(inode Ino, plus bool, entries []*Entry) DirHandler {
	s := &kvDirHandler{
		dirHandler: dirHandler{
			inode:       inode,
			plus:        plus,
			initEntries: entries,
			fetcher:     m.getDirFetcher(),
			batchNum:    DirBatchNum["kv"],
		},
	}
	s.batch, _ = s.fetch(Background(), 0)
	return s
}

func (m *kvMeta) getDirFetcher() dirFetcher {
	return func(ctx Context, inode Ino, cursor interface{}, offset, limit int, plus bool) (interface{}, []*Entry, error) {
		var startKey []byte
		sCursor := ""
		var total int
		if cursor == nil {
			if offset > 0 {
				total += offset
			}
		} else {
			limit += 1 // skip the cursor
			sCursor = string(cursor.([]byte))
		}
		total += limit
		startKey = m.entryKey(inode, sCursor)
		endKey := nextKey(m.entryKey(inode, ""))

		keys, vals, err := m.scan(startKey, endKey, total, nil)
		if err != nil {
			return nil, nil, err
		}

		if cursor != nil {
			keys, vals = keys[1:], vals[1:]
		}

		if total > limit && offset <= len(keys) {
			keys, vals = keys[offset:], vals[offset:]
		}

		prefix := len(m.entryKey(inode, ""))
		entries := make([]*Entry, 0, len(keys))
		var name []byte
		var typ uint8
		var ino Ino
		for i, buf := range vals {
			name = keys[i]
			typ, ino = m.parseEntry(buf)
			if len(name) == prefix {
				logger.Errorf("Corrupt entry with empty name: inode %d parent %d", ino, inode)
				continue
			}
			entries = append(entries, &Entry{
				Inode: ino,
				Name:  []byte(name)[prefix:],
				Attr:  &Attr{Typ: typ},
			})
		}

		if plus {
			if err = m.fillAttr(entries); err != nil {
				return nil, nil, err
			}
		}

		if len(entries) == 0 {
			return nil, nil, nil
		}
		return entries[len(entries)-1].Name, entries, nil
	}
}


================================================
FILE: pkg/meta/tkv_badger.go
================================================
//go:build !nobadger
// +build !nobadger

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"context"
	"time"

	"github.com/dgraph-io/badger/v4"
	"github.com/juicedata/juicefs/pkg/utils"
)

type badgerTxn struct {
	t *badger.Txn
	c *badger.DB
}

func (tx *badgerTxn) get(key []byte) []byte {
	item, err := tx.t.Get(key)
	if err == badger.ErrKeyNotFound {
		return nil
	}
	if err != nil {
		panic(err)
	}
	value, err := item.ValueCopy(nil)
	if err != nil {
		panic(err)
	}
	return value
}

func (tx *badgerTxn) gets(keys ...[]byte) [][]byte {
	values := make([][]byte, len(keys))
	for i, key := range keys {
		values[i] = tx.get(key)
	}
	return values
}

func (tx *badgerTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	var prefix bool
	options := badger.IteratorOptions{}
	if keysOnly {
		options.PrefetchValues = false
		options.PrefetchSize = 0
	}
	if bytes.Equal(nextKey(begin), end) {
		prefix = true
		options.Prefix = begin
	}
	it := tx.t.NewIterator(options)
	if prefix {
		it.Rewind()
	} else {
		it.Seek(begin)
	}
	defer it.Close()
	for ; it.Valid(); it.Next() {
		item := it.Item()
		if !prefix && bytes.Compare(item.Key(), end) >= 0 {
			break
		}
		var value []byte
		if !keysOnly {
			var err error
			value, err = item.ValueCopy(nil)
			if err != nil {
				panic(err)
			}
		}
		if !handler(item.KeyCopy(nil), value) {
			break
		}
	}
}

func (tx *badgerTxn) exist(prefix []byte) bool {
	it := tx.t.NewIterator(badger.IteratorOptions{
		Prefix:       prefix,
		PrefetchSize: 1,
	})
	defer it.Close()
	it.Rewind()
	return it.Valid()
}

func (tx *badgerTxn) set(key, value []byte) {
	if err := tx.t.Set(key, value); err != nil {
		panic(err)
	}
}

func (tx *badgerTxn) append(key []byte, value []byte) {
	list := append(tx.get(key), value...)
	tx.set(key, list)
}

func (tx *badgerTxn) incrBy(key []byte, value int64) int64 {
	buf := tx.get(key)
	newCounter := parseCounter(buf)
	if value != 0 {
		newCounter += value
		tx.set(key, packCounter(newCounter))
	}
	return newCounter
}

func (tx *badgerTxn) delete(key []byte) {
	if err := tx.t.Delete(key); err != nil {
		panic(err)
	}
}

type badgerClient struct {
	client *badger.DB
	ticker *time.Ticker
	done chan struct{}
}

func (c *badgerClient) name() string {
	return "badger"
}

func (c *badgerClient) shouldRetry(err error) bool {
	return err == badger.ErrConflict
}

func (c *badgerClient) config(key string) interface{} {
	return nil
}

func (c *badgerClient) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	return c.txn(ctx, f, retry)
}

func (c *badgerClient) txn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	tx := &badgerTxn{c.client.NewTransaction(true), c.client}
	defer func() { tx.t.Discard() }()
	defer func() {
		if r := recover(); r != nil {
			fe, ok := r.(error)
			if ok {
				err = fe
			} else {
				panic(r)
			}
		}
	}()
	err = f(&kvTxn{tx, retry})
	if err != nil {
		return err
	}
	// tx.t may differ from the original
	return tx.t.Commit()
}

func (c *badgerClient) scan(prefix []byte, handler func(key []byte, value []byte) bool) error {
	tx := c.client.NewTransaction(false)
	defer tx.Discard()
	it := tx.NewIterator(badger.IteratorOptions{
		Prefix:         prefix,
		PrefetchValues: true,
		PrefetchSize:   10240,
	})
	defer it.Close()
	for it.Rewind(); it.Valid(); it.Next() {
		item := it.Item()
		value, err := item.ValueCopy(nil)
		if err != nil {
			return err
		}
		if !handler(item.KeyCopy(nil), value) {
			break
		}
	}
	return nil
}

func (c *badgerClient) reset(prefix []byte) error {
	if prefix == nil {
		return c.client.DropAll()
	}
	return c.client.DropPrefix(prefix)
}

func (c *badgerClient) close() error {
	close(c.done)
	c.ticker.Stop()
	return c.client.Close()
}

func (c *badgerClient) gc() {}

func newBadgerClient(addr string) (tkvClient, error) {
	opt := badger.DefaultOptions(addr)
	opt.Logger = utils.GetLogger("badger")
	opt.MetricsEnabled = false
	client, err := badger.Open(opt)
	if err != nil {
		return nil, err
	}
	ticker := time.NewTicker(time.Hour)
	done := make(chan struct{})
	go func() {
		for {
			select {
			case <-ticker.C:
				for client.RunValueLogGC(0.7) == nil {
				}
			case <-done:
				return
			}
		}
	}()
	return &badgerClient{client, ticker, done}, nil
}

func init() {
	Register("badger", newKVMeta)
	drivers["badger"] = newBadgerClient
}


================================================
FILE: pkg/meta/tkv_bak.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"sort"
	"sync"
	"sync/atomic"

	"github.com/juicedata/juicefs/pkg/meta/pb"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"golang.org/x/sync/errgroup"
	"google.golang.org/protobuf/proto"
)

var (
	kvDumpBatchSize = 10000
)

func (m *kvMeta) dump(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	var dumps = []func(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error{
		m.dumpFormat,
		m.dumpCounters,
		m.dumpMix, // node, edge, chunk, symlink, xattr, parent
		m.dumpSustained,
		m.dumpDelFiles,
		m.dumpSliceRef,
		m.dumpACL,
		m.dumpQuota,
		m.dumpDirStat,
	}
	ts := m.client.config("startTS")
	if ts == nil && m.Name() == "tikv" {
		return errors.New("failed to get startTS, which is required for TiKV to ensure consistency")
	}
	if ts != nil {
		logger.Infof("dump kv with startTS: %d", ts.(uint64))
		ctx = ctx.WithValue(txSessionKey{}, ts)
	}

	for _, f := range dumps {
		err := f(ctx, opt, ch)
		if err != nil {
			return err
		}
	}
	return nil
}

func (m *kvMeta) load(ctx Context, typ int, opt *LoadOption, val proto.Message) error {
	return errors.New("not implemented, use kvMeta.LoadMetaV2 instead")
}

func (m *kvMeta) prepareLoad(ctx Context, opt *LoadOption) error {
	opt.check()

	var exist bool
	err := m.txn(ctx, func(tx *kvTxn) error {
		exist = tx.exist(m.fmtKey())
		return nil
	})
	if err != nil {
		return err
	}
	if exist {
		return fmt.Errorf("database %s://%s is not empty", m.Name(), m.addr)
	}
	return nil
}

func printSums(sums map[int]*atomic.Uint64) string {
	var p string
	for typ, sum := range sums {
		p += fmt.Sprintf("%d num: %d\n", typ, sum.Load())
	}
	return p
}

func (m *kvMeta) dumpCounters(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		counters := make([]*pb.Counter, 0, len(counterNames))
		for _, name := range counterNames {
			val := tx.get(m.counterKey(name))
			counters = append(counters, &pb.Counter{Key: name, Value: parseCounter(val)})
		}
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Counters: counters}})
	})
}

func splitInodeRange(n byte) [][2]byte {
	if n == 0 {
		return nil
	}

	step := 0xFF / n
	intervals := make([][2]byte, 0, n)

	for i := byte(0); i < n; i++ {
		start, end := i*step, (i+1)*step
		if i == n-1 {
			end = 0xFF
		}
		intervals = append(intervals, [2]byte{start, end})
	}
	return intervals
}

func (m *kvMeta) dumpMix(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	pools := map[int]*sync.Pool{
		segTypeNode:    {New: func() interface{} { return &pb.Node{} }},
		segTypeEdge:    {New: func() interface{} { return &pb.Edge{} }},
		segTypeChunk:   {New: func() interface{} { return &pb.Chunk{} }},
		segTypeSymlink: {New: func() interface{} { return &pb.Symlink{} }},
		segTypeXattr:   {New: func() interface{} { return &pb.Xattr{} }},
		segTypeParent:  {New: func() interface{} { return &pb.Parent{} }},
	}
	release := func(msg proto.Message) {
		batch := msg.(*pb.Batch)
		for _, node := range batch.Nodes {
			pools[segTypeNode].Put(node)
		}
		for _, edge := range batch.Edges {
			pools[segTypeEdge].Put(edge)
		}
		for _, chunk := range batch.Chunks {
			pools[segTypeChunk].Put(chunk)
		}
		for _, symlink := range batch.Symlinks {
			pools[segTypeSymlink].Put(symlink)
		}
		for _, xattr := range batch.Xattrs {
			pools[segTypeXattr].Put(xattr)
		}
		for _, parent := range batch.Parents {
			pools[segTypeParent].Put(parent)
		}
	}

	var sums = map[int]*atomic.Uint64{
		segTypeNode:    {},
		segTypeEdge:    {},
		segTypeChunk:   {},
		segTypeSymlink: {},
		segTypeXattr:   {},
		segTypeParent:  {},
	}
	createMsg := func(typ int) *pb.Batch {
		switch typ {
		case segTypeNode:
			return &pb.Batch{Nodes: make([]*pb.Node, 0, kvDumpBatchSize)}
		case segTypeEdge:
			return &pb.Batch{Edges: make([]*pb.Edge, 0, kvDumpBatchSize)}
		case segTypeChunk:
			return &pb.Batch{Chunks: make([]*pb.Chunk, 0, kvDumpBatchSize)}
		case segTypeSymlink:
			return &pb.Batch{Symlinks: make([]*pb.Symlink, 0, kvDumpBatchSize)}
		case segTypeXattr:
			return &pb.Batch{Xattrs: make([]*pb.Xattr, 0, kvDumpBatchSize)}
		case segTypeParent:
			return &pb.Batch{Parents: make([]*pb.Parent, 0, kvDumpBatchSize)}
		default:
			return nil
		}
	}
	var lists = make(map[int]*pb.Batch)
	for typ := range sums {
		lists[typ] = createMsg(typ)
	}

	var err error // final error
	eg, egCtx := errgroup.WithContext(ctx)
	eg.SetLimit(opt.Threads)

	type entry struct {
		k []byte
		v []byte
	}
	entryPool := &sync.Pool{
		New: func() interface{} {
			return &entry{}
		},
	}
	entryCh := make(chan *entry, kvDumpBatchSize)

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		var e *entry
		var typ int
		var n int
		for {
			select {
			case <-ctx.Done():
				return
			case e = <-entryCh:
			}
			if e == nil {
				break
			}
			ino := m.decodeInode(e.k[1:9])
			switch e.k[9] {
			case 'I':
				typ = segTypeNode
				node := pools[typ].Get().(*pb.Node)
				node.Inode = uint64(ino)
				node.Data = e.v
				lists[typ].Nodes = append(lists[typ].Nodes, node)
				n = len(lists[typ].Nodes)
			case 'D':
				typ = segTypeEdge
				edge := pools[typ].Get().(*pb.Edge)
				edge.Parent = uint64(ino)
				edge.Name = e.k[10:]
				nTyp, inode := m.parseEntry(e.v)
				edge.Type, edge.Inode = uint32(nTyp), uint64(inode)
				lists[typ].Edges = append(lists[typ].Edges, edge)
				n = len(lists[typ].Edges)
			case 'C':
				typ = segTypeChunk
				chk := pools[typ].Get().(*pb.Chunk)
				chk.Inode = uint64(ino)
				chk.Index = binary.BigEndian.Uint32(e.k[10:])
				chk.Slices = e.v
				lists[typ].Chunks = append(lists[typ].Chunks, chk)
				n = len(lists[typ].Chunks)
			case 'S':
				typ = segTypeSymlink
				sym := pools[typ].Get().(*pb.Symlink)
				sym.Inode = uint64(ino)
				sym.Target = unescape(string(e.v))
				lists[typ].Symlinks = append(lists[typ].Symlinks, sym)
				n = len(lists[typ].Symlinks)
			case 'X':
				typ = segTypeXattr
				xattr := pools[typ].Get().(*pb.Xattr)
				xattr.Inode = uint64(ino)
				xattr.Name = string(e.k[10:])
				xattr.Value = e.v
				lists[typ].Xattrs = append(lists[typ].Xattrs, xattr)
				n = len(lists[typ].Xattrs)
			case 'P':
				typ = segTypeParent
				parent := pools[typ].Get().(*pb.Parent)
				parent.Inode = uint64(ino)
				parent.Parent = uint64(m.decodeInode(e.k[10:]))
				parent.Cnt = parseCounter(e.v)
				lists[typ].Parents = append(lists[typ].Parents, parent)
				n = len(lists[typ].Parents)
			default:
				typ = segTypeUnknown
			}
			entryPool.Put(e)
			if typ != segTypeUnknown {
				sums[typ].Add(1)
				if n >= kvDumpBatchSize {
					if err = dumpResult(ctx, ch, &dumpedResult{lists[typ], release}); err != nil {
						return
					}
					lists[typ] = createMsg(typ)
				}
			}
		}
		for _, list := range lists {
			_ = dumpResult(ctx, ch, &dumpedResult{list, release})
		}
	}()

	if opt.Threads > 0xFF {
		opt.Threads = 0xFF
	}
	rs := splitInodeRange(byte(opt.Threads))
	for i, r := range rs {
		start, end := []byte{'A', r[0]}, []byte{'A', r[1]}
		if i == len(rs)-1 {
			end = []byte{'B'}
		}
		logger.Debugf("range: %v-%v", start, end)
		eg.Go(func() error {
			return m.txn(WrapContext(egCtx), func(tx *kvTxn) error {
				var ent *entry
				tx.scan(start, end, false, func(k, v []byte) bool {
					if egCtx.Err() != nil {
						return false
					}
					if len(k) <= 9 || k[0] != 'A' {
						return true
					}
					ent = entryPool.Get().(*entry)
					ent.k, ent.v = k, v
					entryCh <- ent
					return true
				})
				return nil
			})
		})
	}

	if iErr := eg.Wait(); iErr != nil {
		ctx.Cancel()
		wg.Wait()
		return iErr
	}

	close(entryCh)
	wg.Wait()
	return err
}

func (m *kvMeta) dumpSustained(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		sids := make(map[uint64][]uint64)
		cnt := 0
		tx.scan(m.fmtKey("SS"), nextKey(m.fmtKey("SS")), true, func(k, v []byte) bool {
			b := utils.FromBuffer([]byte(k[2:])) // "SS"
			if b.Len() != 16 {
				logger.Warnf("invalid sustainedKey: %s", k)
				return true
			}
			sid := b.Get64()
			inode := uint64(m.decodeInode(b.Get(8)))
			sids[sid] = append(sids[sid], inode)
			cnt++
			return true
		})

		sustained := make([]*pb.Sustained, 0, cnt)
		for sid, inodes := range sids {
			sustained = append(sustained, &pb.Sustained{
				Sid:    sid,
				Inodes: inodes,
			})
		}
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Sustained: sustained}})
	})
}

func (m *kvMeta) dumpDelFiles(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		delFiles := make([]*pb.DelFile, 0, kvDumpBatchSize)
		tx.scan(m.fmtKey("D"), nextKey(m.fmtKey("D")), false, func(k, v []byte) bool {
			b := utils.FromBuffer([]byte(k[1:])) // "D"
			if b.Len() != 16 {
				logger.Warnf("invalid delfileKey: %s", k)
				return true
			}
			inode := m.decodeInode(b.Get(8))
			delFiles = append(delFiles, &pb.DelFile{Inode: uint64(inode), Length: b.Get64(), Expire: m.parseInt64(v)})
			return true
		})
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Delfiles: delFiles}})
	})
}

func (m *kvMeta) dumpSliceRef(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		sliceRefs := make([]*pb.SliceRef, 0, kvDumpBatchSize)
		tx.scan(m.fmtKey("K"), nextKey(m.fmtKey("K")), false, func(k, v []byte) bool {
			b := utils.FromBuffer([]byte(k[1:])) // "K"
			if b.Len() != 12 {
				logger.Warnf("invalid sliceRefKey: %s", k)
				return true
			}
			id := b.Get64()
			size := b.Get32()
			sliceRefs = append(sliceRefs, &pb.SliceRef{Id: id, Size: size, Refs: parseCounter(v) + 1})
			return true
		})
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{SliceRefs: sliceRefs}})
	})
}

func (m *kvMeta) dumpACL(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		acls := make([]*pb.Acl, 0, 128)
		tx.scan(m.fmtKey("R"), nextKey(m.fmtKey("R")), false, func(k, v []byte) bool {
			b := utils.FromBuffer([]byte(k[1:])) // "R"
			if b.Len() != 4 {
				logger.Warnf("invalid aclKey: %s", k)
				return true
			}
			acls = append(acls, &pb.Acl{Id: b.Get32(), Data: v})
			return true
		})
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Acls: acls}})
	})
}

func (m *kvMeta) dumpQuota(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		quotas := make([]*pb.Quota, 0, 128)
		tx.scan(m.fmtKey("QD"), nextKey(m.fmtKey("QD")), false, func(k, v []byte) bool {
			q := &pb.Quota{}
			q.Inode = uint64(m.decodeInode([]byte(k)[2:]))
			b := utils.FromBuffer(v)
			q.MaxSpace = int64(b.Get64())
			q.MaxInodes = int64(b.Get64())
			q.UsedSpace = int64(b.Get64())
			q.UsedInodes = int64(b.Get64())
			quotas = append(quotas, q)
			return true
		})
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Quotas: quotas}})
	})
}

func (m *kvMeta) dumpDirStat(ctx Context, opt *DumpOption, ch chan<- *dumpedResult) error {
	return m.txn(ctx, func(tx *kvTxn) error {
		stats := make([]*pb.Stat, 0, kvDumpBatchSize)
		tx.scan(m.fmtKey("U"), nextKey(m.fmtKey("U")), false, func(k, v []byte) bool {
			s := &pb.Stat{}
			s.Inode = uint64(m.decodeInode([]byte(k)[1:]))
			b := utils.FromBuffer(v)
			s.DataLength = int64(b.Get64())
			s.UsedSpace = int64(b.Get64())
			s.UsedInodes = int64(b.Get64())
			stats = append(stats, s)
			return true
		})
		return dumpResult(ctx, ch, &dumpedResult{msg: &pb.Batch{Dirstats: stats}})
	})
}

func (m *kvMeta) insertKVs(ctx Context, pairs []*pair, threads int) error {
	if len(pairs) == 0 {
		return nil
	}

	sort.Slice(pairs, func(i, j int) bool {
		return bytes.Compare(pairs[i].key, pairs[j].key) < 0
	})

	maxSize, maxNum := 5<<20, m.maxTxnBatchNum()
	n := len(pairs)
	last, num, size := 0, 0, 0

	eg, egCtx := errgroup.WithContext(ctx)
	eg.SetLimit(threads)

	for i, pair := range pairs {
		num++
		size += len(pair.key) + len(pair.value)
		if num >= maxNum || size >= maxSize || i >= n-1 {
			ePairs := pairs[last : i+1]
			num, size, last = 0, 0, i+1
			eg.Go(func() error {
				return m.txn(WrapContext(egCtx), func(tx *kvTxn) error {
					for _, ep := range ePairs {
						tx.set(ep.key, ep.value)
					}
					return nil
				})
			})
		}
	}
	return eg.Wait()
}

func (m *kvMeta) loadFormat(ctx Context, msg proto.Message, pairs *[]*pair) {
	*pairs = append(*pairs, &pair{m.fmtKey("setting"), msg.(*pb.Format).Data})
}

func (m *kvMeta) loadCounters(ctx Context, msg proto.Message, pairs *[]*pair) {
	for _, counter := range msg.(*pb.Batch).Counters {
		*pairs = append(*pairs, &pair{m.counterKey(counter.Key), packCounter(counter.Value)})
	}
}

func (m *kvMeta) loadNodes(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, pn := range batch.Nodes {
		*pairs = append(*pairs, &pair{m.inodeKey(Ino(pn.Inode)), pn.Data})
	}
}

func (m *kvMeta) loadChunks(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, chk := range batch.Chunks {
		*pairs = append(*pairs, &pair{m.chunkKey(Ino(chk.Inode), chk.Index), chk.Slices})
	}
}

func (m *kvMeta) loadEdges(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, edge := range batch.Edges {
		buff := utils.NewBuffer(9)
		buff.Put8(uint8(edge.Type))
		buff.Put64(edge.Inode)
		*pairs = append(*pairs, &pair{m.entryKey(Ino(edge.Parent), string(edge.Name)), buff.Bytes()})
	}
}

func (m *kvMeta) loadSymlinks(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, symlink := range batch.Symlinks {
		*pairs = append(*pairs, &pair{m.symKey(Ino(symlink.Inode)), []byte(symlink.Target)})
	}
}

func (m *kvMeta) loadSustained(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, sustained := range batch.Sustained {
		for _, inode := range sustained.Inodes {
			*pairs = append(*pairs, &pair{m.sustainedKey(sustained.Sid, Ino(inode)), []byte{1}})
		}
	}
}

func (m *kvMeta) loadDelFiles(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, f := range batch.Delfiles {
		*pairs = append(*pairs, &pair{m.delfileKey(Ino(f.Inode), f.Length), m.packInt64(f.Expire)})
	}
}

func (m *kvMeta) loadSliceRefs(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, r := range batch.SliceRefs {
		*pairs = append(*pairs, &pair{m.sliceKey(r.Id, r.Size), packCounter(r.Refs - 1)})
	}
}

func (m *kvMeta) loadAcl(ctx Context, msg proto.Message, pairs *[]*pair, maxAclId *uint32) {
	batch := msg.(*pb.Batch)
	for _, acl := range batch.Acls {
		if acl.Id > *maxAclId {
			*maxAclId = acl.Id
		}
		*pairs = append(*pairs, &pair{m.aclKey(acl.Id), acl.Data})
	}
}

func (m *kvMeta) loadXattrs(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, xattr := range batch.Xattrs {
		*pairs = append(*pairs, &pair{m.xattrKey(Ino(xattr.Inode), xattr.Name), xattr.Value})
	}
}

func (m *kvMeta) loadQuota(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, q := range batch.Quotas {
		b := utils.NewBuffer(32)
		b.Put64(uint64(q.MaxSpace))
		b.Put64(uint64(q.MaxInodes))
		b.Put64(uint64(q.UsedSpace))
		b.Put64(uint64(q.UsedInodes))
		*pairs = append(*pairs, &pair{m.dirQuotaKey(Ino(q.Inode)), b.Bytes()})
	}
}

func (m *kvMeta) loadDirStats(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, s := range batch.Dirstats {
		b := utils.NewBuffer(24)
		b.Put64(uint64(s.DataLength))
		b.Put64(uint64(s.UsedSpace))
		b.Put64(uint64(s.UsedInodes))
		*pairs = append(*pairs, &pair{m.dirStatKey(Ino(s.Inode)), b.Bytes()})
	}
}

func (m *kvMeta) loadParents(ctx Context, msg proto.Message, pairs *[]*pair) {
	batch := msg.(*pb.Batch)
	for _, parent := range batch.Parents {
		*pairs = append(*pairs, &pair{m.parentKey(Ino(parent.Inode), Ino(parent.Parent)), packCounter(parent.Cnt)})
	}
}

func (m *kvMeta) maxTxnBatchNum() int {
	if m.Name() == "etcd" {
		return 128
	}
	return 10240
}

func (m *kvMeta) LoadMetaV2(ctx Context, r io.Reader, opt *LoadOption) error {
	if opt == nil {
		opt = &LoadOption{}
	}
	if err := m.en.prepareLoad(ctx, opt); err != nil {
		return err
	}

	type task struct {
		typ int
		msg proto.Message
	}
	taskCh := make(chan *task, 100)

	var (
		wg       sync.WaitGroup
		maxAclId uint32
	)
	workerFunc := func(ctx Context, taskCh <-chan *task) {
		defer wg.Done()
		var task *task
		maxNum := m.maxTxnBatchNum() * opt.Threads
		pairs := make([]*pair, 0, maxNum)
		for {
			select {
			case <-ctx.Done():
				return
			case task = <-taskCh:
			}
			if task == nil {
				if err := m.insertKVs(ctx, pairs, opt.Threads); err != nil {
					logger.Errorf("insert kvs failed: %v", err)
				}

				if maxAclId != 0 {
					if err := m.txn(ctx, func(tx *kvTxn) error {
						tx.set(m.counterKey(aclCounter), packCounter(int64(maxAclId)))
						return nil
					}); err != nil {
						logger.Errorf("update maxAclId failed: %v", err)
					}
				}
				break
			}
			switch task.typ {
			case segTypeFormat:
				m.loadFormat(ctx, task.msg, &pairs)
			case segTypeCounter:
				m.loadCounters(ctx, task.msg, &pairs)
			case segTypeNode:
				m.loadNodes(ctx, task.msg, &pairs)
			case segTypeEdge:
				m.loadEdges(ctx, task.msg, &pairs)
			case segTypeChunk:
				m.loadChunks(ctx, task.msg, &pairs)
			case segTypeSymlink:
				m.loadSymlinks(ctx, task.msg, &pairs)
			case segTypeXattr:
				m.loadXattrs(ctx, task.msg, &pairs)
			case segTypeParent:
				m.loadParents(ctx, task.msg, &pairs)
			case segTypeSustained:
				m.loadSustained(ctx, task.msg, &pairs)
			case segTypeDelFile:
				m.loadDelFiles(ctx, task.msg, &pairs)
			case segTypeSliceRef:
				m.loadSliceRefs(ctx, task.msg, &pairs)
			case segTypeAcl:
				m.loadAcl(ctx, task.msg, &pairs, &maxAclId)
			case segTypeQuota:
				m.loadQuota(ctx, task.msg, &pairs)
			case segTypeStat:
				m.loadDirStats(ctx, task.msg, &pairs)
			}
			if len(pairs) >= maxNum {
				if err := m.insertKVs(ctx, pairs, opt.Threads); err != nil {
					logger.Errorf("insert kvs failed: %v", err)
					ctx.Cancel()
					return
				}
				pairs = make([]*pair, 0, maxNum)
			}
		}
	}

	wg.Add(1)
	go workerFunc(ctx, taskCh)

	bak := &BakFormat{}
	for {
		seg, err := bak.ReadSegment(r)
		if err != nil {
			if errors.Is(err, errBakEOF) {
				close(taskCh)
				break
			}
			ctx.Cancel()
			wg.Wait()
			return err
		}

		select {
		case <-ctx.Done():
			wg.Wait()
			return ctx.Err()
		case taskCh <- &task{int(seg.typ), seg.val}:
			if opt.Progress != nil {
				opt.Progress(seg.Name(), int(seg.num()))
			}
		}
	}
	wg.Wait()
	return nil
}


================================================
FILE: pkg/meta/tkv_etcd.go
================================================
//go:build !noetcd
// +build !noetcd

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"context"
	"crypto/tls"
	"fmt"
	"net"
	"net/url"
	"strings"
	"time"

	"github.com/pkg/errors"
	etcd "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/pkg/transport"
)

type etcdTxn struct {
	ctx      context.Context
	kv       etcd.KV
	observed map[string]int64
	buffer   map[string][]byte
}

func (tx *etcdTxn) get(key []byte) []byte {
	k := string(key)
	if v, ok := tx.buffer[k]; ok {
		return v
	}
	resp, err := tx.kv.Get(tx.ctx, k, etcd.WithLimit(1))
	if err != nil {
		panic(fmt.Errorf("get %v: %s", k, err))
	}
	if resp.Count == 0 {
		tx.observed[k] = 0
		return nil
	}
	if resp.Count > 1 {
		panic(fmt.Errorf("expect 1 keys but got %d", resp.Count))
	}
	for _, pair := range resp.Kvs {
		if bytes.Equal(pair.Key, key) {
			tx.observed[k] = pair.ModRevision
			return pair.Value
		} else {
			panic(fmt.Errorf("expect key %v, but got %v", k, string(pair.Key)))
		}
	}
	panic("unreachable")
}

func (tx *etcdTxn) gets(keys ...[]byte) [][]byte {
	if len(keys) > 128 {
		var rs = make([][]byte, 0, len(keys))
		for i := 0; i < len(keys); i += 128 {
			rs = append(rs, tx.gets(keys[i:min(i+128, len(keys))]...)...)
		}
		return rs
	}
	ops := make([]etcd.Op, len(keys))
	for i, key := range keys {
		ops[i] = etcd.OpGet(string(key))
	}
	r, err := tx.kv.Do(tx.ctx, etcd.OpTxn(nil, ops, nil))
	if err != nil {
		panic(fmt.Errorf("batch get with %d keys: %s", len(keys), err))
	}
	rs := make(map[string][]byte)
	for _, res := range r.Txn().Responses {
		for _, p := range res.GetResponseRange().Kvs {
			k := string(p.Key)
			tx.observed[k] = p.ModRevision
			rs[k] = p.Value
		}
	}
	values := make([][]byte, len(keys))
	for i, key := range keys {
		k := string(key)
		if v, ok := tx.buffer[k]; ok {
			values[i] = v
			continue
		}
		values[i] = rs[k]
		if len(values[i]) == 0 {
			tx.observed[k] = 0
		}
	}
	return values
}

func (tx *etcdTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	opts := []etcd.OpOption{etcd.WithRange(string(end))}
	if keysOnly {
		opts = append(opts, etcd.WithKeysOnly())
	}
	resp, err := tx.kv.Get(tx.ctx, string(begin), opts...)
	if err != nil {
		panic(fmt.Errorf("get range [%v-%v): %s", string(begin), string(end), err))
	}
	for _, kv := range resp.Kvs {
		tx.observed[string(kv.Key)] = kv.ModRevision
		if !handler(kv.Key, kv.Value) {
			break
		}
	}
}

func (tx *etcdTxn) exist(prefix []byte) bool {
	resp, err := tx.kv.Get(tx.ctx, string(prefix), etcd.WithPrefix(), etcd.WithCountOnly())
	if err != nil {
		panic(fmt.Errorf("get prefix %v with count only: %s", string(prefix), err))
	}
	return resp.Count > 0
}

func (tx *etcdTxn) set(key, value []byte) {
	tx.buffer[string(key)] = value
	if len(tx.buffer) >= 128 {
		err := tx.commmit()
		if err != nil {
			panic(err)
		}
		tx.observed = make(map[string]int64)
		tx.buffer = make(map[string][]byte)
	}
}

func (tx *etcdTxn) append(key []byte, value []byte) {
	new := append(tx.get(key), value...)
	tx.set(key, new)
}

func (tx *etcdTxn) incrBy(key []byte, value int64) int64 {
	buf := tx.get(key)
	new := parseCounter(buf)
	if value != 0 {
		new += value
		tx.set(key, packCounter(new))
	}
	return new
}

func (tx *etcdTxn) delete(key []byte) {
	tx.buffer[string(key)] = nil
}

func (tx *etcdTxn) commmit() error {
	start := time.Now()
	var conds []etcd.Cmp
	var ops []etcd.Op
	for k, v := range tx.observed {
		conds = append(conds, etcd.Compare(etcd.ModRevision(k), "=", v))
	}
	for k, v := range tx.buffer {
		var op etcd.Op
		if v == nil {
			op = etcd.OpDelete(string(k))
		} else {
			op = etcd.OpPut(string(k), string(v))
		}
		ops = append(ops, op)
	}
	resp, err := tx.kv.Txn(tx.ctx).If(conds...).Then(ops...).Commit()
	if time.Since(start) > time.Millisecond*10 {
		logger.Debugf("txn with %d conds and %d ops took %s", len(conds), len(ops), time.Since(start))
	}
	if err != nil {
		return err
	}
	if resp.Succeeded {
		return nil
	}
	return conflicted
}

type etcdClient struct {
	client *etcd.Client
	kv     etcd.KV
}

func (c *etcdClient) name() string {
	return "etcd"
}

func (c *etcdClient) shouldRetry(err error) bool {
	return errors.Is(err, conflicted)
}

func (c *etcdClient) config(key string) interface{} {
	return nil
}

func (c *etcdClient) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	return c.txn(ctx, f, retry)
}

func (c *etcdClient) txn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	tx := &etcdTxn{
		ctx,
		c.kv,
		make(map[string]int64),
		make(map[string][]byte),
	}
	defer func() {
		if r := recover(); r != nil {
			fe, ok := r.(error)
			if ok {
				err = fe
			} else {
				panic(r)
			}
		}
	}()
	err = f(&kvTxn{tx, retry})
	if err != nil {
		return err
	}
	if len(tx.buffer) == 0 {
		return nil // read only
	}
	return tx.commmit()
}

var conflicted = errors.New("conflicted transaction")

func (c *etcdClient) scan(prefix []byte, handler func(key []byte, value []byte) bool) error {
	var start = prefix
	var end = string(nextKey(prefix))
	resp, err := c.client.Get(context.Background(), "anything")
	if err != nil {
		return err
	}
	currentRev := resp.Header.Revision
	var following bool
	for {
		resp, err := c.client.Get(context.Background(),
			string(start),
			etcd.WithRange(end),
			etcd.WithLimit(1024),
			etcd.WithMaxModRev(currentRev),
			etcd.WithSerializable())
		if err != nil {
			return fmt.Errorf("get start %v: %s", string(start), err)
		}
		if following && len(resp.Kvs) > 0 {
			resp.Kvs = resp.Kvs[1:]
		}
		if len(resp.Kvs) == 0 {
			break
		}
		for _, kv := range resp.Kvs {
			if !handler(kv.Key, kv.Value) {
				return nil
			}
		}
		start = resp.Kvs[len(resp.Kvs)-1].Key
		following = true
	}
	return nil
}

func (c *etcdClient) reset(prefix []byte) error {
	_, err := c.kv.Delete(context.Background(), string(prefix), etcd.WithPrefix())
	return err
}

func (c *etcdClient) close() error {
	return c.client.Close()
}

func (c *etcdClient) gc() {}

func buildTlsConfig(u *url.URL) (*tls.Config, error) {
	var tsinfo transport.TLSInfo
	q := u.Query()
	tsinfo.CAFile = q.Get("cacert")
	tsinfo.CertFile = q.Get("cert")
	tsinfo.KeyFile = q.Get("key")
	tsinfo.ServerName = q.Get("server-name")
	tsinfo.InsecureSkipVerify = q.Get("insecure-skip-verify") != ""
	if tsinfo.CAFile != "" || tsinfo.CertFile != "" || tsinfo.KeyFile != "" || tsinfo.ServerName != "" {
		return tsinfo.ClientConfig()
	}
	return nil, nil
}

func newEtcdClient(addr string) (tkvClient, error) {
	if !strings.Contains(addr, "://") {
		addr = "http://" + addr
	}
	u, err := url.Parse(addr)
	if err != nil {
		return nil, fmt.Errorf("parse %s: %s", addr, err)
	}
	passwd, _ := u.User.Password()
	hosts := strings.Split(u.Host, ",")
	for i, h := range hosts {
		h, _, err := net.SplitHostPort(h)
		if err != nil {
			hosts[i] = net.JoinHostPort(h, "2379")
		}
	}
	conf := etcd.Config{
		Endpoints:        hosts,
		Username:         u.User.Username(),
		Password:         passwd,
		AutoSyncInterval: time.Minute,
	}
	conf.TLS, err = buildTlsConfig(u)
	if err != nil {
		return nil, fmt.Errorf("build tls config from %s: %s", u.RawQuery, err)
	}
	c, err := etcd.New(conf)
	if err != nil {
		return nil, err
	}
	maxCompactSlices = 100
	var prefix string = u.Path + "\xFD"
	return withPrefix(&etcdClient{c, etcd.NewKV(c)}, []byte(prefix)), nil
}

func init() {
	Register("etcd", newKVMeta)
	drivers["etcd"] = newEtcdClient
}


================================================
FILE: pkg/meta/tkv_fdb.go
================================================
//go:build fdb
// +build fdb

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"fmt"
	"net/url"

	"github.com/apple/foundationdb/bindings/go/src/fdb"
)

func init() {
	Register("fdb", newKVMeta)
	drivers["fdb"] = newFdbClient
}

type fdbTxn struct {
	fdb.Transaction
}

type fdbClient struct {
	client fdb.Database
}

func newFdbClient(addr string) (tkvClient, error) {
	err := fdb.APIVersion(630)
	if err != nil {
		return nil, fmt.Errorf("set API version: %s", err)
	}
	u, err := url.Parse("fdb://" + addr)
	if err != nil {
		return nil, err
	}
	db, err := fdb.OpenDatabase(u.Path)
	if err != nil {
		return nil, fmt.Errorf("open database: %s", err)
	}
	// TODO: database options
	return withPrefix(&fdbClient{db}, append([]byte(u.Query().Get("prefix")), 0xFD)), nil
}

func (c *fdbClient) name() string {
	return "fdb"
}

func (c *fdbClient) config(key string) interface{} {
	return nil
}

func (c *fdbClient) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	return c.txn(ctx, f, retry)
}

func (c *fdbClient) txn(ctx context.Context, f func(*kvTxn) error, retry int) error {
	_, err := c.client.Transact(func(t fdb.Transaction) (interface{}, error) {
		e := f(&kvTxn{&fdbTxn{t}, retry})
		return nil, e
	})
	return err
}

func (c *fdbClient) scan(prefix []byte, handler func(key, value []byte) bool) error {
	begin := fdb.Key(prefix)
	end := fdb.Key(nextKey(prefix))
	limit := 102400
	var done bool
	for {
		if _, err := c.client.ReadTransact(func(t fdb.ReadTransaction) (interface{}, error) {
			// TODO:  t.Options().SetPriorityBatch()
			snapshot := t.Snapshot()
			iter := snapshot.GetRange(
				fdb.KeyRange{Begin: begin, End: end},
				fdb.RangeOptions{Limit: limit, Mode: fdb.StreamingModeWantAll},
			).Iterator()
			var r fdb.KeyValue
			var count int
			for iter.Advance() {
				r = iter.MustGet()
				if !handler(r.Key, r.Value) {
					break
				}
				count++
			}
			if count < limit {
				done = true
			} else {
				begin = append(r.Key, 0)
			}
			return nil, nil
		}); err != nil {
			return err
		}
		if done {
			return nil
		}
	}
}

func (c *fdbClient) reset(prefix []byte) error {
	_, err := c.client.Transact(func(t fdb.Transaction) (interface{}, error) {
		t.ClearRange(fdb.KeyRange{
			Begin: fdb.Key(prefix),
			End:   fdb.Key(nextKey(prefix)),
		})
		return nil, nil
	})
	return err
}

func (c *fdbClient) close() error {
	return nil
}

func (c *fdbClient) shouldRetry(err error) bool {
	return false
}

func (c *fdbClient) gc() {}

func (tx *fdbTxn) get(key []byte) []byte {
	return tx.Get(fdb.Key(key)).MustGet()
}

func (tx *fdbTxn) gets(keys ...[]byte) [][]byte {
	fut := make([]fdb.FutureByteSlice, len(keys))
	for i, key := range keys {
		fut[i] = tx.Get(fdb.Key(key))
	}
	ret := make([][]byte, len(keys))
	for i, f := range fut {
		ret[i] = f.MustGet()
	}
	return ret
}

func (tx *fdbTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	it := tx.GetRange(fdb.KeyRange{Begin: fdb.Key(begin), End: fdb.Key(end)},
		fdb.RangeOptions{Mode: fdb.StreamingModeWantAll}).Iterator()
	for it.Advance() {
		kv := it.MustGet()
		if !handler(kv.Key, kv.Value) {
			break
		}
	}
}

func (tx *fdbTxn) exist(prefix []byte) bool {
	return tx.GetRange(
		fdb.KeyRange{Begin: fdb.Key(prefix), End: fdb.Key(nextKey(prefix))},
		fdb.RangeOptions{Mode: fdb.StreamingModeWantAll},
	).Iterator().Advance()
}

func (tx *fdbTxn) set(key, value []byte) {
	tx.Set(fdb.Key(key), value)
}

func (tx *fdbTxn) append(key []byte, value []byte) {
	tx.AppendIfFits(fdb.Key(key), fdb.Key(value))
}

func (tx *fdbTxn) incrBy(key []byte, value int64) int64 {
	tx.Add(fdb.Key(key), packCounter(value))
	// TODO: don't return new value if not needed
	return parseCounter(tx.Get(fdb.Key(key)).MustGet())
}

func (tx *fdbTxn) delete(key []byte) {
	tx.Clear(fdb.Key(key))
}


================================================
FILE: pkg/meta/tkv_fdb_test.go
================================================
//go:build fdb
// +build fdb

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//nolint:errcheck
package meta

import (
	"testing"
)

func TestFdbClient(t *testing.T) { //skip mutate
	m, err := newKVMeta("fdb", "/etc/foundationdb/fdb.cluster?prefix=test2", testConfig())
	if err != nil {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestFdb(t *testing.T) { //skip mutate
	c, err := newFdbClient("/etc/foundationdb/fdb.cluster?prefix=test1")
	if err != nil {
		t.Fatal(err)
	}
	testTKV(t, c)
}


================================================
FILE: pkg/meta/tkv_lock.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"strconv"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
)

type lockOwner struct {
	sid   uint64
	owner uint64
}

func marshalFlock(ls map[lockOwner]byte) []byte {
	b := utils.NewBuffer(uint32(len(ls)) * 17)
	for o, l := range ls {
		b.Put64(o.sid)
		b.Put64(o.owner)
		b.Put8(l)
	}
	return b.Bytes()
}

func unmarshalFlock(buf []byte) map[lockOwner]byte {
	b := utils.FromBuffer(buf)
	var ls = make(map[lockOwner]byte)
	for b.HasMore() {
		sid := b.Get64()
		owner := b.Get64()
		ltype := b.Get8()
		ls[lockOwner{sid, owner}] = ltype
	}
	return ls
}

func (m *kvMeta) Flock(ctx Context, inode Ino, owner uint64, ltype uint32, block bool) syscall.Errno {
	ikey := m.flockKey(inode)
	ctx = ctx.WithValue(txMethodKey{}, "Flock"+strconv.Itoa(int(ltype)))
	var err error
	lkey := lockOwner{m.sid, owner}
	for {
		err = m.txn(ctx, func(tx *kvTxn) error {
			v := tx.get(ikey)
			ls := unmarshalFlock(v)
			switch ltype {
			case F_UNLCK:
				delete(ls, lkey)
			case F_RDLCK:
				for o, l := range ls {
					if l == 'W' && o != lkey {
						return syscall.EAGAIN
					}
				}
				ls[lkey] = 'R'
			case F_WRLCK:
				delete(ls, lkey)
				if len(ls) > 0 {
					return syscall.EAGAIN
				}
				ls[lkey] = 'W'
			default:
				return syscall.EINVAL
			}
			if len(ls) == 0 {
				tx.delete(ikey)
			} else {
				tx.set(ikey, marshalFlock(ls))
			}
			return nil
		}, inode)

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return errno(err)
}

func marshalPlock(ls map[lockOwner][]byte) []byte {
	var size uint32
	for _, l := range ls {
		size += 8 + 8 + 4 + uint32(len(l))
	}
	b := utils.NewBuffer(size)
	for k, records := range ls {
		b.Put64(k.sid)
		b.Put64(k.owner)
		b.Put32(uint32(len(records)))
		b.Put(records)
	}
	return b.Bytes()
}

func unmarshalPlock(buf []byte) map[lockOwner][]byte {
	b := utils.FromBuffer(buf)
	var ls = make(map[lockOwner][]byte)
	for b.HasMore() {
		sid := b.Get64()
		owner := b.Get64()
		records := b.Get(int(b.Get32()))
		ls[lockOwner{sid, owner}] = records
	}
	return ls
}

func (m *kvMeta) Getlk(ctx Context, inode Ino, owner uint64, ltype *uint32, start, end *uint64, pid *uint32) syscall.Errno {
	if *ltype == F_UNLCK {
		*start = 0
		*end = 0
		*pid = 0
		return 0
	}
	v, err := m.get(m.plockKey(inode))
	if err != nil {
		return errno(err)
	}
	owners := unmarshalPlock(v)
	delete(owners, lockOwner{m.sid, owner})
	for o, records := range owners {
		ls := loadLocks(records)
		for _, l := range ls {
			// find conflicted locks
			if (*ltype == F_WRLCK || l.Type == F_WRLCK) && *end >= l.Start && *start <= l.End {
				*ltype = l.Type
				*start = l.Start
				*end = l.End
				if o.sid == m.sid {
					*pid = l.Pid
				} else {
					*pid = 0
				}
				return 0
			}
		}
	}
	*ltype = F_UNLCK
	*start = 0
	*end = 0
	*pid = 0
	return 0
}

func (m *kvMeta) Setlk(ctx Context, inode Ino, owner uint64, block bool, ltype uint32, start, end uint64, pid uint32) syscall.Errno {
	ikey := m.plockKey(inode)
	ctx = ctx.WithValue(txMethodKey{}, "Setlk"+strconv.Itoa(int(ltype)))
	var err error
	lock := plockRecord{ltype, pid, start, end}
	lkey := lockOwner{m.sid, owner}
	for {
		err = m.txn(ctx, func(tx *kvTxn) error {
			owners := unmarshalPlock(tx.get(ikey))
			if ltype == F_UNLCK {
				records := owners[lkey]
				ls := loadLocks(records)
				if len(ls) == 0 {
					return nil // change nothing
				}
				ls = updateLocks(ls, lock)
				if len(ls) == 0 {
					delete(owners, lkey)
				} else {
					owners[lkey] = dumpLocks(ls)
				}
			} else {
				ls := loadLocks(owners[lkey])
				delete(owners, lkey)
				for _, d := range owners {
					ls := loadLocks(d)
					for _, l := range ls {
						// find conflicted locks
						if (ltype == F_WRLCK || l.Type == F_WRLCK) && end >= l.Start && start <= l.End {
							return syscall.EAGAIN
						}
					}
				}
				ls = updateLocks(ls, lock)
				owners[lkey] = dumpLocks(ls)
			}
			if len(owners) == 0 {
				tx.delete(ikey)
			} else {
				tx.set(ikey, marshalPlock(owners))
			}
			return nil
		}, inode)

		if !block || err != syscall.EAGAIN {
			break
		}
		if ltype == F_WRLCK {
			time.Sleep(time.Millisecond * 1)
		} else {
			time.Sleep(time.Millisecond * 10)
		}
		if ctx.Canceled() {
			return syscall.EINTR
		}
	}
	return errno(err)
}

func (m *kvMeta) ListLocks(ctx context.Context, inode Ino) ([]PLockItem, []FLockItem, error) {
	fKey := m.flockKey(inode)
	pKey := m.plockKey(inode)

	var flocks []FLockItem
	var plocks []PLockItem
	fv, err := m.get(fKey)
	if err != nil {
		return nil, nil, err
	}
	fs := unmarshalFlock(fv)
	for k, t := range fs {
		flocks = append(flocks, FLockItem{ownerKey{k.sid, k.owner}, string(t)})
	}

	pv, err := m.get(pKey)
	if err != nil {
		return nil, nil, err
	}
	owners := unmarshalPlock(pv)
	for k, records := range owners {
		ls := loadLocks(records)
		for _, l := range ls {
			plocks = append(plocks, PLockItem{ownerKey{k.sid, k.owner}, l})
		}
	}
	return plocks, flocks, nil
}


================================================
FILE: pkg/meta/tkv_mem.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strings"
	"sync"

	"github.com/google/btree"
)

func init() {
	Register("memkv", newKVMeta)
	drivers["memkv"] = newMockClient
}

const settingPath = "/tmp/juicefs.memkv.setting.json"

func newMockClient(addr string) (tkvClient, error) {
	client := &memKV{items: btree.New(2), temp: &kvItem{}}
	if d, err := os.ReadFile(settingPath); err == nil {
		var buffer map[string][]byte
		if err = json.Unmarshal(d, &buffer); err == nil {
			for k, v := range buffer {
				client.set(k, v) // not locked
			}
		}
	}
	return client, nil
}

type memTxn struct {
	store    *memKV
	observed map[string]int
	buffer   map[string][]byte
}

func (tx *memTxn) get(key []byte) []byte {
	k := string(key)
	if v, ok := tx.buffer[k]; ok {
		return v
	}
	tx.store.Lock()
	defer tx.store.Unlock()
	it := tx.store.get(k)
	if it != nil {
		tx.observed[k] = it.ver
		return it.value
	} else {
		tx.observed[k] = 0
		return nil
	}
}

func (tx *memTxn) gets(keys ...[]byte) [][]byte {
	values := make([][]byte, len(keys))
	for i, key := range keys {
		values[i] = tx.get(key)
	}
	return values
}

func (tx *memTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	tx.store.Lock()
	defer tx.store.Unlock()
	tx.store.items.AscendGreaterOrEqual(&kvItem{key: string(begin)}, func(i btree.Item) bool {
		it := i.(*kvItem)
		key := []byte(it.key)
		if bytes.Compare(key, end) >= 0 {
			return false
		}
		tx.observed[it.key] = it.ver
		return handler(key, it.value)
	})
}

func nextKey(key []byte) []byte {
	if len(key) == 0 {
		return nil
	}
	next := make([]byte, len(key))
	copy(next, key)
	p := len(next) - 1
	for {
		next[p]++
		if next[p] != 0 {
			break
		}
		p--
		if p < 0 {
			panic("can't scan keys for 0xFF")
		}
	}
	return next
}

func (tx *memTxn) exist(prefix []byte) bool {
	var ret bool
	tx.store.Lock()
	defer tx.store.Unlock()
	tx.store.items.AscendGreaterOrEqual(&kvItem{key: string(prefix)}, func(i btree.Item) bool {
		it := i.(*kvItem)
		if strings.HasPrefix(it.key, string(prefix)) {
			tx.observed[it.key] = it.ver
			ret = true
		}
		return false
	})
	return ret
}

func (tx *memTxn) set(key, value []byte) {
	tx.buffer[string(key)] = value
}

func (tx *memTxn) append(key []byte, value []byte) {
	new := append(tx.get(key), value...)
	tx.set(key, new)
}

func (tx *memTxn) incrBy(key []byte, value int64) int64 {
	buf := tx.get(key)
	new := parseCounter(buf)
	if value != 0 {
		new += value
		tx.set(key, packCounter(new))
	}
	return new
}

func (tx *memTxn) delete(key []byte) {
	tx.buffer[string(key)] = nil
}

type kvItem struct {
	key   string
	ver   int
	value []byte
}

func (it *kvItem) Less(o btree.Item) bool {
	return it.key < o.(*kvItem).key
}

type memKV struct {
	sync.Mutex
	items *btree.BTree
	temp  *kvItem
}

func (c *memKV) name() string {
	return "memkv"
}

func (c *memKV) shouldRetry(err error) bool {
	return strings.Contains(err.Error(), "write conflict")
}

func (c *memKV) config(key string) interface{} {
	return nil
}

func (c *memKV) get(key string) *kvItem {
	c.temp.key = key
	it := c.items.Get(c.temp)
	if it != nil {
		return it.(*kvItem)
	}
	return nil
}

func (c *memKV) set(key string, value []byte) {
	c.temp.key = key
	if value == nil {
		c.items.Delete(c.temp)
		return
	}
	it := c.items.Get(c.temp)
	if it != nil {
		it.(*kvItem).ver++
		it.(*kvItem).value = value
	} else {
		c.items.ReplaceOrInsert(&kvItem{key: key, ver: 1, value: value})
	}
}

func (c *memKV) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	return c.txn(ctx, f, retry)
}

func (c *memKV) txn(ctx context.Context, f func(*kvTxn) error, retry int) error {
	tx := &memTxn{
		store:    c,
		observed: make(map[string]int),
		buffer:   make(map[string][]byte),
	}
	if err := f(&kvTxn{tx, retry}); err != nil {
		return err
	}

	if len(tx.buffer) == 0 {
		return nil
	}
	c.Lock()
	defer c.Unlock()
	for k, ver := range tx.observed {
		it := c.get(k)
		if it == nil && ver != 0 {
			return fmt.Errorf("write conflict: %s was version %d, now deleted", k, ver)
		} else if it != nil && it.ver > ver {
			return fmt.Errorf("write conflict: %s %d > %d", k, it.ver, ver)
		}
	}
	if _, ok := tx.buffer["setting"]; ok {
		d, _ := json.Marshal(tx.buffer)
		if err := os.WriteFile(settingPath, d, 0644); err != nil {
			return err
		}
	}
	for k, value := range tx.buffer {
		c.set(k, value)
	}
	return nil
}

func (c *memKV) scan(prefix []byte, handler func(key []byte, value []byte) bool) error {
	c.Lock()
	snap := c.items.Clone()
	c.Unlock()
	begin := string(prefix)
	end := string(nextKey(prefix))
	snap.AscendGreaterOrEqual(&kvItem{key: begin}, func(i btree.Item) bool {
		it := i.(*kvItem)
		if end != "" && it.key >= end {
			return false
		}
		return handler([]byte(it.key), it.value)
	})
	return nil
}

func (c *memKV) reset(prefix []byte) error {
	if len(prefix) == 0 {
		c.Lock()
		c.items = btree.New(2)
		c.temp = &kvItem{}
		c.Unlock()
		return nil
	}
	return c.txn(Background(), func(kt *kvTxn) error {
		return c.scan(prefix, func(key, value []byte) bool {
			kt.delete(key)
			return true
		})
	}, 0)
}

func (c *memKV) close() error {
	return nil
}

func (c *memKV) gc() {}


================================================
FILE: pkg/meta/tkv_prefix.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"fmt"
)

type prefixTxn struct {
	*kvTxn
	prefix []byte
}

func (tx *prefixTxn) realKey(key []byte) []byte {
	k := make([]byte, len(tx.prefix)+len(key))
	copy(k, tx.prefix)
	copy(k[len(tx.prefix):], key)
	return k
}

func (tx *prefixTxn) origKey(key []byte) []byte {
	return key[len(tx.prefix):]
}

func (tx *prefixTxn) get(key []byte) []byte {
	return tx.kvTxn.get(tx.realKey(key))
}

func (tx *prefixTxn) gets(keys ...[]byte) [][]byte {
	realKeys := make([][]byte, len(keys))
	for i, key := range keys {
		realKeys[i] = tx.realKey(key)
	}
	return tx.kvTxn.gets(realKeys...)
}

func (tx *prefixTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	tx.kvTxn.scan(tx.realKey(begin), tx.realKey(end), keysOnly, func(k, v []byte) bool {
		return handler(tx.origKey(k), v)
	})
}

func (tx *prefixTxn) exist(prefix []byte) bool {
	return tx.kvTxn.exist(tx.realKey(prefix))
}

func (tx *prefixTxn) set(key, value []byte) {
	tx.kvTxn.set(tx.realKey(key), value)
}

func (tx *prefixTxn) append(key []byte, value []byte) {
	tx.kvTxn.append(tx.realKey(key), value)
}

func (tx *prefixTxn) incrBy(key []byte, value int64) int64 {
	return tx.kvTxn.incrBy(tx.realKey(key), value)
}

func (tx *prefixTxn) delete(key []byte) {
	tx.kvTxn.delete(tx.realKey(key))
}

type prefixClient struct {
	tkvClient
	prefix []byte
}

func (c *prefixClient) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	return c.tkvClient.simpleTxn(ctx, func(tx *kvTxn) error {
		return f(&kvTxn{&prefixTxn{tx, c.prefix}, retry})
	}, retry)
}

func (c *prefixClient) txn(ctx context.Context, f func(*kvTxn) error, retry int) error {
	return c.tkvClient.txn(ctx, func(tx *kvTxn) error {
		return f(&kvTxn{&prefixTxn{tx, c.prefix}, retry})
	}, retry)
}

func (c *prefixClient) scan(prefix []byte, handler func(key, value []byte) bool) error {
	k := make([]byte, len(c.prefix)+len(prefix))
	copy(k, c.prefix)
	copy(k[len(c.prefix):], prefix)
	return c.tkvClient.scan(k, func(key, value []byte) bool {
		return handler(key[len(c.prefix):], value)
	})
}

func (c *prefixClient) reset(prefix []byte) error {
	if prefix != nil {
		return fmt.Errorf("prefix must be nil, but got %v", prefix)
	}
	return c.tkvClient.reset(c.prefix)
}

func withPrefix(client tkvClient, prefix []byte) tkvClient {
	return &prefixClient{client, prefix}
}


================================================
FILE: pkg/meta/tkv_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//mutate:disable
//nolint:errcheck
package meta

import (
	"bytes"
	"fmt"
	"os"
	"sort"
	"testing"

	"github.com/dgraph-io/badger/v4"
)

func TestMemKVClient(t *testing.T) {
	_ = os.Remove(settingPath)
	m, err := newKVMeta("memkv", "jfs-unit-test", testConfig())
	if err != nil || m.Name() != "memkv" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestTiKVClient(t *testing.T) { //skip mutate
	m, err := newKVMeta("tikv", "127.0.0.1:2379/jfs-unit-test", testConfig())
	if err != nil || m.Name() != "tikv" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestBadgerClient(t *testing.T) {
	m, err := newKVMeta("badger", "badger", testConfig())
	if err != nil || m.Name() != "badger" {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func TestEtcdClient(t *testing.T) { //skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	m, err := newKVMeta("etcd", os.Getenv("ETCD_ADDR"), testConfig())
	if err != nil {
		t.Fatalf("create meta: %s", err)
	}
	testMeta(t, m)
}

func testTKV(t *testing.T, c tkvClient) {
	txn := func(f func(kt *kvTxn)) {
		if err := c.txn(Background(), func(kt *kvTxn) error {
			f(kt)
			return nil
		}, 0); err != nil {
			t.Fatal(err)
		}
	}
	// basic
	err := c.reset(nil)
	if err != nil {
		t.Fatalf("reset: %s", err)
	}
	var hasKey bool
	txn(func(kt *kvTxn) { hasKey = kt.exist(nil) })
	if hasKey {
		t.Fatalf("has key after reset")
	}
	k := []byte("k")
	v := []byte("value")

	txn(func(kt *kvTxn) {
		kt.set(k, v)
		kt.append(k, v)
	})
	var r []byte
	txn(func(kt *kvTxn) { r = kt.get(k) })
	if !bytes.Equal(r, []byte("valuevalue")) {
		t.Fatalf("expect 'valuevalue', but got %v", string(r))
	}
	txn(func(kt *kvTxn) {
		kt.set([]byte("k2"), v)
		kt.set([]byte("v"), k)
	})
	var ks [][]byte
	txn(func(kt *kvTxn) { ks = kt.gets([]byte("k1"), []byte("k2")) })
	if ks[0] != nil || string(ks[1]) != "value" {
		t.Fatalf("gets k1,k2: %+v != %+v", ks, [][]byte{nil, []byte("value")})
	}

	var keys [][]byte
	c.scan([]byte("k"), func(key, value []byte) bool {
		keys = append(keys, key)
		return true
	})
	if len(keys) != 2 || string(keys[0]) != "k" || string(keys[1]) != "k2" {
		t.Fatalf("keys: %+v", keys)
	}
	keys = keys[:0]
	txn(func(kt *kvTxn) {
		kt.scan([]byte("a"), []byte("z"), true, func(k, v []byte) bool {
			if len(k) == 1 {
				keys = append(keys, k)
			}
			return true
		})
	})
	if len(keys) != 2 || string(keys[0]) != "k" || string(keys[1]) != "v" {
		t.Fatalf("keys: %+v", keys)
	}
	keys = keys[:0]
	txn(func(kt *kvTxn) {
		kt.scan([]byte("k"), []byte("l"), true, func(k, v []byte) bool {
			keys = append(keys, k)
			return true
		})
	})
	if len(keys) != 2 || string(keys[0]) != "k" || string(keys[1]) != "k2" {
		t.Fatalf("keys: %+v", keys)
	}
	keys = keys[:0]
	txn(func(kt *kvTxn) {
		kt.scan([]byte("a"), []byte("z"), true, func(k, v []byte) bool {
			keys = append(keys, k)
			return true
		})
	})
	if len(keys) != 3 || string(keys[0]) != "k" || string(keys[1]) != "k2" || string(keys[2]) != "v" {
		t.Fatalf("keys: %+v", keys)
	}
	values := make(map[string][]byte)
	txn(func(kt *kvTxn) {
		kt.scan([]byte("k"), nextKey([]byte("k")), false, func(k, v []byte) bool {
			if len(v) == 5 {
				values[string(k)] = v
			}
			return true
		})
	})
	if len(values) != 1 || string(values["k2"]) != "value" {
		t.Fatalf("scan values: %+v", values)
	}
	values = make(map[string][]byte)
	txn(func(kt *kvTxn) {
		kt.scan([]byte("k2"), []byte("v"),
			false, func(k, v []byte) bool {
				values[string(k)] = v
				return true
			})
	})
	if len(values) != 1 || string(values["k2"]) != "value" {
		t.Fatalf("scanRange: %+v", values)
	}

	// exists
	txn(func(kt *kvTxn) { hasKey = kt.exist([]byte("k")) })
	if !hasKey {
		t.Fatalf("has key k*")
	}
	txn(func(kt *kvTxn) {
		for _, key := range keys {
			kt.delete(key)
		}
	})
	txn(func(kt *kvTxn) { r = kt.get(k) })
	if r != nil {
		t.Fatalf("expect nil, but got %v", string(r))
	}
	keys = keys[:0]
	txn(func(kt *kvTxn) {
		kt.scan([]byte("a"), []byte("z"), true, func(k, v []byte) bool {
			keys = append(keys, k)
			return true
		})
	})
	if len(keys) != 0 {
		t.Fatalf("no keys: %+v", keys)
	}
	txn(func(kt *kvTxn) { hasKey = kt.exist(nil) })
	if hasKey {
		t.Fatalf("has not keys")
	}

	// counters
	var count int64
	c.txn(Background(), func(tx *kvTxn) error {
		count = tx.incrBy([]byte("counter"), -1)
		return nil
	}, 0)
	if count != -1 {
		t.Fatalf("counter should be -1, but got %d", count)
	}
	c.txn(Background(), func(tx *kvTxn) error {
		count = tx.incrBy([]byte("counter"), 0)
		return nil
	}, 0)
	if count != -1 {
		t.Fatalf("counter should be -1, but got %d", count)
	}
	c.txn(Background(), func(tx *kvTxn) error {
		count = tx.incrBy([]byte("counter"), 2)
		return nil
	}, 0)
	if count != 1 {
		t.Fatalf("counter should be 1, but got %d", count)
	}

	// key with zeros
	k = []byte("k\x001")
	txn(func(kt *kvTxn) {
		kt.set(k, v)
	})
	var v2 []byte
	txn(func(kt *kvTxn) {
		v2 = kt.get(k)
	})
	if !bytes.Equal(v2, v) {
		t.Fatalf("expect %v but got %v", v, v2)
	}

	// scan many key-value pairs
	keys = make([][]byte, 0, 100000)
	for i := 0; i < 1000; i++ {
		txn(func(kt *kvTxn) {
			for j := 0; j < 100; j++ {
				k := []byte(fmt.Sprintf("Key_%d_%d", i, j))
				v := []byte(fmt.Sprintf("Value_%d_%d", i, j))
				kt.set(k, v)
				keys = append(keys, k)
			}
		})
	}
	kvs := make([][]byte, 0, 200000)
	txn(func(kt *kvTxn) {
		kt.scan([]byte("A"), []byte("Z"), false, func(k, v []byte) bool {
			kvs = append(kvs, k, v)
			return true
		})
	})
	sort.Slice(keys, func(i, j int) bool { return bytes.Compare(keys[i], keys[j]) < 0 })
	for i, k := range keys {
		if !bytes.Equal(k, kvs[i*2]) || !bytes.Equal([]byte(fmt.Sprintf("Value%s", k[3:])), kvs[i*2+1]) {
			t.Fatalf("expect %s but got %s, %s", k, keys[i*2], keys[i*2+1])
		}
	}
}

func TestBadgerKV(t *testing.T) {
	c, err := newBadgerClient("test_badger")
	if err != nil {
		t.Fatal(err)
	}
	testTKV(t, c)
}

func TestEtcd(t *testing.T) { //skip mutate
	if os.Getenv("SKIP_NON_CORE") == "true" {
		t.Skipf("skip non-core test")
	}
	c, err := newEtcdClient(fmt.Sprintf("%s/jfs", os.Getenv("ETCD_ADDR")))
	if err != nil {
		t.Fatal(err)
	}
	testTKV(t, c)
}

func TestMemKV(t *testing.T) {
	c, _ := newTkvClient("memkv", "")
	c = withPrefix(c, []byte("jfs"))
	testTKV(t, c)
}

func TestBadgerScanKeysOnlyNilValues(t *testing.T) {
	c, err := newBadgerClient(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	defer c.close()

	if err := c.txn(Background(), func(kt *kvTxn) error {
		kt.set([]byte("key1"), []byte("value1"))
		kt.set([]byte("key2"), []byte("value2"))
		return nil
	}, 0); err != nil {
		t.Fatal(err)
	}

	var scanned int
	if err := c.txn(Background(), func(kt *kvTxn) error {
		kt.scan([]byte("key"), nextKey([]byte("key")), true, func(k, v []byte) bool {
			if v != nil {
				t.Errorf("keysOnly=true: expected nil value for key %q, got %q", k, v)
			}
			scanned++
			return true
		})
		return nil
	}, 0); err != nil {
		t.Fatal(err)
	}
	if scanned != 2 {
		t.Fatalf("expected 2 keys scanned, got %d", scanned)
	}
}

func TestBadgerDeleteTxnTooBig(t *testing.T) {
	dir := t.TempDir()

	opt := badger.DefaultOptions(dir)
	opt.Logger = nil
	opt.MetricsEnabled = false
	opt.MemTableSize = 1 << 20
	opt.ValueThreshold = 1 << 10
	db, err := badger.Open(opt)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	const numKeys = 5000
	wb := db.NewWriteBatch()
	for i := 0; i < numKeys; i++ {
		if err := wb.Set([]byte(fmt.Sprintf("txbig_%05d", i)), []byte("v")); err != nil {
			t.Fatal(err)
		}
	}
	if err := wb.Flush(); err != nil {
		t.Fatal(err)
	}

	var keys [][]byte
	rtx := db.NewTransaction(false)
	it := rtx.NewIterator(badger.IteratorOptions{Prefix: []byte("txbig_"), PrefetchValues: false})
	for it.Rewind(); it.Valid(); it.Next() {
		keys = append(keys, it.Item().KeyCopy(nil))
	}
	it.Close()
	rtx.Discard()

	client := &badgerClient{client: db, done: make(chan struct{})}

	err = client.txn(Background(), func(kt *kvTxn) error {
		for _, key := range keys {
			kt.delete(key)
		}
		return nil
	}, 0)

	if err != badger.ErrTxnTooBig {
		t.Fatalf("expected ErrTxnTooBig, got %v", err)
	}
}


================================================
FILE: pkg/meta/tkv_tikv.go
================================================
//go:build !notikv
// +build !notikv

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"math"
	"net/url"
	"os"
	"strings"
	"syscall"
	"time"

	plog "github.com/pingcap/log"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"github.com/tikv/client-go/v2/config"
	tikverr "github.com/tikv/client-go/v2/error"
	"github.com/tikv/client-go/v2/oracle"
	"github.com/tikv/client-go/v2/tikv"
	"github.com/tikv/client-go/v2/txnkv"
	"github.com/tikv/client-go/v2/txnkv/txnutil"
	pd "github.com/tikv/pd/client"
	"go.uber.org/zap"
)

func init() {
	Register("tikv", newKVMeta)
	drivers["tikv"] = newTikvClient

}

func newTikvClient(addr string) (tkvClient, error) {
	var plvl string // TiKV (PingCap) uses uber-zap logging, make it less verbose
	switch logger.Level {
	case logrus.TraceLevel:
		plvl = "debug"
	case logrus.DebugLevel:
		plvl = "info"
	case logrus.InfoLevel, logrus.WarnLevel:
		plvl = "warn"
	case logrus.ErrorLevel:
		plvl = "error"
	default:
		plvl = "dpanic"
	}
	l, prop, _ := plog.InitLogger(&plog.Config{Level: plvl}, zap.Fields(zap.String("component", "tikv"), zap.Int("pid", os.Getpid())))
	plog.ReplaceGlobals(l, prop)

	tUrl, err := url.Parse("tikv://" + addr)
	if err != nil {
		return nil, err
	}
	query := tUrl.Query()
	config.UpdateGlobal(func(conf *config.Config) {
		conf.Security = config.NewSecurity(
			query.Get("ca"),
			query.Get("cert"),
			query.Get("key"),
			strings.Split(query.Get("verify-cn"), ","))
	})
	interval := time.Hour * 3
	if dur, err := time.ParseDuration(query.Get("gc-interval")); err == nil {
		if dur != 0 && dur < time.Hour {
			logger.Warnf("TiKV gc-interval (%s) is too short, and is reset to 1h", dur)
			dur = time.Hour
		}
		interval = dur
	}
	logger.Infof("TiKV gc interval is set to %s", interval)

	client, err := txnkv.NewClient(strings.Split(tUrl.Host, ","))
	if err != nil {
		return nil, err
	}

	if strings.ToLower(query.Get("open-tso-follower-proxy")) == "true" {
		if err := client.KVStore.GetPDClient().UpdateOption(pd.EnableTSOFollowerProxy, true); err != nil {
			logger.Warnf("Failed to enable TSO Follower Proxy: %v", err)
		} else {
			logger.Infof("Enabling TSO Follower Proxy")
		}
	}

	if waitStr := query.Get("max-tso-batch-wait-interval"); waitStr != "" {
		if waitDur, err := time.ParseDuration(waitStr); err == nil {
			if err := client.KVStore.GetPDClient().UpdateOption(pd.MaxTSOBatchWaitInterval, waitDur); err != nil {
				logger.Warnf("Failed to set MaxTSOBatchWaitInterval: %v", err)
			} else {
				logger.Infof("Set MaxTSOBatchWaitInterval to %s", waitDur)
			}
		} else {
			logger.Warnf("Failed to parse max-tso-batch-wait-interval (%s): %v", waitStr, err)
		}
	}

	prefix := strings.TrimLeft(tUrl.Path, "/")
	return withPrefix(&tikvClient{client.KVStore, interval}, append([]byte(prefix), 0xFD)), nil
}

type tikvTxn struct {
	*tikv.KVTxn
}

func (tx *tikvTxn) get(key []byte) []byte {
	value, err := tx.Get(context.TODO(), key)
	if tikverr.IsErrNotFound(err) {
		return nil
	}
	if err != nil {
		panic(err)
	}
	return value
}

func (tx *tikvTxn) gets(keys ...[]byte) [][]byte {
	ret, err := tx.BatchGet(context.TODO(), keys)
	if err != nil {
		panic(err)
	}
	values := make([][]byte, len(keys))
	for i, key := range keys {
		values[i] = ret[string(key)]
	}
	return values
}

func (tx *tikvTxn) scan(begin, end []byte, keysOnly bool, handler func(k, v []byte) bool) {
	snap := tx.GetSnapshot()
	snap.SetScanBatchSize(10240)
	snap.SetNotFillCache(true)
	snap.SetKeyOnly(keysOnly)
	it, err := tx.Iter(begin, end)
	if err != nil {
		panic(err)
	}
	defer it.Close()
	for it.Valid() && handler(it.Key(), it.Value()) {
		if err = it.Next(); err != nil {
			panic(err)
		}
	}
}

func (tx *tikvTxn) exist(prefix []byte) bool {
	it, err := tx.Iter(prefix, nextKey(prefix))
	if err != nil {
		panic(err)
	}
	defer it.Close()
	return it.Valid()
}

func (tx *tikvTxn) set(key, value []byte) {
	if err := tx.Set(key, value); err != nil {
		panic(err)
	}
}

func (tx *tikvTxn) append(key []byte, value []byte) {
	new := append(tx.get(key), value...)
	tx.set(key, new)
}

func (tx *tikvTxn) incrBy(key []byte, value int64) int64 {
	buf := tx.get(key)
	new := parseCounter(buf)
	if value != 0 {
		new += value
		tx.set(key, packCounter(new))
	}
	return new
}

func (tx *tikvTxn) delete(key []byte) {
	if err := tx.Delete(key); err != nil {
		panic(err)
	}
}

type tikvClient struct {
	client     *tikv.KVStore
	gcInterval time.Duration
}

func (c *tikvClient) name() string {
	return "tikv"
}

func (c *tikvClient) shouldRetry(err error) bool {
	return strings.Contains(err.Error(), "write conflict") || strings.Contains(err.Error(), "TxnLockNotFound")
}

func (c *tikvClient) config(key string) interface{} {
	if key == "startTS" {
		ts, err := c.client.CurrentTimestamp(oracle.GlobalTxnScope)
		if err != nil {
			logger.Warnf("TiKV get startTS: %s", err)
			return nil
		}
		return ts
	}
	return nil
}

func (c *tikvClient) simpleTxn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	tx, err := c.client.Begin(tikv.WithStartTS(math.MaxUint64)) // math.MaxUint64 means to point get the latest committed data without PD access
	if err != nil {
		return errors.Wrap(err, "failed to begin transaction")
	}
	defer func() {
		if r := recover(); r != nil {
			if e, ok := r.(error); ok {
				err = e
			} else {
				err = errors.Errorf("panic in point get transaction: %v", r)
			}
		}
	}()
	if err = f(&kvTxn{&tikvTxn{tx}, retry}); err != nil {
		return err
	}
	if !tx.IsReadOnly() {
		return syscall.EINVAL
	}
	return nil
}

func (c *tikvClient) txn(ctx context.Context, f func(*kvTxn) error, retry int) (err error) {
	var opts []tikv.TxnOption
	if val := ctx.Value(txSessionKey{}); val != nil {
		opts = append(opts, tikv.WithStartTS(val.(uint64)))
	}

	tx, err := c.client.Begin(opts...)
	if err != nil {
		return err
	}
	defer func() {
		if r := recover(); r != nil {
			fe, ok := r.(error)
			if ok {
				err = fe
			} else {
				err = errors.Errorf("tikv client txn func error: %v", r)
			}
		}
	}()
	if err = f(&kvTxn{&tikvTxn{tx}, retry}); err != nil {
		return err
	}
	if !tx.IsReadOnly() {
		tx.SetEnable1PC(true)
		tx.SetEnableAsyncCommit(true)
		err = tx.Commit(ctx)
	}
	return err
}

func (c *tikvClient) scan(prefix []byte, handler func(key, value []byte) bool) error {
	end := nextKey(prefix)
	start := prefix
OUT:
	for {
		ts, err := c.client.CurrentTimestamp(oracle.GlobalTxnScope)
		if err != nil {
			return err
		}
		snap := c.client.GetSnapshot(ts)
		snap.SetScanBatchSize(10240)
		snap.SetNotFillCache(true)
		snap.SetPriority(txnutil.PriorityLow)
		it, err := snap.Iter(start, end)
		if err != nil {
			return err
		}
		var lastKey []byte
		for it.Valid() && handler(it.Key(), it.Value()) {
			lastKey = it.Key()
			if err = it.Next(); err != nil {
				it.Close()
				if _, ok := err.(*tikverr.ErrGCTooEarly); !ok {
					logger.Warnf("scan next key: %s", err)
					return err
				} else { // restart scan
					start = nextKey(lastKey)
					continue OUT
				}
			}
		}
		it.Close()
		return nil
	}
}

func (c *tikvClient) reset(prefix []byte) error {
	_, err := c.client.DeleteRange(context.Background(), prefix, nextKey(prefix), 1)
	return err
}

func (c *tikvClient) close() error {
	return c.client.Close()
}

func (c *tikvClient) gc() {
	if c.gcInterval == 0 {
		return
	}

	currentTs, err := c.client.CurrentTimestamp(oracle.GlobalTxnScope)
	if err != nil {
		logger.Warnf("TiKV GC was skipped due to failure in obtaining the current timestamp.")
		return
	}

	safePoint, err := c.client.GC(context.Background(), oracle.GoTimeToTS(oracle.GetTimeFromTS(currentTs).Add(-c.gcInterval)))
	if err == nil {
		logger.Debugf("TiKV GC returns new safe point: %d (%s)", safePoint, oracle.GetTimeFromTS(safePoint))
	} else {
		logger.Warnf("TiKV GC: %s", err)
	}
}


================================================
FILE: pkg/meta/utils.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"errors"
	"fmt"
	"net/url"
	"path"
	"runtime"
	"runtime/debug"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/redis/go-redis/v9"
)

const (
	aclCounter      = "aclMaxId"
	usedSpace       = "usedSpace"
	totalInodes     = "totalInodes"
	legacySessions  = "sessions"
	krbTokenCounter = "krbTokenMaxId"
)

var counterNames = []string{usedSpace, totalInodes, "nextInode", "nextChunk", "nextSession", "nextTrash"}

const (
	// fallocate
	fallocKeepSize  = 0x01
	fallocPunchHole = 0x02
	// RESERVED: fallocNoHideStale   = 0x04
	fallocCollapesRange = 0x08
	fallocZeroRange     = 0x10
	fallocInsertRange   = 0x20
)
const (
	// clone mode
	CLONE_MODE_CAN_OVERWRITE      = 0x01
	CLONE_MODE_PRESERVE_ATTR      = 0x02
	CLONE_MODE_PRESERVE_HARDLINKS = 0x08

	// clone concurrency
	CLONE_DEFAULT_CONCURRENCY = 4

	// atime mode
	NoAtime     = "noatime"
	RelAtime    = "relatime"
	StrictAtime = "strictatime"
)

const (
	MODE_MASK_R = 0b100
	MODE_MASK_W = 0b010
	MODE_MASK_X = 0b001
)

type msgCallbacks struct {
	sync.Mutex
	callbacks map[uint32]MsgCallback
}

type freeID struct {
	next  uint64
	maxid uint64
}

var logger = utils.GetLogger("juicefs")

type queryMap struct {
	*url.Values
}

func (qm *queryMap) duration(key, originalKey string, d time.Duration) time.Duration {
	val := qm.Get(key)
	if val == "" {
		oVal := qm.Get(originalKey)
		if oVal == "" {
			return d
		}
		val = oVal
	}

	qm.Del(key)
	if dur, err := time.ParseDuration(val); err == nil {
		return dur
	} else {
		logger.Warnf("Parse duration %s for key %s: %s", val, key, err)
		return d
	}
}

func (qm *queryMap) getInt(key, originalKey string, defaultValue int) int {
	val := qm.Get(key)
	if val == "" {
		oVal := qm.Get(originalKey)
		if oVal == "" {
			return defaultValue
		}
		val = oVal
	}

	qm.Del(key)
	if i, err := strconv.ParseInt(val, 10, 32); err == nil {
		return int(i)
	} else {
		logger.Warnf("Parse int %s for key %s: %s", val, key, err)
		return defaultValue
	}
}

func (qm *queryMap) pop(key string) string {
	defer qm.Del(key)
	return qm.Get(key)
}

func errno(err error) syscall.Errno {
	if err == nil {
		return 0
	}
	if err == context.Canceled {
		return syscall.EINTR
	}
	if errors.Is(err, context.DeadlineExceeded) {
		return syscall.ETIMEDOUT
	}
	if eno, ok := err.(syscall.Errno); ok {
		return eno
	}
	if err == redis.Nil {
		return syscall.ENOENT
	}
	if strings.HasPrefix(err.Error(), "OOM") {
		return syscall.ENOSPC
	}
	logger.Errorf("error: %s\n%s", err, debug.Stack())
	return syscall.EIO
}

func accessMode(attr *Attr, uid uint32, gids []uint32) uint8 {
	if uid == 0 {
		return 0x7
	}
	mode := attr.Mode
	if uid == attr.Uid {
		return uint8(mode>>6) & 7
	}
	for _, gid := range gids {
		if gid == attr.Gid {
			return uint8(mode>>3) & 7
		}
	}
	return uint8(mode & 7)
}

func align4K(length uint64) int64 {
	if length == 0 {
		return 1 << 12
	}
	return int64((((length - 1) >> 12) + 1) << 12)
}

type plockRecord struct {
	Type  uint32
	Pid   uint32
	Start uint64
	End   uint64
}

type ownerKey struct {
	Sid   uint64
	Owner uint64
}

type PLockItem struct {
	ownerKey
	plockRecord
}

type FLockItem struct {
	ownerKey
	Type string
}

func parseOwnerKey(key string) (*ownerKey, error) {
	pair := strings.Split(key, "_")
	if len(pair) != 2 {
		return nil, fmt.Errorf("invalid owner key: %s", key)
	}
	sid, err := strconv.ParseUint(pair[0], 10, 64)
	if err != nil {
		return nil, err
	}
	owner, err := strconv.ParseUint(pair[1], 16, 64)
	if err != nil {
		return nil, err
	}
	return &ownerKey{sid, owner}, nil
}

func loadLocks(d []byte) []plockRecord {
	var ls []plockRecord
	rb := utils.FromBuffer(d)
	for rb.HasMore() {
		ls = append(ls, plockRecord{rb.Get32(), rb.Get32(), rb.Get64(), rb.Get64()})
	}
	return ls
}

func dumpLocks(ls []plockRecord) []byte {
	wb := utils.NewBuffer(uint32(len(ls)) * 24)
	for _, l := range ls {
		wb.Put32(l.Type)
		wb.Put32(l.Pid)
		wb.Put64(l.Start)
		wb.Put64(l.End)
	}
	return wb.Bytes()
}

func updateLocks(ls []plockRecord, nl plockRecord) []plockRecord {
	// ls is ordered by l.start without overlap
	size := len(ls)
	for i := 0; i < size && nl.Start <= nl.End; i++ {
		l := ls[i]
		if nl.Start < l.Start && nl.End >= l.Start {
			// split nl
			ls = append(ls, nl)
			ls[len(ls)-1].End = l.Start - 1
			nl.Start = l.Start
		}
		if nl.Start > l.Start && nl.Start <= l.End {
			// split l
			l.End = nl.Start - 1
			ls = append(ls, l)
			ls[i].Start = nl.Start
			l = ls[i]
		}
		if nl.Start == l.Start {
			ls[i].Type = nl.Type // update l
			ls[i].Pid = nl.Pid
			if l.End > nl.End {
				// split l
				ls[i].End = nl.End
				l.Start = nl.End + 1
				ls = append(ls, l)
			}
			nl.Start = ls[i].End + 1
		}
	}
	if nl.Start <= nl.End {
		ls = append(ls, nl)
	}
	sort.Slice(ls, func(i, j int) bool { return ls[i].Start < ls[j].Start })
	for i := 0; i < len(ls); {
		if ls[i].Type == F_UNLCK || ls[i].Start > ls[i].End {
			// remove empty one
			copy(ls[i:], ls[i+1:])
			ls = ls[:len(ls)-1]
		} else {
			if i+1 < len(ls) && ls[i].Type == ls[i+1].Type && ls[i].Pid == ls[i+1].Pid && ls[i].End+1 == ls[i+1].Start {
				// combine continuous range
				ls[i].End = ls[i+1].End
				ls[i+1].Start = ls[i+1].End + 1
			}
			i++
		}
	}
	return ls
}

func (m *baseMeta) emptyDir(ctx Context, inode Ino, skipCheckTrash bool, count *uint64, concurrent chan int) syscall.Errno {
	for {
		var entries []*Entry
		if st := m.en.doReaddir(ctx, inode, 0, &entries, 10000); st != 0 && st != syscall.ENOENT {
			return st
		}
		if len(entries) == 0 {
			return 0
		}
		if st := m.Access(ctx, inode, MODE_MASK_W|MODE_MASK_X, nil); st != 0 {
			return st
		}
		var wg sync.WaitGroup
		var status syscall.Errno
		var nonDirEntries []*Entry
		for i, e := range entries {
			if e.Attr.Typ == TypeDirectory {
				select {
				case concurrent <- 1:
					wg.Add(1)
					go func(child Ino, name string) {
						defer wg.Done()
						st := m.emptyEntry(ctx, inode, name, child, skipCheckTrash, count, concurrent)
						if st != 0 && st != syscall.ENOENT {
							status = st
						}
						<-concurrent
					}(e.Inode, string(e.Name))
				default:
					if st := m.emptyEntry(ctx, inode, string(e.Name), e.Inode, skipCheckTrash, count, concurrent); st != 0 && st != syscall.ENOENT {
						ctx.Cancel()
						return st
					}
				}
			} else {
				nonDirEntries = append(nonDirEntries, e)
			}
			if ctx.Canceled() {
				return syscall.EINTR
			}
			entries[i] = nil // release memory
		}
		wg.Wait()

		if status == 0 {
			status = m.BatchUnlink(ctx, inode, nonDirEntries, count, skipCheckTrash)
		}

		if status != 0 || inode == TrashInode { // try only once for .trash
			return status
		}
	}
}

func (m *baseMeta) emptyEntry(ctx Context, parent Ino, name string, inode Ino, skipCheckTrash bool, count *uint64, concurrent chan int) syscall.Errno {
	if ctx.Canceled() {
		return syscall.EINTR
	}
	st := m.emptyDir(ctx, inode, skipCheckTrash, count, concurrent)
	if st == 0 && !inode.IsTrash() {
		st = m.Rmdir(ctx, parent, name, skipCheckTrash)
		if st == syscall.ENOTEMPTY {
			// redo when concurrent conflict may happen
			st = m.emptyEntry(ctx, parent, name, inode, skipCheckTrash, count, concurrent)
		} else if count != nil {
			atomic.AddUint64(count, 1)
		}
	}
	return st
}

func (m *baseMeta) Remove(ctx Context, parent Ino, name string, skipTrash bool, numThreads int, count *uint64) syscall.Errno {
	parent = m.checkRoot(parent)
	if st := m.Access(ctx, parent, MODE_MASK_W|MODE_MASK_X, nil); st != 0 {
		return st
	}
	var inode Ino
	var attr Attr
	if st := m.Lookup(ctx, parent, name, &inode, &attr, false); st != 0 {
		return st
	}
	if attr.Typ != TypeDirectory {
		if count != nil {
			atomic.AddUint64(count, 1)
		}
		return m.Unlink(ctx, parent, name, skipTrash)
	}
	if numThreads <= 0 {
		logger.Infof("invalid threads number %d , auto adjust to %d", numThreads, RmrDefaultThreads)
		numThreads = RmrDefaultThreads
	} else if numThreads > 255 {
		logger.Infof("threads number %d too large, auto adjust to 255 .", numThreads)
		numThreads = 255
	}
	logger.Debugf("Start emptyEntry with %d concurrent threads .", numThreads)
	concurrent := make(chan int, numThreads)
	return m.emptyEntry(ctx, parent, name, inode, skipTrash, count, concurrent)
}

func (m *baseMeta) GetSummary(ctx Context, inode Ino, summary *Summary, recursive bool, strict bool) syscall.Errno {
	var attr Attr
	if st := m.GetAttr(ctx, inode, &attr); st != 0 {
		return st
	}
	if attr.Typ != TypeDirectory {
		summary.Files++
		summary.Size += uint64(align4K(attr.Length))
		if attr.Typ == TypeFile {
			summary.Length += attr.Length
		}
		return 0
	}
	summary.Dirs++
	summary.Size += uint64(align4K(0))
	concurrent := make(chan struct{}, 50)
	inode = m.checkRoot(inode)
	return m.getDirSummary(ctx, inode, summary, recursive, strict, concurrent, nil)
}

func (m *baseMeta) getDirSummary(ctx Context, inode Ino, summary *Summary, recursive bool, strict bool, concurrent chan struct{}, updateProgress func(count uint64, bytes uint64)) syscall.Errno {
	var entries []*Entry
	var err syscall.Errno
	format := m.getFormat()
	if strict || !format.DirStats {
		err = m.en.doReaddir(ctx, inode, 1, &entries, -1)
	} else {
		var st *dirStat
		st, err = m.GetDirStat(ctx, inode)
		if err != 0 {
			return err
		}
		atomic.AddUint64(&summary.Size, uint64(st.space))
		atomic.AddUint64(&summary.Length, uint64(st.length))
		if updateProgress != nil {
			updateProgress(uint64(st.inodes), uint64(st.space))
		}
		var attr Attr
		err = m.en.doGetAttr(ctx, inode, &attr)
		if err == 0 {
			if attr.Nlink > 2 {
				err = m.en.doReaddir(ctx, inode, 0, &entries, -1)
			} else {
				atomic.AddUint64(&summary.Files, uint64(st.inodes))
			}
		}
	}
	if err != 0 {
		return err
	}

	var wg sync.WaitGroup
	var errCh = make(chan syscall.Errno, 1)
	for _, e := range entries {
		if e.Attr.Typ == TypeDirectory {
			atomic.AddUint64(&summary.Dirs, 1)
		} else {
			atomic.AddUint64(&summary.Files, 1)
		}
		if strict || !format.DirStats {
			atomic.AddUint64(&summary.Size, uint64(align4K(e.Attr.Length)))
			if e.Attr.Typ == TypeFile {
				atomic.AddUint64(&summary.Length, e.Attr.Length)
			}
			if updateProgress != nil {
				updateProgress(1, uint64(align4K(e.Attr.Length)))
			}
		}
		if e.Attr.Typ != TypeDirectory || !recursive {
			continue
		}
		select {
		case <-ctx.Done():
			return syscall.EINTR
		case err := <-errCh:
			// TODO: cancel others
			return err
		case concurrent <- struct{}{}:
			wg.Add(1)
			go func(e *Entry) {
				defer wg.Done()
				err := m.getDirSummary(ctx, e.Inode, summary, recursive, strict, concurrent, updateProgress)
				<-concurrent
				if err != 0 && err != syscall.ENOENT {
					select {
					case errCh <- err:
					default:
					}
				}
			}(e)
		default:
			if err := m.getDirSummary(ctx, e.Inode, summary, recursive, strict, concurrent, updateProgress); err != 0 && err != syscall.ENOENT {
				return err
			}
		}
	}
	wg.Wait()
	select {
	case err = <-errCh:
	default:
	}
	return err
}

func (m *baseMeta) GetTreeSummary(ctx Context, root *TreeSummary, depth, topN uint8, strict bool,
	updateProgress func(count uint64, bytes uint64)) syscall.Errno {
	var attr Attr
	if st := m.GetAttr(ctx, root.Inode, &attr); st != 0 {
		return st
	}
	if updateProgress != nil {
		updateProgress(1, uint64(align4K(0)))
	}
	if attr.Typ != TypeDirectory {
		root.Files++
		root.Size += uint64(align4K(attr.Length))
		return 0
	}
	root.Dirs++
	root.Size += uint64(align4K(0))
	concurrent := make(chan struct{}, 50)
	root.Inode = m.checkRoot(root.Inode)
	return m.getTreeSummary(ctx, root, depth, topN, strict, concurrent, updateProgress)
}

func (m *baseMeta) getTreeSummary(ctx Context, tree *TreeSummary, depth, topN uint8, strict bool, concurrent chan struct{},
	updateProgress func(count uint64, bytes uint64)) syscall.Errno {
	if depth <= 0 {
		var summary Summary
		err := m.getDirSummary(ctx, tree.Inode, &summary, true, strict, concurrent, updateProgress)
		if err == 0 {
			tree.Dirs += summary.Dirs
			tree.Files += summary.Files
			tree.Size += summary.Size
		}
		return err
	}

	var entries []*Entry
	if err := m.en.doReaddir(ctx, tree.Inode, 1, &entries, -1); err != 0 {
		return err
	}
	var wg sync.WaitGroup
	tree.Children = make([]*TreeSummary, len(entries))
	errCh := make(chan syscall.Errno, 1)
	var err syscall.Errno
	for i, e := range entries {
		child := &TreeSummary{
			Inode: e.Inode,
			Path:  path.Join(tree.Path, string(e.Name)),
			Type:  e.Attr.Typ,
			Size:  uint64(align4K(e.Attr.Length)),
		}
		tree.Children[i] = child
		if updateProgress != nil {
			updateProgress(1, uint64(align4K(e.Attr.Length)))
		}
		if e.Attr.Typ != TypeDirectory {
			child.Files++
			continue
		}
		child.Dirs++
		select {
		case <-ctx.Done():
			return syscall.EINTR
		case err = <-errCh:
			return err
		case concurrent <- struct{}{}:
			wg.Add(1)
			go func() {
				defer wg.Done()
				err := m.getTreeSummary(ctx, child, depth-1, topN, strict, concurrent, updateProgress)
				<-concurrent
				if err != 0 && err != syscall.ENOENT {
					select {
					case errCh <- err:
					default:
					}
				}
			}()
		default:
			if err = m.getTreeSummary(ctx, child, depth-1, topN, strict, concurrent, updateProgress); err != 0 && err != syscall.ENOENT {
				return err
			}
		}
	}
	wg.Wait()
	select {
	case err = <-errCh:
		return err
	default:
	}

	// pick top N
	for _, c := range tree.Children {
		tree.Dirs += c.Dirs
		tree.Files += c.Files
		tree.Size += c.Size
	}
	sort.Slice(tree.Children, func(i, j int) bool {
		return tree.Children[i].Size > tree.Children[j].Size
	})
	if len(tree.Children) > int(topN) {
		omitChild := &TreeSummary{
			Path: path.Join(tree.Path, "..."),
			Type: TypeFile,
		}
		for _, child := range tree.Children[topN:] {
			omitChild.Size += child.Size
			omitChild.Files += child.Files
			omitChild.Dirs += child.Dirs
		}
		tree.Children = append(tree.Children[:topN], omitChild)
	}
	return 0
}

func (m *baseMeta) atimeNeedsUpdate(attr *Attr, now time.Time) bool {
	return m.conf.AtimeMode != NoAtime && relatimeNeedUpdate(attr, now) ||
		// update atime only for > 1 second accesses
		m.conf.AtimeMode == StrictAtime && now.Sub(time.Unix(attr.Atime, int64(attr.Atimensec))) > time.Second
}

// With relative atime, only update atime if the previous atime is earlier than either the ctime or
// mtime or if at least a day has passed since the last atime update.
func relatimeNeedUpdate(attr *Attr, now time.Time) bool {
	atime := time.Unix(attr.Atime, int64(attr.Atimensec))
	mtime := time.Unix(attr.Mtime, int64(attr.Mtimensec))
	ctime := time.Unix(attr.Ctime, int64(attr.Ctimensec))
	return mtime.After(atime) || ctime.After(atime) || now.Sub(atime) > 24*time.Hour
}

type txMethodKey struct{}

type txMethod string

func (m *txMethod) name(ctx context.Context) string {
	if *m == "" {
		*m = txMethod(callerName(ctx)) // lazy evaluation
	}
	return string(*m)
}

func callerName(ctx context.Context) string {
	if method, ok := ctx.Value(txMethodKey{}).(string); ok {
		return method // Fast path, prefer explicitly provided method name
	}
	const minSkip = 3
	for i := minSkip; i < 20; i++ { // Slow path, find the real caller
		pc, _, _, ok := runtime.Caller(i)
		if !ok {
			break
		}
		fn := runtime.FuncForPC(pc)
		if fn == nil {
			continue
		}
		name := fn.Name()
		// Skip frames containing anonymous functions (indicated by dot+number)
		if !strings.Contains(name, ".func") {
			return utils.MethodName(name)
		}
	}
	return "unknown"
}


================================================
FILE: pkg/meta/utils_darwin.go
================================================
package meta

import (
	"syscall"

	sys "golang.org/x/sys/unix"
)

const ENOATTR = syscall.ENOATTR
const (
	F_UNLCK = syscall.F_UNLCK
	F_RDLCK = syscall.F_RDLCK
	F_WRLCK = syscall.F_WRLCK
)

const (
	XattrCreateOrReplace = 0
	XattrCreate          = sys.XATTR_CREATE
	XattrReplace         = sys.XATTR_REPLACE
)


================================================
FILE: pkg/meta/utils_linux.go
================================================
package meta

import (
	"syscall"

	sys "golang.org/x/sys/unix"
)

const ENOATTR = syscall.ENODATA
const (
	F_UNLCK = syscall.F_UNLCK
	F_RDLCK = syscall.F_RDLCK
	F_WRLCK = syscall.F_WRLCK
)

const (
	XattrCreateOrReplace = 0
	XattrCreate          = sys.XATTR_CREATE
	XattrReplace         = sys.XATTR_REPLACE
)


================================================
FILE: pkg/meta/utils_test.go
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import (
	"context"
	"testing"
	"time"
)

func TestRelatimeNeedUpdate(t *testing.T) {
	attr := &Attr{
		Atime: 1000,
	}
	if !relatimeNeedUpdate(attr, time.Now()) {
		t.Fatal("atime not updated for 24 hours")
	}

	now := time.Now()
	attr.Atime = now.Unix()
	attr.Ctime = now.Unix() + 10
	if !relatimeNeedUpdate(attr, time.Now()) {
		t.Fatal("atime not updated for ctime")
	}

	now = time.Now()
	attr.Atime = now.Unix()
	attr.Mtime = now.Unix() + 10
	if !relatimeNeedUpdate(attr, time.Now()) {
		t.Fatal("atime not updated for mtime")
	}

	now = time.Now()
	attr.Atime = now.Unix()
	attr.Mtime = now.Unix()
	attr.Ctime = now.Unix()
	if relatimeNeedUpdate(attr, now) {
		t.Fatal("atime should not be updated")
	}
}

func TestAtimeNeedsUpdate(t *testing.T) {
	m := &baseMeta{
		conf: &Config{
			AtimeMode: NoAtime,
		},
	}
	attr := &Attr{
		Atime: 1000,
	}
	now := time.Now()
	if m.atimeNeedsUpdate(attr, now) {
		t.Fatal("atime updated for noatime")
	}

	m.conf.AtimeMode = RelAtime
	if !m.atimeNeedsUpdate(attr, now) {
		t.Fatal("atime not updated for relatime")
	}
	attr.Atime = now.Unix()
	if m.atimeNeedsUpdate(attr, now) {
		t.Fatal("atime updated for relatime")
	}

	m.conf.AtimeMode = StrictAtime
	attr.Atime = now.Unix() - 2
	if !m.atimeNeedsUpdate(attr, now) {
		t.Fatal("atime not updated for strictatime")
	}

	attr.Atime = now.Unix() - 1
	attr.Atimensec = uint32(now.Nanosecond())
	if m.atimeNeedsUpdate(attr, now) {
		t.Fatal("atime updated for strictatime when < 1s")
	}
}

func Test_getCallerName(t *testing.T) {
	ctx := context.WithValue(context.Background(), txMethodKey{}, "test")
	var method txMethod
	if method.name(ctx) != "test" {
		t.Fatalf("expected %q, got %q", "test", method)
	}
	func() {
		var method txMethod
		if method.name(context.Background()) != "Test_getCallerName" {
			t.Fatalf("expected %q, got %q", "Test_getCallerName", method)
		}
	}()
}


================================================
FILE: pkg/meta/utils_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package meta

import "syscall"

const ENOATTR = syscall.ENODATA

const (
	F_UNLCK = 1
	F_RDLCK = 2
	F_WRLCK = 3
)

const (
	XattrCreateOrReplace = 0
	XattrCreate          = 1
	XattrReplace         = 2
)


================================================
FILE: pkg/metric/metrics.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package metric

import (
	"fmt"
	"net"
	"os"
	"strconv"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/go-hclog"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

var logger = utils.GetLogger("juicefs")

var (
	start = time.Now()
	cpu   = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "cpu_usage",
		Help: "Accumulated CPU usage in seconds.",
	}, func() float64 {
		ru := utils.GetRusage()
		return ru.GetStime() + ru.GetUtime()
	})
	memory = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "memory",
		Help: "Used memory in bytes.",
	}, func() float64 {
		_, rss := utils.MemoryUsage()
		return float64(rss)
	})
	uptime = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "uptime",
		Help: "Total running time in seconds.",
	}, func() float64 {
		return time.Since(start).Seconds()
	})
)

func UpdateMetrics(registerer prometheus.Registerer) {
	if registerer == nil {
		return
	}
	registerer.MustRegister(cpu)
	registerer.MustRegister(memory)
	registerer.MustRegister(uptime)
}

func RegisterToConsul(consulAddr, metricsAddr string, metadata map[string]string) {
	if metricsAddr == "" {
		logger.Errorf("Metrics server start err,so can't register to consul")
		return
	}
	localIp, portStr, err := net.SplitHostPort(metricsAddr)
	if err != nil {
		logger.Errorf("Metrics url format err:%s", err)
		return
	}

	// Don't register 0.0.0.0 to consul
	if localIp == "0.0.0.0" || localIp == "::" {
		localIp, err = utils.GetLocalIp(consulAddr)
		if err != nil {
			logger.Errorf("Get local ip failed: %v", err)
			return
		}
	}
	port, err := strconv.Atoi(portStr)
	if err != nil {
		logger.Errorf("Metrics port set err:%s", err)
		return
	}
	config := consulapi.DefaultConfigWithLogger(hclog.New(&hclog.LoggerOptions{ //nolint:typecheck
		Name:   "consul-api",
		Output: logger.Out,
	}))
	config.Address = consulAddr
	client, err := consulapi.NewClient(config)
	if err != nil {
		logger.Errorf("Creat consul client failed:%s", err)
		return
	}

	hostname, err := os.Hostname()
	if err != nil {
		logger.Errorf("Get hostname failed:%s", err)
		return
	}
	metadata["hostName"] = hostname
	var id, name string
	if mp, ok := metadata["mountPoint"]; ok {
		id = fmt.Sprintf("%s:%s", localIp, mp)
		name = "juicefs"
	} else {
		// for sync metrics, id format: 127.0.0.1;src->dst;pid=6666
		id = fmt.Sprintf("%s;%s->%s;pid=%s", localIp, metadata["src"], metadata["dst"], metadata["pid"])
		delete(metadata, "src")
		delete(metadata, "dst")
		name = "juicefs-sync"
	}

	check := &consulapi.AgentServiceCheck{
		HTTP:                           fmt.Sprintf("http://%s:%d/metrics", localIp, port),
		Timeout:                        "5s",
		Interval:                       "5s",
		DeregisterCriticalServiceAfter: "30s",
	}

	registration := consulapi.AgentServiceRegistration{
		ID:      id,
		Name:    name,
		Port:    port,
		Address: localIp,
		Meta:    metadata,
		Check:   check,
	}
	if err = client.Agent().ServiceRegister(&registration); err != nil {
		logger.Errorf("Service register failed: %s", err)
	} else {
		logger.Infof("Juicefs register to consul success, id: %q, port: %d", id, port)
	}
}


================================================
FILE: pkg/object/azure.go
================================================
//go:build !noazure
// +build !noazure

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
	blob2 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
)

type wasb struct {
	DefaultObjectStorage
	container    *container.Client
	azblobCli    *azblob.Client
	sc           string
	cName        string
	useTokenAuth bool // true when using managed identity/token-based auth, false for shared key/connection string
}

func (b *wasb) String() string {
	return fmt.Sprintf("wasb://%s/", b.cName)
}

func (b *wasb) Create(ctx context.Context) error {
	_, err := b.container.Create(ctx, nil)
	if err != nil {
		if e, ok := err.(*azcore.ResponseError); ok && e.ErrorCode == string(bloberror.ContainerAlreadyExists) {
			return nil
		}
	}
	return err
}

func (b *wasb) Head(ctx context.Context, key string) (Object, error) {
	properties, err := b.container.NewBlobClient(key).GetProperties(ctx, nil)
	if err != nil {
		if e, ok := err.(*azcore.ResponseError); ok && e.ErrorCode == string(bloberror.BlobNotFound) {
			err = os.ErrNotExist
		}
		return nil, err
	}

	return &obj{
		key,
		*properties.ContentLength,
		*properties.LastModified,
		strings.HasSuffix(key, "/"),
		*properties.AccessTier,
	}, nil
}

func (b *wasb) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	download, err := b.container.NewBlobClient(key).DownloadStream(ctx, &azblob.DownloadStreamOptions{Range: blob2.HTTPRange{Offset: off, Count: limit}})
	if err != nil {
		return nil, err
	}
	attrs := ApplyGetters(getters...)
	// TODO fire another property request to get the actual storage class
	attrs.SetRequestID(aws.ToString(download.RequestID)).SetStorageClass(b.sc)
	return download.Body, err
}

func str2Tier(tier string) *blob2.AccessTier {
	for _, v := range blob2.PossibleAccessTierValues() {
		if string(v) == tier {
			return &v
		}
	}
	return nil
}

func (b *wasb) Put(ctx context.Context, key string, data io.Reader, getters ...AttrGetter) error {
	options := azblob.UploadStreamOptions{}
	if b.sc != "" {
		options.AccessTier = str2Tier(b.sc)
	}
	resp, err := b.azblobCli.UploadStream(ctx, b.cName, key, data, &options)
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(aws.ToString(resp.RequestID)).SetStorageClass(b.sc)
	return err
}

func (b *wasb) Copy(ctx context.Context, dst, src string) error {
	dstCli := b.container.NewBlobClient(dst)
	srcCli := b.container.NewBlobClient(src)
	options := &blob2.CopyFromURLOptions{}
	if b.sc != "" {
		options.Tier = str2Tier(b.sc)
	}

	var srcURL string
	var err error

	if b.useTokenAuth {
		// Token-based authentication: use direct blob URL
		// Azure will authenticate using the OAuth token from the credential chain
		srcURL = srcCli.URL()
		logger.Debugf("Using token-based authentication for Copy operation (direct URL without SAS)")
	} else {
		// Shared key authentication: generate SAS token for source blob
		srcURL, err = srcCli.GetSASURL(sas.BlobPermissions{Read: true}, time.Now().Add(10*time.Second), nil)
		if err != nil {
			return err
		}
		logger.Debugf("Using shared key authentication for Copy operation (SAS URL)")
	}

	_, err = dstCli.CopyFromURL(ctx, srcURL, options)
	return err
}

func (b *wasb) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	resp, err := b.container.NewBlobClient(key).Delete(ctx, nil)
	if err != nil {
		if e, ok := err.(*azcore.ResponseError); ok && e.ErrorCode == string(bloberror.BlobNotFound) {
			err = nil
		}
	}
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(aws.ToString(resp.RequestID))
	return err
}

func (b *wasb) List(ctx context.Context, prefix, startAfter, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "" {
		return nil, false, "", notSupported
	}

	limit32 := int32(limit)
	pager := b.azblobCli.NewListBlobsFlatPager(b.cName, &azblob.ListBlobsFlatOptions{Prefix: &prefix, Marker: &token, MaxResults: &limit32})
	page, err := pager.NextPage(ctx)
	if err != nil {
		return nil, false, "", err
	}
	var n int
	if page.Segment != nil {
		n = len(page.Segment.BlobItems)
	}
	objs := make([]Object, 0, n)
	for i := 0; i < n; i++ {
		blob := page.Segment.BlobItems[i]
		if *blob.Name <= startAfter {
			continue
		}
		mtime := blob.Properties.LastModified
		objs = append(objs, &obj{
			*blob.Name,
			*blob.Properties.ContentLength,
			*mtime,
			strings.HasSuffix(*blob.Name, "/"),
			string(*blob.Properties.AccessTier),
		})
	}

	var nextMarker string
	if pager.More() {
		nextMarker = *page.NextMarker
	}
	return objs, pager.More(), nextMarker, nil
}

func (b *wasb) SetStorageClass(sc string) error {
	b.sc = sc
	return nil
}

// createAzureCredential creates a credential for Azure authentication.
// Uses DefaultAzureCredential which attempts authentication via:
// - Environment variables (service principal)
// - Workload Identity (Kubernetes)
// - Managed Identity (system-assigned and user-assigned)
// - Azure CLI
// - Azure Developer CLI
func createAzureCredential() (azcore.TokenCredential, error) {
	logger.Debugf("Creating DefaultAzureCredential for token-based authentication")
	cred, err := azidentity.NewDefaultAzureCredential(nil)
	if err != nil {
		logger.Debugf("Failed to create DefaultAzureCredential: %v", err)
		return nil, err
	}
	return cred, nil
}

func autoWasbEndpoint(containerName, accountName, scheme string, credential *azblob.SharedKeyCredential) (string, error) {
	baseURLs := []string{"blob.core.windows.net", "blob.core.chinacloudapi.cn"}
	endpoint := ""
	for _, baseURL := range baseURLs {
		if _, err := net.LookupIP(fmt.Sprintf("%s.%s", accountName, baseURL)); err != nil {
			logger.Debugf("Attempt to resolve domain name %s failed: %s", baseURL, err)
			continue
		}
		client, err := azblob.NewClientWithSharedKeyCredential(fmt.Sprintf("%s://%s.%s", scheme, accountName, baseURL), credential, nil)
		if err != nil {
			return "", err
		}
		if _, err = client.ServiceClient().GetProperties(ctx, nil); err != nil {
			logger.Debugf("Try to get containers properties at %s failed: %s", baseURL, err)
			continue
		}
		endpoint = baseURL
		break
	}

	if endpoint == "" {
		return "", fmt.Errorf("fail to get endpoint for container %s", containerName)
	}
	return endpoint, nil
}

func autoWasbEndpointWithToken(containerName, accountName, scheme string, credential azcore.TokenCredential) (string, error) {
	baseURLs := []string{"blob.core.windows.net", "blob.core.chinacloudapi.cn"}
	endpoint := ""
	for _, baseURL := range baseURLs {
		if _, err := net.LookupIP(fmt.Sprintf("%s.%s", accountName, baseURL)); err != nil {
			logger.Debugf("Attempt to resolve domain name %s failed: %s", baseURL, err)
			continue
		}
		client, err := azblob.NewClient(fmt.Sprintf("%s://%s.%s", scheme, accountName, baseURL), credential, nil)
		if err != nil {
			return "", err
		}
		if _, err = client.ServiceClient().GetProperties(ctx, nil); err != nil {
			logger.Debugf("Try to get service properties at %s failed: %s", baseURL, err)
			continue
		}
		endpoint = baseURL
		break
	}

	if endpoint == "" {
		return "", fmt.Errorf("fail to get endpoint for container %s", containerName)
	}
	return endpoint, nil
}

func newWasb(endpoint, accountName, accountKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)
	containerName := hostParts[0]

	// Priority 1: Connection string support
	// DefaultEndpointsProtocol=[http|https];AccountName=***;AccountKey=***;EndpointSuffix=[core.windows.net|core.chinacloudapi.cn]
	if connString := os.Getenv("AZURE_STORAGE_CONNECTION_STRING"); connString != "" {
		logger.Debugf("Using Azure connection string authentication")
		var client *azblob.Client
		if client, err = azblob.NewClientFromConnectionString(connString, nil); err != nil {
			return nil, err
		}
		return &wasb{container: client.ServiceClient().NewContainerClient(containerName), azblobCli: client, cName: containerName, useTokenAuth: false}, nil
	}

	// Priority 2: Try managed identity / token-based authentication if no account key provided
	if accountKey == "" {
		logger.Debugf("No account key provided, attempting token-based authentication (managed identity, Azure CLI, etc.)")
		tokenCred, err := createAzureCredential()
		if err != nil {
			return nil, fmt.Errorf("Failed to create Azure credential (managed identity/Azure CLI): %v", err)
		}

		var domain string
		if len(hostParts) > 1 {
			domain = hostParts[1]
			if !strings.HasPrefix(hostParts[1], "blob") {
				domain = fmt.Sprintf("blob.%s", hostParts[1])
			}
		} else if domain, err = autoWasbEndpointWithToken(containerName, accountName, uri.Scheme, tokenCred); err != nil {
			return nil, fmt.Errorf("Unable to get endpoint of container %s: %s", containerName, err)
		}

		serviceURL := fmt.Sprintf("%s://%s.%s", uri.Scheme, accountName, domain)
		client, err := azblob.NewClient(serviceURL, tokenCred, nil)
		if err != nil {
			return nil, fmt.Errorf("Failed to create Azure blob client with token credential: %v", err)
		}
		logger.Debugf("Successfully authenticated using token-based credential")
		return &wasb{container: client.ServiceClient().NewContainerClient(containerName), azblobCli: client, cName: containerName, useTokenAuth: true}, nil
	}

	// Priority 3: Shared key authentication (existing behavior)
	logger.Debugf("Using Azure shared key authentication")
	credential, err := azblob.NewSharedKeyCredential(accountName, accountKey)
	if err != nil {
		return nil, err
	}

	var domain string
	if len(hostParts) > 1 {
		domain = hostParts[1]
		if !strings.HasPrefix(hostParts[1], "blob") {
			domain = fmt.Sprintf("blob.%s", hostParts[1])
		}
	} else if domain, err = autoWasbEndpoint(containerName, accountName, uri.Scheme, credential); err != nil {
		return nil, fmt.Errorf("Unable to get endpoint of container %s: %s", containerName, err)
	}

	client, err := azblob.NewClientWithSharedKeyCredential(fmt.Sprintf("%s://%s.%s", uri.Scheme, accountName, domain), credential, nil)
	if err != nil {
		return nil, err
	}
	return &wasb{container: client.ServiceClient().NewContainerClient(containerName), azblobCli: client, cName: containerName, useTokenAuth: false}, nil
}

func init() {
	Register("wasb", newWasb)
}


================================================
FILE: pkg/object/b2.go
================================================
//go:build !nob2
// +build !nob2

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"gopkg.in/kothar/go-backblaze.v0"
)

type b2client struct {
	DefaultObjectStorage
	bucket *backblaze.Bucket
}

func (c *b2client) String() string {
	return fmt.Sprintf("b2://%s/", c.bucket.Name)
}

func (c *b2client) Create(ctx context.Context) error {
	return nil
}

func (c *b2client) getFileInfo(key string) (*backblaze.File, error) {
	var f *backblaze.File
	var r io.ReadCloser
	var err error
	f, r, err = c.bucket.DownloadFileRangeByName(key, &backblaze.FileRange{Start: 0, End: 1})
	if err != nil {
		//	get empty file info
		if e, ok := err.(*backblaze.B2Error); ok && e.Status == http.StatusRequestedRangeNotSatisfiable {
			f, r, err = c.bucket.DownloadFileRangeByName(key, nil)
		}
	}
	if err != nil {
		return nil, err
	}
	var buf [2]byte
	_, _ = r.Read(buf[:])
	_ = r.Close()
	return f, nil
}

func (c *b2client) Head(ctx context.Context, key string) (Object, error) {
	f, err := c.getFileInfo(key)
	if err != nil {
		if e, ok := err.(*backblaze.B2Error); ok && e.Status == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		f.Name,
		f.ContentLength,
		time.Unix(f.UploadTimestamp/1000, 0),
		strings.HasSuffix(f.Name, "/"),
		"",
	}, nil
}

func (c *b2client) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if off == 0 && limit == -1 {
		_, r, err := c.bucket.DownloadFileByName(key)
		return r, err
	}
	if limit == -1 {
		limit = 1 << 50
	}
	rang := &backblaze.FileRange{Start: off, End: off + limit - 1}
	_, r, err := c.bucket.DownloadFileRangeByName(key, rang)
	return r, err
}

func (c *b2client) Put(ctx context.Context, key string, data io.Reader, getters ...AttrGetter) error {
	_, err := c.bucket.UploadFile(key, nil, data)
	return err
}

func (c *b2client) Copy(ctx context.Context, dst, src string) error {
	f, err := c.getFileInfo(src)
	if err != nil {
		return err
	}
	// destinationBucketId must be set,otherwise it will return 400 Bad destinationBucketId
	_, err = c.bucket.CopyFile(f.ID, dst, c.bucket.ID, backblaze.FileMetaDirectiveCopy)
	return err
}

func (c *b2client) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	f, err := c.getFileInfo(key)
	if err != nil {
		if strings.HasPrefix(err.Error(), "not_found") {
			return nil
		}
		return err
	}
	_, err = c.bucket.DeleteFileVersion(key, f.ID)
	return err
}

func (c *b2client) List(ctx context.Context, prefix, startAfter, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}

	resp, err := c.bucket.ListFileNamesWithPrefix(startAfter, int(limit), prefix, delimiter)
	if err != nil {
		return nil, false, "", err
	}

	n := len(resp.Files)
	objs := make([]Object, 0, n)
	for i := 0; i < n; i++ {
		if resp.Files[i].Name <= startAfter {
			continue
		}
		f := resp.Files[i]
		objs = append(objs, &obj{
			f.Name,
			f.ContentLength,
			time.Unix(f.UploadTimestamp/1000, 0),
			strings.HasSuffix(f.Name, "/"),
			"",
		})
	}
	return objs, resp.NextFileName != "", resp.NextFileName, nil
}

// TODO: support multipart upload using S3 client

func newB2(endpoint, keyID, applicationKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.Split(uri.Host, ".")
	name := hostParts[0]
	client, err := backblaze.NewB2(backblaze.Credentials{
		KeyID:          keyID,
		ApplicationKey: applicationKey,
	})
	if err != nil {
		return nil, fmt.Errorf("create B2 client: %s", err)
	}
	client.MaxIdleUploads = 20
	bucket, err := client.Bucket(name)
	if err != nil {
		logger.Warnf("access bucket %s: %s", name, err)
	}
	if err == nil && bucket == nil {
		bucket, err = client.CreateBucket(name, "allPrivate")
		if err != nil {
			return nil, fmt.Errorf("create bucket %s: %s", name, err)
		}
	}
	if bucket == nil {
		return nil, fmt.Errorf("can't find bucket %s with provided Key ID", name)
	}
	return &b2client{bucket: bucket}, nil
}

func init() {
	Register("b2", newB2)
}


================================================
FILE: pkg/object/bos.go
================================================
//go:build !nobos
// +build !nobos

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"hash/crc32"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/baidubce/bce-sdk-go/bce"
	"github.com/baidubce/bce-sdk-go/services/bos"
	"github.com/baidubce/bce-sdk-go/services/bos/api"
	"github.com/juicedata/juicefs/pkg/utils"
)

type bosclient struct {
	DefaultObjectStorage
	bucket string
	sc     string
	c      *bos.Client
}

func (q *bosclient) String() string {
	return fmt.Sprintf("bos://%s/", q.bucket)
}

func (q *bosclient) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              100 << 10,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (q *bosclient) SetStorageClass(sc string) error {
	q.sc = sc
	return nil
}

func (q *bosclient) Create(ctx context.Context) error {
	_, err := q.c.PutBucket(q.bucket)
	if err == nil && q.sc != "" {
		if err := q.c.PutBucketStorageclass(q.bucket, q.sc); err != nil {
			logger.Warnf("failed to set storage class: %v", err)
		}
	}
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (q *bosclient) Head(ctx context.Context, key string) (Object, error) {
	r, err := q.c.GetObjectMeta(q.bucket, key)
	if err != nil {
		if e, ok := err.(*bce.BceServiceError); ok && e.StatusCode == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}
	mtime, _ := time.Parse(time.RFC1123, r.LastModified)
	return &obj{
		key,
		r.ContentLength,
		mtime,
		strings.HasSuffix(key, "/"),
		r.StorageClass,
	}, nil
}

func (q *bosclient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (resp io.ReadCloser, err error) {
	var r *api.GetObjectResult
	var needCheck bool
	if limit > 0 {
		r, err = q.c.GetObject(q.bucket, key, nil, off, off+limit-1)
	} else if off > 0 {
		r, err = q.c.GetObject(q.bucket, key, nil, off)
	} else {
		r, err = q.c.GetObject(q.bucket, key, nil)
		needCheck = true
	}
	if err != nil {
		return
	}
	if needCheck {
		if r.UserMeta[checksumAlgr] != "" {
			resp = verifyChecksum(r.Body, r.UserMeta[checksumAlgr], r.ContentLength)
		} else {
			resp = verifyChecksum0(r.Body, r.ContentCrc32, r.ContentLength, crc32.IEEETable)
		}
	} else {
		resp = r.Body
	}
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(r.StorageClass)
	return
}

func (q *bosclient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	b, vlen, err := findLen(in)
	if err != nil {
		return err
	}
	var data []byte
	if bf, ok := b.(*bytes.Buffer); ok {
		data = bf.Bytes()
	} else {
		data = utils.Alloc0(int(vlen))
		defer utils.Free0(data)
		_, err = io.ReadFull(b, data)
		if err != nil {
			return err
		}
	}
	body, err := bce.NewBodyFromBytes(data)
	if err != nil {
		return err
	}
	args := new(api.PutObjectArgs)
	if q.sc != "" {
		args.StorageClass = q.sc
	}
	args.UserMeta = make(map[string]string)
	args.UserMeta[checksumAlgr] = strconv.Itoa(int(crc32.Update(0, crc32c, data)))
	_, err = q.c.PutObject(q.bucket, key, body, args)
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(q.sc)
	return err
}

func (q *bosclient) Copy(ctx context.Context, dst, src string) error {
	var args *api.CopyObjectArgs
	if q.sc != "" {
		args = &api.CopyObjectArgs{ObjectMeta: api.ObjectMeta{StorageClass: q.sc}}
	}
	_, err := q.c.CopyObject(q.bucket, dst, q.bucket, src, args)
	return err
}

func (q *bosclient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := q.c.DeleteObject(q.bucket, key)
	if err != nil && strings.Contains(err.Error(), "NoSuchKey") {
		err = nil
	}
	return err
}

func (q *bosclient) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	limit_ := int(limit)
	out, err := q.c.SimpleListObjects(q.bucket, prefix, limit_, start, delimiter)
	if err != nil {
		return nil, false, "", err
	}
	n := len(out.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		k := out.Contents[i]
		mod, _ := time.Parse("2006-01-02T15:04:05Z", k.LastModified)
		objs[i] = &obj{k.Key, int64(k.Size), mod, strings.HasSuffix(k.Key, "/"), k.StorageClass}
	}
	if delimiter != "" {
		for _, p := range out.CommonPrefixes {
			objs = append(objs, &obj{p.Prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, out.IsTruncated, out.NextMarker, nil
}

func (q *bosclient) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	args := new(api.InitiateMultipartUploadArgs)
	if q.sc != "" {
		args.StorageClass = q.sc
	}
	r, err := q.c.InitiateMultipartUpload(q.bucket, key, "", args)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: r.UploadId, MinPartSize: 4 << 20, MaxCount: 10000}, nil
}

func (q *bosclient) UploadPart(ctx context.Context, key string, uploadID string, num int, data []byte) (*Part, error) {
	body, _ := bce.NewBodyFromBytes(data)
	etag, err := q.c.BasicUploadPart(q.bucket, key, uploadID, num, body)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, Size: len(data), ETag: etag}, nil
}

func (q *bosclient) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	result, err := q.c.UploadPartCopy(q.bucket, key, q.bucket, srcKey, uploadID, num,
		&api.UploadPartCopyArgs{SourceRange: fmt.Sprintf("bytes=%d-%d", off, off+size-1)})

	if err != nil {
		return nil, err
	}
	return &Part{Num: num, Size: int(size), ETag: result.ETag}, nil
}

func (q *bosclient) AbortUpload(ctx context.Context, key string, uploadID string) {
	_ = q.c.AbortMultipartUpload(q.bucket, key, uploadID)
}

func (q *bosclient) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	oparts := make([]api.UploadInfoType, len(parts))
	for i := range parts {
		oparts[i] = api.UploadInfoType{
			PartNumber: parts[i].Num,
			ETag:       parts[i].ETag,
		}
	}
	ps := api.CompleteMultipartUploadArgs{Parts: oparts}
	_, err := q.c.CompleteMultipartUploadFromStruct(q.bucket, key, uploadID, &ps)
	return err
}

func (q *bosclient) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	result, err := q.c.ListMultipartUploads(q.bucket, &api.ListMultipartUploadsArgs{
		MaxUploads: 1000,
		KeyMarker:  marker,
	})
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{u.Key, u.UploadId, time.Time{}}
	}
	return parts, result.NextKeyMarker, nil
}

func autoBOSEndpoint(bucketName, accessKey, secretKey string) (string, error) {
	region := bce.DEFAULT_REGION
	if r := os.Getenv("BDCLOUD_DEFAULT_REGION"); r != "" {
		region = r
	}

	endpoint := fmt.Sprintf("https://%s.bcebos.com", region)
	bosCli, err := bos.NewClient(accessKey, secretKey, endpoint)
	if err != nil {
		return "", err
	}

	if location, err := bosCli.GetBucketLocation(bucketName); err != nil {
		return "", err
	} else {
		return fmt.Sprintf("%s.bcebos.com", location), nil
	}
}

func newBOS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)
	if len(hostParts) != 2 {
		return nil, fmt.Errorf("Invalid endpoint: %v", endpoint)
	}
	bucketName := hostParts[0]
	if accessKey == "" {
		accessKey = os.Getenv("BDCLOUD_ACCESS_KEY")
		secretKey = os.Getenv("BDCLOUD_SECRET_KEY")
	}
	endpoint = hostParts[1]
	if hostParts[1] == "bcebos.com" {
		if endpoint, err = autoBOSEndpoint(bucketName, accessKey, secretKey); err != nil {
			return nil, fmt.Errorf("Fail to get location of bucket %q: %s", bucketName, err)
		}
	}
	endpoint = fmt.Sprintf("%s://%s", uri.Scheme, endpoint)
	logger.Debugf("Use endpoint: %s", endpoint)
	// endpoint format like https://bj.bcebos.com
	bosClient, err := bos.NewClient(accessKey, secretKey, endpoint)
	if err != nil {
		return nil, err
	}
	bosClient.Config.Retry = bce.NewNoRetryPolicy()
	bosClient.Config.UserAgent = UserAgent
	return &bosclient{bucket: bucketName, c: bosClient}, nil
}

func init() {
	Register("bos", newBOS)
}


================================================
FILE: pkg/object/bunny.go
================================================
//go:build bunny
// +build bunny

/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"math"
	"net/url"
	"os"
	"path"
	"strings"
	"time"

	bunny "github.com/l0wl3vel/bunny-storage-go-sdk"
)

type bunnyClient struct {
	DefaultObjectStorage
	client   *bunny.Client
	endpoint string
}

// Description of the object storage.
func (b *bunnyClient) String() string {
	return fmt.Sprintf("bunny://%v", b.endpoint)
}

// Get the data for the given object specified by key.
func (b *bunnyClient) Get(ctx context.Context, key string, off int64, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	var end int64
	if limit == -1 {
		end = math.MaxInt64
	} else {
		end = off + limit - 1
	}
	body, err := b.client.DownloadPartial(key, off, end)
	if err != nil {
		return nil, err
	}
	return io.NopCloser(bytes.NewReader(body)), nil
}

// Put data read from a reader to an object specified by key.
func (b *bunnyClient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	content, readErr := io.ReadAll(in)
	if readErr != nil {
		return readErr
	}
	return b.client.Upload(key, content, true)
}

// Delete a object.
func (b *bunnyClient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := b.client.Delete(key, false)
	if err != nil && err.Error() == "Not Found" {
		err = nil
	}
	return err
}

func (b *bunnyClient) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}
	var output []Object
	var dir = prefix
	if !strings.HasSuffix(dir, dirSuffix) { // If no Directory list in parent directory
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	}

	listedObjects, err := b.client.List(dir)
	if err != nil {
		if os.IsNotExist(err) {
			err = nil
		}
		return nil, false, "", err
	}

	for _, o := range listedObjects {
		normalizedPath := normalizedObjectNameWithinZone(o)
		if !strings.HasPrefix(normalizedPath, prefix) || (marker != "" && normalizedPath <= marker) {
			continue
		}
		output = append(output, parseObjectMetadata(o))
		if len(output) == int(limit) {
			break
		}
	}

	return generateListResult(output, limit)
}

// The Object Path returned by the Bunny API contains the Storage Zone Name, which this function removes
func normalizedObjectNameWithinZone(o bunny.Object) string {
	normalizedPath := path.Join(o.Path, o.ObjectName)
	if o.IsDirectory {
		normalizedPath = normalizedPath + "/" // Append a trailing slash to allow deletion of directories
	}
	return strings.TrimPrefix(normalizedPath, "/"+o.StorageZoneName+"/")
}

func parseObjectMetadata(object bunny.Object) Object {
	lastChanged, _ := time.Parse("2006-01-02T15:04:05", object.LastChanged)

	key := normalizedObjectNameWithinZone(object)
	if object.IsDirectory && !strings.HasSuffix(key, "/") {
		key = key + "/"
	}
	return &obj{
		key,
		int64(object.Length),
		lastChanged,
		object.IsDirectory,
		"",
	}
}

func (b *bunnyClient) Head(ctx context.Context, key string) (Object, error) {
	object, err := b.client.Describe(key)
	if err != nil {
		if err.Error() == "Not Found" {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return parseObjectMetadata(object), nil
}

func newBunny(endpoint, accessKey, password, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	endpoint_url, err := url.Parse(endpoint)
	if err != nil {
		return nil, err
	}

	client := bunny.NewClient(*endpoint_url, password)
	return &bunnyClient{client: &client, endpoint: endpoint}, nil
}

func init() {
	Register("bunny", newBunny)
}


================================================
FILE: pkg/object/ceph.go
================================================
//go:build ceph
// +build ceph

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net/url"
	"os"
	"reflect"
	"sort"
	"strings"
	"sync"

	"github.com/ceph/go-ceph/rados"
)

type ceph struct {
	DefaultObjectStorage
	name string
	conn *rados.Conn
	free chan *rados.IOContext
}

func (c *ceph) String() string {
	return fmt.Sprintf("ceph://%s/", c.name)
}

func (c *ceph) Shutdown() {
	c.conn.Shutdown()
}

func (c *ceph) Create(_ context.Context) error {
	names, err := c.conn.ListPools()
	if err != nil {
		return err
	}
	for _, name := range names {
		if name == c.name {
			return nil
		}
	}
	return c.conn.MakePool(c.name)
}

func (c *ceph) newContext() (*rados.IOContext, error) {
	select {
	case ctx := <-c.free:
		return ctx, nil
	default:
		ctx, err := c.conn.OpenIOContext(c.name)
		if err == nil {
			_ = ctx.SetPoolFullTry()
		}
		return ctx, err
	}
}

func (c *ceph) release(ctx *rados.IOContext) {
	select {
	case c.free <- ctx:
	default:
		ctx.Destroy()
	}
}

func (c *ceph) do(f func(ctx *rados.IOContext) error) (err error) {
	ctx, err := c.newContext()
	if err != nil {
		return err
	}
	err = f(ctx)
	if err != nil {
		ctx.Destroy()
	} else {
		c.release(ctx)
	}
	return
}

type cephReader struct {
	c     *ceph
	ctx   *rados.IOContext
	key   string
	off   int64
	limit int64
}

func (r *cephReader) Read(buf []byte) (n int, err error) {
	if r.limit == 0 {
		return 0, io.EOF
	}
	if r.limit > 0 && int64(len(buf)) > r.limit {
		buf = buf[:r.limit]
	}
	n, err = r.ctx.Read(r.key, buf, uint64(r.off))
	r.off += int64(n)
	if r.limit > 0 {
		r.limit -= int64(n)
	}
	if err == nil && n < len(buf) {
		err = io.EOF
	}
	return
}

func (r *cephReader) Close() error {
	if r.ctx != nil {
		r.c.release(r.ctx)
		r.ctx = nil
	}
	return nil
}

func (c *ceph) Get(_ context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if _, err := c.Head(context.TODO(), key); err != nil {
		return nil, err
	}
	ctx, err := c.newContext()
	if err != nil {
		return nil, err
	}
	return &cephReader{c, ctx, key, off, limit}, nil
}

var cephPool = sync.Pool{
	New: func() interface{} {
		return make([]byte, 1<<20)
	},
}

func (c *ceph) Put(_ context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	// ceph default osd_max_object_size = 128M
	return c.do(func(ctx *rados.IOContext) error {
		if b, ok := in.(*bytes.Reader); ok {
			v := reflect.ValueOf(b)
			data := v.Elem().Field(0).Bytes()
			if len(data) == 0 {
				return notSupported
			}
			// If the data exceeds 90M, ceph will report an error: 'rados: ret=-90, Message too long'
			if len(data) < 85<<20 {
				return ctx.WriteFull(key, data)
			}
		}
		buf := cephPool.Get().([]byte)
		defer cephPool.Put(buf)
		var off uint64
		for {
			n, err := in.Read(buf)
			if n > 0 {
				if err = ctx.Write(key, buf[:n], off); err != nil {
					return err
				}
				off += uint64(n)
			} else {
				if err == io.EOF {
					if off == 0 {
						return errors.New("ceph: can't put empty file")
					}
					return nil
				}
				return err
			}
		}
	})
}

func (c *ceph) Delete(_ context.Context, key string, getters ...AttrGetter) error {
	err := c.do(func(ctx *rados.IOContext) error {
		return ctx.Delete(key)
	})
	if err == rados.ErrNotFound {
		err = nil
	}
	return err
}

func (c *ceph) Head(_ context.Context, key string) (Object, error) {
	var o *obj
	err := c.do(func(ctx *rados.IOContext) error {
		stat, err := ctx.Stat(key)
		if err != nil {
			return err
		}
		o = &obj{key, int64(stat.Size), stat.ModTime, strings.HasSuffix(key, "/"), ""}
		return nil
	})
	if err == rados.ErrNotFound {
		err = os.ErrNotExist
	}
	return o, err
}

func (c *ceph) ListAll(_ context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	ctx, err := c.newContext()
	if err != nil {
		return nil, err
	}
	iter, err := ctx.Iter()
	if err != nil {
		ctx.Destroy()
		return nil, err
	}
	defer iter.Close()

	// FIXME: this will be really slow for many objects
	keys := make([]string, 0, 1000)
	for iter.Next() {
		key := iter.Value()
		if key <= marker || !strings.HasPrefix(key, prefix) {
			continue
		}
		keys = append(keys, key)
	}
	// the keys are not ordered, sort them first
	sort.Strings(keys)
	c.release(ctx)

	var objs = make(chan Object, 1000)
	var concurrent = 20
	ms := make([]sync.Mutex, concurrent)
	conds := make([]*sync.Cond, concurrent)
	ready := make([]bool, concurrent)
	results := make([]Object, concurrent)
	errs := make([]error, concurrent)
	for j := 0; j < concurrent; j++ {
		conds[j] = sync.NewCond(&ms[j])
		if j < len(keys) {
			go func(j int) {
				ctx, err := c.newContext()
				if err != nil {
					logger.Errorf("new context: %s", err)
					errs[j] = err
					return
				}
				defer ctx.Destroy()
				for i := j; i < len(keys); i += concurrent {
					key := keys[i]
					st, err := ctx.Stat(key)
					if err != nil {
						if errors.Is(err, rados.ErrNotFound) {
							logger.Debugf("Skip non-existent key: %s", key)
							results[j] = nil
						} else {
							logger.Errorf("Stat key %s: %s", key, err)
							errs[j] = err
						}
					} else {
						results[j] = &obj{key, int64(st.Size), st.ModTime, strings.HasSuffix(key, "/"), ""}
					}

					ms[j].Lock()
					ready[j] = true
					conds[j].Signal()
					if errs[j] != nil {
						ms[j].Unlock()
						break
					}
					for ready[j] {
						conds[j].Wait()
					}
					ms[j].Unlock()
				}
			}(j)
		}
	}
	go func() {
		defer close(objs)
		for i := range keys {
			j := i % concurrent
			ms[j].Lock()
			for !ready[j] {
				conds[j].Wait()
			}
			if errs[j] != nil {
				objs <- nil
				ms[j].Unlock()
				// some goroutines will be leaked, but it's ok
				// since we won't call ListAll() many times in a process
				break
			} else if results[j] != nil {
				objs <- results[j]
			}
			ready[j] = false
			conds[j].Signal()
			ms[j].Unlock()
		}
	}()
	return objs, nil
}

func newCeph(endpoint, cluster, user, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("ceph://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	name := uri.Host
	conn, err := rados.NewConnWithClusterAndUser(cluster, user)
	if err != nil {
		return nil, fmt.Errorf("Can't create connection to cluster %s for user %s: %s", cluster, user, err)
	}
	if opt := os.Getenv("CEPH_ADMIN_SOCKET"); opt != "none" {
		if opt == "" {
			opt = "$run_dir/jfs-$cluster-$name-$pid.asok"
		}
		if err = conn.SetConfigOption("admin_socket", opt); err != nil {
			logger.Warnf("Failed to set admin_socket to %s: %s", opt, err)
		}
	}
	if opt := os.Getenv("CEPH_LOG_FILE"); opt != "none" {
		if opt == "" {
			opt = "/var/log/ceph/jfs-$cluster-$name.log"
		}
		if err = conn.SetConfigOption("log_file", opt); err != nil {
			logger.Warnf("Failed to set log_file to %s: %s", opt, err)
		}
	}
	if os.Getenv("JFS_NO_CHECK_OBJECT_STORAGE") == "" {
		if err := conn.ReadDefaultConfigFile(); err != nil {
			return nil, fmt.Errorf("Can't read default config file: %s", err)
		}
		if err := conn.Connect(); err != nil {
			return nil, fmt.Errorf("Can't connect to cluster %s: %s", cluster, err)
		}
	}
	return &ceph{
		name: name,
		conn: conn,
		free: make(chan *rados.IOContext, 50),
	}, nil
}

func init() {
	Register("ceph", newCeph)
}


================================================
FILE: pkg/object/checksum.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"fmt"
	"hash/crc32"
	"io"
	"reflect"
	"strconv"
)

const checksumAlgr = "Crc32c"

var crc32c = crc32.MakeTable(crc32.Castagnoli)

func generateChecksum(in io.ReadSeeker) string {
	if b, ok := in.(*bytes.Reader); ok {
		v := reflect.ValueOf(b)
		data := v.Elem().Field(0).Bytes()
		return strconv.Itoa(int(crc32.Update(0, crc32c, data)))
	}
	var hash uint32
	crcBuffer := bufPool.Get().(*[]byte)
	defer bufPool.Put(crcBuffer)
	defer func() { _, _ = in.Seek(0, io.SeekStart) }()
	for {
		n, err := in.Read(*crcBuffer)
		hash = crc32.Update(hash, crc32c, (*crcBuffer)[:n])
		if err != nil {
			if err != io.EOF {
				return ""
			}
			break
		}
	}
	return strconv.Itoa(int(hash))
}

type checksumReader struct {
	io.ReadCloser
	expected        uint32
	checksum        uint32
	remainingLength int64
	table           *crc32.Table
}

func (c *checksumReader) Read(buf []byte) (n int, err error) {
	n, err = c.ReadCloser.Read(buf)
	c.checksum = crc32.Update(c.checksum, c.table, buf[:n])
	c.remainingLength -= int64(n)
	if (err == io.EOF || c.remainingLength == 0) && c.checksum != c.expected {
		return 0, fmt.Errorf("verify checksum failed: %d != %d", c.checksum, c.expected)
	}
	return
}
func verifyChecksum(in io.ReadCloser, checksum string, contentLength int64) io.ReadCloser {
	return verifyChecksum0(in, checksum, contentLength, crc32c)
}
func verifyChecksum0(in io.ReadCloser, checksum string, contentLength int64, table *crc32.Table) io.ReadCloser {
	if checksum == "" {
		return in
	}
	expected, err := strconv.Atoi(checksum)
	if err != nil {
		logger.Errorf("invalid crc32c: %s", checksum)
		return in
	}
	return &checksumReader{in, uint32(expected), 0, contentLength, table}
}


================================================
FILE: pkg/object/checksum_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"hash/crc32"
	"io"
	"strconv"
	"strings"
	"testing"

	"github.com/juicedata/juicefs/pkg/utils"
)

func TestChecksum(t *testing.T) {
	b := []byte("hello")
	expected := crc32.Update(0, crc32c, b)
	actual := generateChecksum(bytes.NewReader(b))
	if actual != strconv.Itoa(int(expected)) {
		t.Errorf("expect %d but got %s", expected, actual)
		t.FailNow()
	}

	actual = generateChecksum(bytes.NewReader(b))
	if actual != strconv.Itoa(int(expected)) {
		t.Errorf("expect %d but got %s", expected, actual)
		t.FailNow()
	}
}

func TestChecksumRead(t *testing.T) {
	length := 10240
	content := make([]byte, length)
	utils.RandRead(content)
	actual := generateChecksum(bytes.NewReader(content))

	// content length equal buff length case
	lens := []int64{-1, int64(length)}
	for _, contentLength := range lens {
		reader := verifyChecksum(io.NopCloser(bytes.NewReader(content)), actual, contentLength)
		n, err := reader.Read(make([]byte, length))
		if n != length || (err != nil && err != io.EOF) {
			t.Fatalf("verify checksum should success")
		}
	}

	// verify success case
	for _, contentLength := range lens {
		reader := verifyChecksum(io.NopCloser(bytes.NewReader(content)), actual, contentLength)
		n, err := reader.Read(make([]byte, length+100))
		if n != length || (err != nil && err != io.EOF) {
			t.Fatalf("verify checksum should success")
		}
	}

	// verify failed case
	for _, contentLength := range lens {
		content[0] = 'a'
		reader := verifyChecksum(io.NopCloser(bytes.NewReader(content)), actual, contentLength)
		n, err := reader.Read(make([]byte, length))
		if contentLength == -1 && (err != nil && err != io.EOF || n != length) {
			t.Fatalf("dont verify checksum when content length is -1")
		}
		if contentLength != -1 && (err == nil || err == io.EOF || !strings.HasPrefix(err.Error(), "verify checksum failed")) {
			t.Fatalf("verify checksum should failed")
		}
	}

	// verify read length less than content length case
	for _, contentLength := range lens {
		reader := verifyChecksum(io.NopCloser(bytes.NewReader(content)), actual, contentLength)
		n, err := reader.Read(make([]byte, length-100))
		if err != nil || n != length-100 {
			t.Fatalf("error should be nil and read length should be %d", length-100)
		}
	}
}


================================================
FILE: pkg/object/cifs.go
================================================
//go:build !nocifs
// +build !nocifs

/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"path"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/cloudsoda/go-smb2"
)

type cifsConn struct {
	session  *smb2.Session
	share    *smb2.Share
	lastUsed time.Time
}

var _ ObjectStorage = (*cifsStore)(nil)
var _ FileSystem = (*cifsStore)(nil)

type cifsStore struct {
	DefaultObjectStorage
	host            string
	port            string
	share           string
	user            string
	password        string
	pool            chan *cifsConn
	connIdleTimeout time.Duration
}

// Chmod changes the mode of the file to mode.
//
// Note: SAMBA protocol has limited support for Unix file permissions.
// it controls the FILE_ATTRIBUTE_READONLY attribute. All other permission bits are ignored.
//
// Examples:
//   - chmod(0644), chmod(0666), chmod(0755) -> file becomes writable(666)
//   - chmod(0444), chmod(0400), chmod(0555) -> file becomes read-only(444)
//
// The returned mode from Stat() will always be either 0666 (writable) or 0444 (read-only)
// regardless of the specific mode bits passed to this function.
func (c *cifsStore) Chmod(path string, mode os.FileMode) error {
	return c.withConn(context.Background(), func(share *smb2.Share) error {
		return share.Chmod(path, mode)
	})
}

// Chown implements FileSystem.
func (c *cifsStore) Chown(path string, owner string, group string) error {
	return notSupported
}

// Chtimes implements MtimeChanger.
func (c *cifsStore) Chtimes(path string, mtime time.Time) error {
	return c.withConn(context.Background(), func(share *smb2.Share) error {
		return share.Chtimes(path, time.Time{}, mtime)
	})
}

func (c *cifsStore) String() string {
	return fmt.Sprintf("cifs://%s@%s:%s/%s/", c.user, c.host, c.port, c.share)
}

// getConnection returns a CIFS connection from the pool or creates a new one
func (c *cifsStore) getConnection(ctx context.Context) (*cifsConn, error) {
	now := time.Now()
	for {
		select {
		case conn := <-c.pool:
			if conn.session == nil {
				continue
			}
			if now.Sub(conn.lastUsed) > c.connIdleTimeout {
				c.closeConnectionAsync(conn)
				continue
			}
			conn.lastUsed = now
			return conn, nil
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
			goto CREATE
		}
	}

CREATE:
	// Create new connection
	// FIXME: may create a large number of connection in a short period, exceeding the limit.
	conn := &cifsConn{}
	conn.lastUsed = now

	// Establish SMB connection
	address := net.JoinHostPort(c.host, c.port)
	d := &smb2.Dialer{
		Initiator: &smb2.NTLMInitiator{
			User:     c.user,
			Password: c.password,
		},
	}

	var err error
	conn.session, err = d.Dial(ctx, address)
	if err != nil {
		return nil, fmt.Errorf("SMB authentication failed: %v", err)
	}

	conn.share, err = conn.session.WithContext(ctx).Mount(c.share)
	if err != nil {
		c.closeConnection(conn)
		return nil, fmt.Errorf("failed to mount SMB share %s: %v", c.share, err)
	}

	return conn, nil
}

func (c *cifsStore) closeConnection(conn *cifsConn) {
	if conn == nil || conn.session == nil {
		return
	}

	session := conn.session
	conn.session = nil
	conn.share = nil

	_ = session.WithContext(context.Background()).Logoff()
}

func (c *cifsStore) closeConnectionAsync(conn *cifsConn) {
	go c.closeConnection(conn)
}

// releaseConnection returns a connection to the pool or closes it if there's an error
func (c *cifsStore) releaseConnection(conn *cifsConn, err error) {
	if conn == nil {
		return
	}

	if err == nil {
		select {
		case c.pool <- conn:
			return
		default:
		}
	}

	// close connection if there's an error or if the pool is full
	if conn.session != nil {
		_ = conn.session.Logoff()
	}
}

func (c *cifsStore) withConn(ctx context.Context, f func(*smb2.Share) error) (err error) {
	conn, err := c.getConnection(ctx)
	if err != nil {
		return err
	}
	defer func() {
		c.releaseConnection(conn, err)
	}()
	return f(conn.share.WithContext(ctx))
}

func (c *cifsStore) Head(ctx context.Context, key string) (oj Object, err error) {
	err = c.withConn(ctx, func(share *smb2.Share) error {
		fi, err := share.Lstat(key)
		if err != nil {
			return err
		}
		isSymlink := fi.Mode()&os.ModeSymlink != 0
		if isSymlink {
			// SMB doesn't fully support symlinks like POSIX, but we'll try our best
			fi, err = share.Stat(key)
			if err != nil {
				return err
			}
		}
		oj = c.fileInfo(key, fi, isSymlink)
		return nil
	})
	return oj, err
}

// cifsReadCloser wraps a file reader and releases the connection when closed
type cifsReadCloser struct {
	io.ReadCloser
	store *cifsStore
	conn  *cifsConn
}

func (r *cifsReadCloser) Close() error {
	err := r.ReadCloser.Close()
	r.store.releaseConnection(r.conn, err)
	return err
}

func (c *cifsStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if off < 0 {
		off = 0
	}

	conn, err := c.getConnection(ctx)
	if err != nil {
		return nil, err
	}

	share := conn.share.WithContext(ctx)
	f, err := share.Open(key)
	if err != nil {
		c.releaseConnection(conn, err)
		return nil, err
	}

	finfo, err := f.Stat()
	if err != nil {
		_ = f.Close()
		c.releaseConnection(conn, err)
		return nil, err
	}

	if finfo.IsDir() || off >= finfo.Size() {
		_ = f.Close()
		c.releaseConnection(conn, nil)
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	var readCloser io.ReadCloser
	if limit > 0 {
		readCloser = &SectionReaderCloser{
			SectionReader: io.NewSectionReader(f, off, limit),
			Closer:        f,
		}
	} else {
		// When limit <= 0, read from off to end of file
		if off > 0 {
			readCloser = &SectionReaderCloser{
				SectionReader: io.NewSectionReader(f, off, finfo.Size()-off),
				Closer:        f,
			}
		} else {
			readCloser = f
		}
	}

	return &cifsReadCloser{
		ReadCloser: readCloser,
		store:      c,
		conn:       conn,
	}, nil
}

func (c *cifsStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) (err error) {
	return c.withConn(ctx, func(share *smb2.Share) error {
		p := key
		if strings.HasSuffix(key, dirSuffix) {
			// perm will not take effect, is not used
			// ref: https://github.com/cloudsoda/go-smb2/blob/c8e61c7a5fa7bcd1143359f071f9425a9f4dda3f/client.go#L341-L370
			return share.MkdirAll(p, 0755)
		}

		var tmp string
		if PutInplace {
			tmp = p
		} else {
			name := path.Base(p)
			if len(name) > 200 {
				name = name[:200]
			}
			tmp = TmpFilePath(p, name)
			defer func() {
				if err != nil {
					_ = share.Remove(tmp)
				}
			}()
		}

		f, err := share.Create(tmp)
		if err != nil && os.IsNotExist(err) {
			dirPath := path.Dir(p)
			if dirPath != "/" {
				err = share.MkdirAll(dirPath, 0755)
				if err != nil {
					return err
				}
			}
			f, err = share.Create(tmp)
		}
		if err != nil {
			return err
		}

		buf := bufPool.Get().(*[]byte)
		defer bufPool.Put(buf)
		_, err = io.CopyBuffer(f, in, *buf)
		if err != nil {
			_ = f.Close()
			return err
		}

		err = f.Close()
		if err != nil {
			return err
		}

		if !PutInplace {
			err = share.Rename(tmp, p)
		}
		return err
	})
}

func (c *cifsStore) Delete(ctx context.Context, key string, getters ...AttrGetter) (err error) {
	return c.withConn(ctx, func(share *smb2.Share) error {
		p := strings.TrimRight(key, dirSuffix)
		err = share.Remove(p)
		if err != nil && os.IsNotExist(err) {
			err = nil
		}
		return err
	})
}

func (c *cifsStore) fileInfo(key string, fi os.FileInfo, isSymlink bool) Object {
	owner, group := "nobody", "nobody"
	ff := &file{
		obj{key, fi.Size(), fi.ModTime(), fi.IsDir(), ""},
		owner,
		group,
		fi.Mode(),
		isSymlink,
	}
	if fi.IsDir() {
		if key != "" && !strings.HasSuffix(key, "/") {
			ff.key += "/"
		}
	}
	return ff
}

func (c *cifsStore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}

	dir := prefix
	var objs []Object
	if !strings.HasSuffix(dir, "/") {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := c.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	var mEntries []*mEntry
	err := c.withConn(ctx, func(share *smb2.Share) error {
		// Ensure directory exists before listing
		_, err := share.Stat(dir)
		if err != nil {
			return err
		}

		// Read directory entries
		entries, err := share.ReadDir(dir)
		if err != nil {
			return err
		}

		// Process entries
		mEntries = make([]*mEntry, 0, len(entries))
		for _, e := range entries {
			isSymlink := e.Mode()&os.ModeSymlink != 0
			if e.IsDir() {
				mEntries = append(mEntries, &mEntry{e, e.Name() + dirSuffix, nil, false})
			} else if isSymlink && followLink {
				// SMB doesn't fully support symlinks like POSIX, but we'll try our best
				fi, err := share.Stat(path.Join(dir, e.Name()))
				if err != nil {
					mEntries = append(mEntries, &mEntry{e, e.Name(), nil, true})
					continue
				}
				name := e.Name()
				if fi.IsDir() {
					name = e.Name() + dirSuffix
				}
				mEntries = append(mEntries, &mEntry{e, name, fi, false})
			} else {
				mEntries = append(mEntries, &mEntry{e, e.Name(), nil, isSymlink})
			}
		}
		return nil
	})
	if os.IsNotExist(err) || os.IsPermission(err) {
		logger.Warnf("skip %s: %s", dir, err)
		return nil, false, "", nil
	}

	// Sort entries by name
	sort.Slice(mEntries, func(i, j int) bool { return mEntries[i].Name() < mEntries[j].Name() })

	// Generate object list
	for _, e := range mEntries {
		p := path.Join(dir, e.Name())
		if e.IsDir() && !strings.HasSuffix(p, "/") {
			p = p + "/"
		}
		key := p
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}

		info := e.Info()
		f := c.fileInfo(key, info, e.isSymlink)
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}

	return generateListResult(objs, limit)
}

func (c *cifsStore) Copy(ctx context.Context, dst, src string) error {
	r, err := c.Get(ctx, src, 0, -1)
	if err != nil {
		return err
	}
	defer r.Close()
	return c.Put(ctx, dst, r)
}

func parseEndpoint(endpoint string) (host, port, share string, err error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = "cifs://" + endpoint
	}
	u, err := url.Parse(endpoint)
	if err != nil {
		return
	}
	if u.Scheme != "" && (u.Scheme != "cifs" && u.Scheme != "smb") {
		err = fmt.Errorf("invalid scheme %s, should be cifs:// or smb://", u.Scheme)
		return
	}

	host = u.Hostname()
	port = u.Port()
	if port == "" {
		port = "445" // Default SMB port
	}
	parts := strings.Split(u.Path, "/")
	if len(parts) < 2 || parts[1] == "" {
		err = fmt.Errorf("endpoint should be a valid share name (%s)", "\\\\<server>\\<share>")
		return
	}
	if len(parts) > 2 && parts[2] != "" {
		err = fmt.Errorf("endpoint should be a valid share name (%s)", "\\\\<server>\\<share>")
		return
	}
	share = parts[1]
	return
}

func newCifs(endpoint, username, password, _ string) (ObjectStorage, error) {
	host, port, share, err := parseEndpoint(endpoint)
	if err != nil {
		return nil, err
	}
	if username == "" {
		return nil, fmt.Errorf("CIFS username/ak is required")
	}

	if password == "" {
		return nil, fmt.Errorf("CIFS password/sk is required")
	}

	maxPool := 8
	if v := os.Getenv("JFS_CIFS_MAX_POOL"); v != "" {
		if n, err := strconv.Atoi(v); err == nil {
			maxPool = n
		}
	}

	store := &cifsStore{
		host:            host,
		port:            port,
		share:           share,
		user:            username,
		password:        password,
		connIdleTimeout: 5 * time.Minute,
		pool:            make(chan *cifsConn, maxPool),
	}

	// Test connection
	conn, err := store.getConnection(context.Background())
	if err != nil {
		return nil, err
	}
	store.releaseConnection(conn, nil)

	return store, nil
}

func init() {
	// Allow both cifs:// and smb:// schemes
	Register("cifs", newCifs)
	Register("smb", newCifs)
}


================================================
FILE: pkg/object/cos.go
================================================
//go:build !nocos
// +build !nocos

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/tencentyun/cos-go-sdk-v5"
)

const (
	cosChecksumKey        = "x-cos-meta-" + checksumAlgr
	cosRequestIDKey       = "X-Cos-Request-Id"
	cosStorageClassHeader = "X-Cos-Storage-Class"
)

type COS struct {
	c        *cos.Client
	endpoint string
	sc       string
}

func (c *COS) String() string {
	return fmt.Sprintf("cos://%s/", strings.Split(c.endpoint, ".")[0])
}

func (c *COS) Create(ctx context.Context) error {
	_, err := c.c.Bucket.Put(ctx, nil)
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (c *COS) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              1 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (c *COS) Head(ctx context.Context, key string) (Object, error) {
	resp, err := c.c.Object.Head(ctx, key, nil)
	if err != nil {
		if exist, err := c.c.Object.IsExist(ctx, key); err == nil && !exist {
			return nil, os.ErrNotExist
		}
		return nil, err
	}
	header := resp.Header
	var size int64
	if val, ok := header["Content-Length"]; ok {
		if length, err := strconv.ParseInt(val[0], 10, 64); err == nil {
			size = length
		}
	}
	var mtime time.Time
	if val, ok := header["Last-Modified"]; ok {
		mtime, _ = time.Parse(time.RFC1123, val[0])
	}
	var sc string
	if val := header.Get(cosStorageClassHeader); val != "" {
		sc = val
	} else {
		// https://cloud.tencent.com/document/product/436/7745
		// This header is returned only if the object is not STANDARD storage class.
		sc = "STANDARD"
	}
	return &obj{key, size, mtime, strings.HasSuffix(key, "/"), sc}, nil
}

func (c *COS) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	params := &cos.ObjectGetOptions{Range: getRange(off, limit)}
	resp, err := c.c.Object.Get(ctx, key, params)
	if err != nil {
		return nil, err
	}
	if err = checkGetStatus(resp.StatusCode, params.Range != ""); err != nil {
		_ = resp.Body.Close()
		return nil, err
	}
	if off == 0 && limit == -1 {
		length, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64)
		if err != nil {
			length = -1
			logger.Warnf("failed to parse content-length %s: %s", resp.Header.Get("Content-Length"), err)
		}
		resp.Body = verifyChecksum(resp.Body, resp.Header.Get(cosChecksumKey), length)
	}
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.Header.Get(cosRequestIDKey)).SetStorageClass(resp.Header.Get(cosStorageClassHeader))
	}
	return resp.Body, nil
}

func (c *COS) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var options cos.ObjectPutOptions
	if ins, ok := in.(io.ReadSeeker); ok {
		header := http.Header(map[string][]string{
			cosChecksumKey: {generateChecksum(ins)},
		})
		options.ObjectPutHeaderOptions = &cos.ObjectPutHeaderOptions{XCosMetaXXX: &header}
	}
	if c.sc != "" {
		if options.ObjectPutHeaderOptions == nil {
			options.ObjectPutHeaderOptions = &cos.ObjectPutHeaderOptions{}
		}
		options.ObjectPutHeaderOptions.XCosStorageClass = c.sc
	}
	resp, err := c.c.Object.Put(ctx, key, in, &options)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.Header.Get(cosRequestIDKey)).SetStorageClass(c.sc)
	}
	return err
}

func (c *COS) Copy(ctx context.Context, dst, src string) error {
	var opt cos.ObjectCopyOptions
	if c.sc != "" {
		opt.ObjectCopyHeaderOptions = &cos.ObjectCopyHeaderOptions{XCosStorageClass: c.sc}
	}
	source := fmt.Sprintf("%s/%s", c.endpoint, src)
	_, _, err := c.c.Object.Copy(ctx, dst, source, &opt)
	return err
}

func (c *COS) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	resp, err := c.c.Object.Delete(ctx, key)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.Header.Get(cosRequestIDKey))
	}
	return err
}

func (c *COS) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	param := cos.BucketGetOptions{
		Prefix:       prefix,
		Marker:       start,
		MaxKeys:      int(limit),
		Delimiter:    delimiter,
		EncodingType: "url",
	}
	resp, _, err := c.c.Bucket.Get(ctx, &param)
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := resp.Contents[i]
		t, _ := time.Parse(time.RFC3339, o.LastModified)
		key, err := cos.DecodeURIComponent(o.Key)
		if err != nil {
			return nil, false, "", errors.WithMessagef(err, "failed to decode key %s", o.Key)
		}
		objs[i] = &obj{key, int64(o.Size), t, strings.HasSuffix(key, "/"), o.StorageClass}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			key, err := cos.DecodeURIComponent(p)
			if err != nil {
				return nil, false, "", errors.WithMessagef(err, "failed to decode commonPrefixes %s", p)
			}
			objs = append(objs, &obj{key, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, resp.IsTruncated, resp.NextMarker, nil
}

func (c *COS) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (c *COS) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	var options cos.InitiateMultipartUploadOptions
	if c.sc != "" {
		options.ObjectPutHeaderOptions = &cos.ObjectPutHeaderOptions{XCosStorageClass: c.sc}
	}
	resp, _, err := c.c.Object.InitiateMultipartUpload(ctx, key, &options)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: resp.UploadID, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (c *COS) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	resp, err := c.c.Object.UploadPart(ctx, key, uploadID, num, bytes.NewReader(body), nil)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: resp.Header.Get("Etag")}, nil
}

func (c *COS) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	result, _, err := c.c.Object.CopyPart(ctx, key, uploadID, num, c.endpoint+"/"+srcKey, &cos.ObjectCopyPartOptions{
		XCosCopySourceRange: fmt.Sprintf("bytes=%d-%d", off, off+size-1),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: result.ETag}, nil
}

func (c *COS) AbortUpload(ctx context.Context, key string, uploadID string) {
	_, _ = c.c.Object.AbortMultipartUpload(ctx, key, uploadID)
}

func (c *COS) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	var cosParts []cos.Object
	for i := range parts {
		cosParts = append(cosParts, cos.Object{ETag: parts[i].ETag, PartNumber: parts[i].Num})
	}
	_, _, err := c.c.Object.CompleteMultipartUpload(ctx, key, uploadID, &cos.CompleteMultipartUploadOptions{Parts: cosParts})
	return err
}

func (c *COS) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &cos.ListMultipartUploadsOptions{
		KeyMarker: marker,
	}
	result, _, err := c.c.Bucket.ListMultipartUploads(ctx, input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		t, _ := time.Parse(time.RFC3339, u.Initiated)
		parts[i] = &PendingPart{u.Key, u.UploadID, t}
	}
	return parts, result.NextKeyMarker, nil
}

func (c *COS) SetStorageClass(sc string) error {
	c.sc = sc
	return nil
}

func autoCOSEndpoint(bucketName, accessKey, secretKey, token string) (string, error) {
	client := cos.NewClient(nil, &http.Client{
		Transport: &cos.AuthorizationTransport{
			SecretID:     accessKey,
			SecretKey:    secretKey,
			SessionToken: token,
		},
	})
	client.UserAgent = UserAgent
	s, _, err := client.Service.Get(ctx)
	if err != nil {
		return "", err
	}

	for _, b := range s.Buckets {
		// fmt.Printf("%#v\n", b)
		if b.Name == bucketName {
			return fmt.Sprintf("https://%s.cos.%s.myqcloud.com", b.Name, b.Region), nil
		}
	}

	return "", fmt.Errorf("bucket %q doesn't exist", bucketName)
}

func newCOS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)

	if accessKey == "" {
		accessKey = os.Getenv("COS_SECRETID")
		secretKey = os.Getenv("COS_SECRETKEY")
	}

	if len(hostParts) == 1 {
		if endpoint, err = autoCOSEndpoint(hostParts[0], accessKey, secretKey, token); err != nil {
			return nil, fmt.Errorf("Unable to get endpoint of bucket %s: %s", hostParts[0], err)
		}
		if uri, err = url.ParseRequestURI(endpoint); err != nil {
			return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
		}
		logger.Debugf("Use endpoint %q", endpoint)
	}

	b := &cos.BaseURL{BucketURL: uri}
	client := cos.NewClient(b, &http.Client{
		Transport: &cos.AuthorizationTransport{
			SecretID:     accessKey,
			SecretKey:    secretKey,
			SessionToken: token,
			Transport:    httpClient.Transport,
		},
	})
	client.UserAgent = UserAgent
	disableChecksum := strings.EqualFold(uri.Query().Get("disable-checksum"), "true")
	if disableChecksum {
		logger.Infof("default CRC checksum is disabled")
	}
	client.Conf.EnableCRC = !disableChecksum
	return &COS{c: client, endpoint: uri.Host}, nil
}

func init() {
	Register("cos", newCOS)
}


================================================
FILE: pkg/object/dragonfly.go
================================================
//go:build !nodragonfly
// +build !nodragonfly

/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"net/url"
	"os"
	"path"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/go-http-utils/headers"
)

const (
	// AsyncWriteBack writes the object asynchronously to the backend.
	AsyncWriteBack = iota

	// WriteBack writes the object synchronously to the backend.
	WriteBack

	// Ephemeral only writes the object to the dfdaemon.
	// It is only provided for creating temporary objects between peers,
	// and users are not allowed to use this mode.
	Ephemeral
)

const (
	// HeaderDragonflyObjectMetaLastModifiedTime is used for last modified time of object storage.
	HeaderDragonflyObjectMetaLastModifiedTime = "X-Dragonfly-Object-Meta-Last-Modified-Time"

	// HeaderDragonflyObjectMetaStorageClass is used for storage class of object storage.
	HeaderDragonflyObjectMetaStorageClass = "X-Dragonfly-Object-Meta-Storage-Class"

	// HeaderDragonflyObjectOperation is used for object storage operation.
	HeaderDragonflyObjectOperation = "X-Dragonfly-Object-Operation"
)

const (
	// Upper limit of maxGetObjectMetadatas.
	MaxGetObjectMetadatasLimit = 1000

	// DefaultMaxReplicas is the default value of maxReplicas.
	DefaultMaxReplicas = 0

	// Upper limit of maxReplicas.
	MaxReplicasLimit = 100
)

const (
	// CopyOperation is the operation of copying object.
	CopyOperation = "copy"
)

const (
	// FilterOSS is the filter of oss url for generating task id.
	FilterOSS = "Expires&Signature"

	// FilterS3 is the filter of s3 url for generating task id.
	FilterS3 = "X-Amz-Algorithm&X-Amz-Credential&X-Amz-Date&X-Amz-Expires&X-Amz-SignedHeaders&X-Amz-Signature"

	// FilterOBS is the filter of obs url for generating task id.
	FilterOBS = "X-Amz-Algorithm&X-Amz-Credential&X-Amz-Date&X-Obs-Date&X-Amz-Expires&X-Amz-SignedHeaders&X-Amz-Signature"
)

// ObjectMetadatas is the object metadata list.
type ObjectMetadatas struct {
	// CommonPrefixes are similar prefixes in object storage.
	CommonPrefixes []string `json:"CommonPrefixes"`

	// Metadatas are object metadata.
	Metadatas []*ObjectMetadata `json:"Metadatas"`
}

// ObjectMetadata is the object metadata.
type ObjectMetadata struct {
	// Key is object key.
	Key string

	// ContentDisposition is Content-Disposition header.
	ContentDisposition string

	// ContentEncoding is Content-Encoding header.
	ContentEncoding string

	// ContentLanguage is Content-Language header.
	ContentLanguage string

	// ContentLength is Content-Length header.
	ContentLength int64

	// ContentType is Content-Type header.
	ContentType string

	// ETag is ETag header.
	ETag string

	// Digest is object digest.
	Digest string

	// LastModifiedTime is last modified time.
	LastModifiedTime time.Time

	// StorageClass is object storage class.
	StorageClass string
}

// ObjectStorageMetadata is the object storage metadata.
type ObjectStorageMetadata struct {
	// Name is object storage name of type, it can be s3, oss or obs.
	Name string

	// Region is storage region.
	Region string

	// Endpoint is datacenter endpoint.
	Endpoint string
}

// dragonfly is the dragonfly object storage.
type dragonfly struct {
	// DefaultObjectStorage is the default object storage.
	DefaultObjectStorage

	// Address of the object storage service.
	endpoint string

	// Filter is used to generate a unique Task ID by
	// filtering unnecessary query params in the URL,
	// it is separated by & character.
	filter string

	// Mode is the mode in which the backend is written,
	// including WriteBack and AsyncWriteBack.
	mode int

	// MaxReplicas is the maximum number of
	// replicas of an object cache in seed peers.
	maxReplicas int

	// ObjectStorage bucket name.
	bucket string

	// http client.
	client *http.Client
}

// String returns the string representation of the dragonfly.
func (d *dragonfly) String() string {
	return fmt.Sprintf("dragonfly://%s/", d.bucket)
}

// Create creates the object if it does not exist.
func (d *dragonfly) Create(ctx context.Context) error {
	if _, _, _, err := d.List(ctx, "", "", "", "", 1, false); err == nil {
		return nil
	}

	u, err := url.Parse(d.endpoint)
	if err != nil {
		return err
	}

	u.Path = path.Join("buckets", d.bucket)
	query := u.Query()
	u.RawQuery = query.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), nil)
	if err != nil && !isExists(err) {
		return err
	}

	resp, err := d.client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("bad response status %s", resp.Status)
	}

	return nil
}

// Head returns the object metadata if it exists.
func (d *dragonfly) Head(ctx context.Context, key string) (Object, error) {
	// get get object metadata request.
	u, err := url.Parse(d.endpoint)
	if err != nil {
		return nil, err
	}

	u.Path = path.Join("buckets", d.bucket, "objects", key)
	if strings.HasSuffix(key, "/") {
		u.Path += "/"
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodHead, u.String(), nil)
	if err != nil {
		return nil, err
	}

	// Head object.
	resp, err := d.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		if resp.StatusCode == http.StatusNotFound {
			err = os.ErrNotExist
		}

		return nil, err
	}

	contentLength, err := strconv.ParseInt(resp.Header.Get(headers.ContentLength), 10, 64)
	if err != nil {
		return nil, err
	}

	lastModifiedTime, err := time.Parse(http.TimeFormat, resp.Header.Get(HeaderDragonflyObjectMetaLastModifiedTime))
	if err != nil {
		return nil, err
	}

	return &obj{
		key,
		int64(contentLength),
		lastModifiedTime,
		strings.HasSuffix(key, "/"),
		resp.Header.Get(HeaderDragonflyObjectMetaStorageClass),
	}, nil
}

// Get returns the object if it exists.
func (d *dragonfly) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	u, err := url.Parse(d.endpoint)
	if err != nil {
		return nil, err
	}

	u.Path = path.Join("buckets", d.bucket, "objects", key)
	if strings.HasSuffix(key, "/") {
		u.Path += "/"
	}

	query := u.Query()
	if d.filter != "" {
		query.Add("filter", d.filter)
	}

	u.RawQuery = query.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return nil, err
	}

	req.Header.Set(headers.Range, getRange(off, limit))
	resp, err := d.client.Do(req)
	if err != nil {
		return nil, err
	}

	if resp.StatusCode/100 != 2 {
		return nil, fmt.Errorf("bad response status %s", resp.Status)
	}
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(resp.Header.Get(HeaderDragonflyObjectMetaStorageClass))

	return resp.Body, nil
}

// Put creates or replaces the object.
func (d *dragonfly) Put(ctx context.Context, key string, data io.Reader, getters ...AttrGetter) error {
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)

	// AsyncWriteBack mode is used by default.
	if err := writer.WriteField("mode", fmt.Sprint(d.mode)); err != nil {
		return err
	}

	if d.filter != "" {
		if err := writer.WriteField("filter", d.filter); err != nil {
			return err
		}
	}

	if d.maxReplicas > 0 {
		if err := writer.WriteField("maxReplicas", fmt.Sprint(d.maxReplicas)); err != nil {
			return err
		}
	}

	part, err := writer.CreateFormFile("file", path.Base(key))
	if err != nil {
		return err
	}

	if _, err := io.Copy(part, data); err != nil {
		return err
	}

	if err := writer.Close(); err != nil {
		return err
	}

	u, err := url.Parse(d.endpoint)
	if err != nil {
		return err
	}

	u.Path = path.Join("buckets", d.bucket, "objects", key)
	if strings.HasSuffix(key, "/") {
		u.Path += "/"
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPut, u.String(), body)
	if err != nil {
		return err
	}
	req.Header.Add(headers.ContentType, writer.FormDataContentType())

	// Put object.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("bad response status %s", resp.Status)
	}

	return nil
}

// Copy copies the object if it exists.
func (d *dragonfly) Copy(ctx context.Context, dst, src string) error {
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)

	if err := writer.WriteField("source_object_key", src); err != nil {
		return err
	}

	if err := writer.Close(); err != nil {
		return err
	}

	u, err := url.Parse(d.endpoint)
	if err != nil {
		return err
	}

	u.Path = path.Join("buckets", d.bucket, "objects", dst)
	query := u.Query()
	u.RawQuery = query.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodPut, u.String(), body)
	if err != nil {
		return err
	}

	req.Header.Add(headers.ContentType, writer.FormDataContentType())
	req.Header.Add(HeaderDragonflyObjectOperation, fmt.Sprint(CopyOperation))

	// copy object.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("bad response status %s", resp.Status)
	}

	return nil
}

// Delete deletes the object if it exists.
func (d *dragonfly) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	// get delete object request.
	u, err := url.Parse(d.endpoint)
	if err != nil {
		return err
	}

	u.Path = path.Join("buckets", d.bucket, "objects", key)
	if strings.HasSuffix(key, "/") {
		u.Path += "/"
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodDelete, u.String(), nil)
	if err != nil {
		return err
	}

	// Delete object.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("bad response status %s", resp.Status)
	}

	return nil
}

// List lists the objects with the given prefix.
func (d *dragonfly) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > MaxGetObjectMetadatasLimit {
		limit = MaxGetObjectMetadatasLimit
	}

	u, err := url.Parse(d.endpoint)
	if err != nil {
		return nil, false, "", err
	}

	u.Path = path.Join("buckets", d.bucket, "metadatas")
	query := u.Query()
	if prefix != "" {
		query.Set("prefix", prefix)
	}

	if marker != "" {
		query.Set("marker", marker)
	}

	if delimiter != "" {
		query.Set("delimiter", delimiter)
	}

	if limit != 0 {
		query.Set("limit", fmt.Sprint(limit))
	}

	u.RawQuery = query.Encode()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return nil, false, "", err
	}

	// List object.
	resp, err := d.client.Do(req)
	if err != nil {
		return nil, false, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return nil, false, "", fmt.Errorf("bad response status %s", resp.Status)
	}

	var objectMetadatas ObjectMetadatas
	if err := json.NewDecoder(resp.Body).Decode(&objectMetadatas); err != nil {
		return nil, false, "", err
	}

	objs := make([]Object, 0, len(objectMetadatas.Metadatas))
	for _, meta := range objectMetadatas.Metadatas {
		objs = append(objs, &obj{
			meta.Key,
			meta.ContentLength,
			meta.LastModifiedTime,
			strings.HasSuffix(meta.Key, "/"),
			meta.StorageClass,
		})
	}

	if delimiter != "" {
		for _, o := range objectMetadatas.CommonPrefixes {
			objs = append(objs, &obj{o, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return generateListResult(objs, limit)
}

// newDragonfly creates a new dragonfly object storage.
func newDragonfly(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("http://%s", endpoint)
	}

	// Parse the endpoint.
	uri, err := url.Parse(endpoint)
	if err != nil {
		return nil, err
	}

	endpoint = uri.Scheme + "://" + uri.Host
	bucket := uri.Path
	if bucket == "" {
		return nil, fmt.Errorf("bucket name required")
	}

	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("http://%s", endpoint)
	}

	mode := WriteBack
	if value := uri.Query().Get("mode"); value != "" {
		mode, err = strconv.Atoi(value)
		if err != nil || (mode != WriteBack && mode != AsyncWriteBack) {
			return nil, fmt.Errorf("unexpected dragonfly mode: %s", value)
		}
	}

	maxReplicas := DefaultMaxReplicas
	if value := uri.Query().Get("maxReplicas"); value != "" {
		maxReplicas, err = strconv.Atoi(value)
		if err != nil || maxReplicas > MaxReplicasLimit || maxReplicas < 0 {
			return nil, fmt.Errorf("unexpected dragonfly max replicas: %s", value)
		}
	}

	metadata, err := getObjectStorageMetadata(endpoint)
	if err != nil {
		return nil, err
	}

	var filter string
	switch metadata.Name {
	case "s3":
		filter = FilterS3
	case "oss":
		filter = FilterOSS
	case "obs":
		filter = FilterOBS
	default:
		return nil, fmt.Errorf("unexpected dragonfly object storage name: %s", metadata.Name)
	}

	return &dragonfly{
		endpoint:    endpoint,
		filter:      filter,
		mode:        mode,
		maxReplicas: maxReplicas,
		bucket:      bucket,
		client:      httpClient,
	}, nil
}

// getObjectStorageMetadata returns the object storage metadata.
func getObjectStorageMetadata(endpoint string) (*ObjectStorageMetadata, error) {
	u, err := url.Parse(endpoint)
	if err != nil {
		return nil, nil
	}

	u.Path = path.Join("metadata")
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return nil, err
	}

	// Get object storage Metadata.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return nil, fmt.Errorf("bad response status %s", resp.Status)
	}

	var objectStorageMetadata ObjectStorageMetadata
	if err := json.NewDecoder(resp.Body).Decode(&objectStorageMetadata); err != nil {
		return nil, err
	}

	return &objectStorageMetadata, nil
}

// init registers the dragonfly object storage.
func init() {
	Register("dragonfly", newDragonfly)
}


================================================
FILE: pkg/object/encrypt.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"crypto/rsa"
	"crypto/sha256"
	"crypto/x509"
	"encoding/pem"
	"errors"
	"fmt"
	"io"
	"os"
	"strings"

	"github.com/emmansun/gmsm/pkcs8"
	"github.com/emmansun/gmsm/sm2"
	"github.com/emmansun/gmsm/sm4"
	"golang.org/x/crypto/chacha20poly1305"
)

type Encryptor interface {
	Encrypt(plaintext []byte) ([]byte, error)
	Decrypt(ciphertext []byte) ([]byte, error)
}

func ExportRsaPrivateKeyToPem(key *rsa.PrivateKey, passphrase string) string {
	buf := x509.MarshalPKCS1PrivateKey(key)
	block := &pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: buf,
	}
	if passphrase != "" {
		var err error
		// nolint:staticcheck
		block, _ = x509.EncryptPEMBlock(rand.Reader, block.Type, buf, []byte(passphrase), x509.PEMCipherAES256)
		if err != nil {
			panic(err)
		}
	}
	privPEM := pem.EncodeToMemory(block)
	return string(privPEM)
}

var ErrKeyNeedPasswd = errors.New("passphrase is required to private key")

func ParsePrivateKeyFromPem(enc []byte, passphrase []byte) (any, error) {
	block, _ := pem.Decode(enc)
	if block == nil {
		return nil, errors.New("failed to parse PEM block containing the key")
	}

	buf := block.Bytes
	if len(passphrase) == 0 {
		// nolint:staticcheck
		if strings.Contains(block.Headers["Proc-Type"], "ENCRYPTED") && x509.IsEncryptedPEMBlock(block) {
			return nil, ErrKeyNeedPasswd
		}
		if strings.Contains(block.Type, "ENCRYPTED") {
			return nil, ErrKeyNeedPasswd
		}
	} else {
		var err error
		// nolint:staticcheck
		buf, err = x509.DecryptPEMBlock(block, passphrase)
		if err != nil {
			if err == x509.IncorrectPasswordError {
				return nil, err
			}
			key, err := pkcs8.ParsePKCS8PrivateKey(block.Bytes, passphrase)
			if err == nil {
				return key, nil
			}
			key, err = pkcs8.ParsePKCS8PrivateKey(block.Bytes)
			if err == nil {
				return key, nil
			}
			if !strings.Contains(err.Error(), "ParsePKCS1PrivateKey") {
				return nil, fmt.Errorf("cannot decode encrypted private keys: %v", err)
			}
			buf = block.Bytes
		}
	}

	rsaKey, err := x509.ParsePKCS1PrivateKey(buf)
	if err == nil {
		return rsaKey, nil
	}
	key, err := pkcs8.ParsePKCS8PrivateKey(buf)
	if err != nil {
		return nil, err
	}
	return key, nil
}

func ParseRsaPrivateKeyFromPath(path, passphrase string) (any, error) {
	b, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	return ParsePrivateKeyFromPem(b, []byte(passphrase))
}

type rsaEncryptor struct {
	privKey *rsa.PrivateKey
	label   []byte
}

func NewRSAEncryptor(privKey *rsa.PrivateKey) Encryptor {
	return &rsaEncryptor{privKey, []byte("keys")}
}

func (e *rsaEncryptor) Encrypt(plaintext []byte) ([]byte, error) {
	return rsa.EncryptOAEP(sha256.New(), rand.Reader, &e.privKey.PublicKey, plaintext, e.label)
}

func (e *rsaEncryptor) Decrypt(ciphertext []byte) ([]byte, error) {
	return rsa.DecryptOAEP(sha256.New(), rand.Reader, e.privKey, ciphertext, e.label)
}

type sm2Encryptor struct {
	privKey *sm2.PrivateKey
}

func NewSM2Encryptor(privKey *sm2.PrivateKey) Encryptor {
	return &sm2Encryptor{privKey}
}

func (e *sm2Encryptor) Encrypt(plaintext []byte) ([]byte, error) {
	return sm2.EncryptASN1(rand.Reader, &e.privKey.PublicKey, plaintext)
}

func (e *sm2Encryptor) Decrypt(ciphertext []byte) ([]byte, error) {
	return sm2.Decrypt(e.privKey, ciphertext)
}

func NewKeyEncryptor(privKey any) Encryptor {
	switch k := privKey.(type) {
	case *rsa.PrivateKey:
		return NewRSAEncryptor(k)
	case *sm2.PrivateKey:
		return NewSM2Encryptor(k)
	}
	panic(fmt.Sprintf("unsupported key type %T", privKey)) // should not happen
}

type dataEncryptor struct {
	keyEncryptor Encryptor
	keyLen       int
	aead         func(key []byte) (cipher.AEAD, error)
}

const (
	AES256GCM_RSA = "aes256gcm-rsa"
	CHACHA20_RSA  = "chacha20-rsa"
	SM4GCM        = "sm4gcm"
)

func NewDataEncryptor(keyEncryptor Encryptor, algo string) (Encryptor, error) {
	switch algo {
	case "", AES256GCM_RSA:
		aead := func(key []byte) (cipher.AEAD, error) {
			block, err := aes.NewCipher(key)
			if err != nil {
				return nil, err
			}
			return cipher.NewGCM(block)
		}
		return &dataEncryptor{keyEncryptor, 32, aead}, nil
	case CHACHA20_RSA:
		return &dataEncryptor{keyEncryptor, chacha20poly1305.KeySize, chacha20poly1305.New}, nil
	case SM4GCM:
		// TODO: support other modes?
		// GCM not in [GB/T 17964-2021](http://c.gb688.cn/bzgk/gb/showGb?type=online&hcno=4F89D833626340B1F71068D25EAC737D)
		aead := func(key []byte) (cipher.AEAD, error) {
			block, err := sm4.NewCipher(key)
			if err != nil {
				return nil, err
			}
			return cipher.NewGCM(block)
		}
		return &dataEncryptor{keyEncryptor, 16, aead}, nil
	}
	return nil, fmt.Errorf("unsupport cipher: %s", algo)
}

func (e *dataEncryptor) Encrypt(plaintext []byte) ([]byte, error) {
	key := make([]byte, e.keyLen)
	if _, err := io.ReadFull(rand.Reader, key); err != nil {
		return nil, err
	}
	cipherkey, err := e.keyEncryptor.Encrypt(key)
	if err != nil {
		return nil, err
	}
	aead, err := e.aead(key)
	if err != nil {
		return nil, err
	}
	nonce := make([]byte, aead.NonceSize())
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return nil, err
	}

	headerSize := 3 + len(cipherkey) + len(nonce)
	buf := make([]byte, headerSize+len(plaintext)+aead.Overhead())
	buf[0] = byte(len(cipherkey) >> 8)
	buf[1] = byte(len(cipherkey) & 0xFF)
	buf[2] = byte(len(nonce))
	p := buf[3:]
	copy(p, cipherkey)
	p = p[len(cipherkey):]
	copy(p, nonce)
	p = p[len(nonce):]
	ciphertext := aead.Seal(p[:0], nonce, plaintext, nil)
	return buf[:headerSize+len(ciphertext)], nil
}

func (e *dataEncryptor) Decrypt(ciphertext []byte) ([]byte, error) {
	if len(ciphertext) < 3 {
		return nil, fmt.Errorf("received encrypted text length is less than 3, the object is corrupted")
	}
	keyLen := int(ciphertext[0])<<8 + int(ciphertext[1])
	nonceLen := int(ciphertext[2])
	if 3+keyLen+nonceLen >= len(ciphertext) {
		return nil, fmt.Errorf("malformed ciphertext: %d %d", keyLen, nonceLen)
	}
	ciphertext = ciphertext[3:]
	cipherkey := ciphertext[:keyLen]
	nonce := ciphertext[keyLen : keyLen+nonceLen]
	ciphertext = ciphertext[keyLen+nonceLen:]

	key, err := e.keyEncryptor.Decrypt(cipherkey)
	if err != nil {
		return nil, errors.New("decryt key: " + err.Error())
	}
	aead, err := e.aead(key)
	if err != nil {
		return nil, err
	}
	return aead.Open(ciphertext[:0], nonce, ciphertext, nil)
}

type encrypted struct {
	ObjectStorage
	enc Encryptor
}

// NewEncrypted returns a encrypted object storage
func NewEncrypted(o ObjectStorage, enc Encryptor) ObjectStorage {
	return &encrypted{o, enc}
}

func (e *encrypted) String() string {
	return fmt.Sprintf("%s(encrypted)", e.ObjectStorage)
}

func (e *encrypted) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	r, err := e.ObjectStorage.Get(ctx, key, 0, -1, getters...)
	if err != nil {
		return nil, err
	}
	defer r.Close()
	ciphertext, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}
	plain, err := e.enc.Decrypt(ciphertext)
	if err != nil {
		return nil, fmt.Errorf("Decrypt: %s", err)
	}
	l := int64(len(plain))
	if off > l {
		off = l
	}
	if limit == -1 || off+limit > l {
		limit = l - off
	}
	data := plain[off : off+limit]
	return io.NopCloser(bytes.NewBuffer(data)), nil
}

func (e *encrypted) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	plain, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	ciphertext, err := e.enc.Encrypt(plain)
	if err != nil {
		return err
	}
	return e.ObjectStorage.Put(ctx, key, bytes.NewReader(ciphertext), getters...)
}

var _ ObjectStorage = (*encrypted)(nil)


================================================
FILE: pkg/object/encrypt_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/rand"
	"crypto/rsa"
	"crypto/x509"
	"encoding/pem"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/emmansun/gmsm/sm2"
	"github.com/stretchr/testify/require"
)

var rsaInPKCS8 = `-----BEGIN ENCRYPTED PRIVATE KEY-----
MIICzzBJBgkqhkiG9w0BBQ0wPDAbBgkqhkiG9w0BBQwwDgQIEEEvSFbVLkUCAggA
MB0GCWCGSAFlAwQBKgQQhuaBA77wcAHA7bl4dkbFsQSCAoBi5hgqWhK2ic3HBSUX
JBFFh7omdhU4uK7mQzVVx/RvnUCbw5T7ghfboJhP5bHkj+UnnFiKD6vFZfSgH/Q3
5KUjPIveLa0urly1bC1SMequXggjEgSPUe8XBjWJJcwkbELFiQzD76GSnveCMokZ
X7WvoZeY0AaSAnQwe5r1evAdilWXdb2fUmRA23gco8pgSrkdVPyz9lb+FbDjrd8j
7qiMDcoZ4qFrQ4v8IQJv+ED5f7fLen7UGpG5uOZT9Ez153f7Zw+eEAmp5qwE5SCP
JbVLsR++HXkntJg1q2Yw4rIOi7qing408jwroed/W6AzS8A49RvrI2/Ac5dHfEnB
LkC23Ep47/e9B8cZQCmIZXEnUpcjSwWKe5U4nCXyeskuhRhTtA3EpYFXx+/P3yNE
YISywz6brtAxDwfk8LNAGkZRQ5c+nIFh43M+m5LLHAOSug/TbIvVvgottdc0VRHl
Q72zeXu8X7PF8dhnoxVSBdKfRYCHQWg+PBw8IYn1KA1SfvwakeVnYcU8P4BMOXo5
36Q4CVDIW9zWCrW49Cq/dxi0yqYyoA5hw8lIqMzmewdiUH0BwlsaOBz0utz0GhOi
mBsK7O5819orKnuGmWzuvEETiRJ+HZTgkWAC0Wu1r7gjbMKow8grkygQz0iqMrSE
kY7gfcnT2mpR7ow0DbWqiidb4PrxYsk0X1hOswsAek62xL/sdqlA9C3eZuqPNfqa
yatWjKjQY4ukKUm7QplPOgOuP01GN0XF7zMEqXtl1GxPp9uDnKFzDopQau+3OrID
ljSQG+zYqxPFeLZ06zh3bYqS5E5RjlguF6055m5NaudQ9b+/7NjPDHdpWth6cQFx
OIGw
-----END ENCRYPTED PRIVATE KEY-----
`

var rsaInPKCS1 = `-----BEGIN RSA PRIVATE KEY-----
MIICXAIBAAKBgQCaPxhJMEfX0CaYIziQxwjlVlh75xw1xWlF14GGdpZYaM70BzMu
XdB22X7PnkK38PHk4saXKz0blhaf/qllJP7mcdqFEcs4sWsVhU1KoLdRNH/1AJQK
0/Oehr3vov1CjsR+51RRuDFcVOBd7lpglK5s0+TGRYyImFc+JhZ23RVFNwIDAQAB
AoGAGtobDzqxdxeMcHXJNiMAIHScqM098vpv7jGrIc5pM/Di/kZ2mX7JeLc6RUiG
0uDGK5NzAQQM+k1xmN7LfIkpOo2pSlL0gC/M6q0TAJqRLXBKjMVqlHLUytqKTtEg
4PeF93GnxJZt9NSqo5HH87OwkjXeG1brqhZTfKtL/tbRpRECQQDLJAIFri3pGzni
Xq4s2NogxUnC8cg9I7jEv4gTH/KuFQTsh/5i2+1tsGyFdXKzFd0A8DcZx+MzBm7q
qwF46vw5AkEAwmIN0s9EcUVeVyorgdphl81QV+x9TR5wZbksigPQcNqU2NJVZKtd
1f0o2H3E2XHV2DLjeLWctlmx+i0k3Sos7wJAJR/Sgsk/OK+yF22oNSf4TS7g+RCI
wKurk8FRE/WtuyS6PqPn2JdKv9YTLxy0tofTWN2NpFeEbQnK8XYJEdkX+QJAR/GC
rEOKUWIbSKeS8ryg4k5bLi+ZMLHTZ9LhaTOAMkS0UouGj3vdfxXzyCzEbrZzL1Gm
X0bYeaU4+h87RaAWgQJBALhNFDDGXnEd0Lj2pUhBcdaRXGqrg8PZWekr0GLDPEvO
s+yhHoqRlGKUwQtwwB3HCIEWxe7siOa0YTy9MJ5QySY=
-----END RSA PRIVATE KEY-----`

var sm2InPKCS8Plain = `-----BEGIN PRIVATE KEY-----
MIGHAgEAMBMGByqGSM49AgEGCCqBHM9VAYItBG0wawIBAQQgUWhfo4lpH0j/Toc7
ESiTd+1FsWJgIR9MlBVeQ0lYi62hRANCAASAuZzZAg6zj+ZXclqBx0UfZVNeN9+R
L5MzLV1dmrLZQqbt+j8oDAN3QU3VPXziKzGttdTvgItUrLavxaCMXOL+
-----END PRIVATE KEY-----`

var sm2InPKCS8WithSM4 = `-----BEGIN ENCRYPTED PRIVATE KEY-----
MIHzMF4GCSqGSIb3DQEFDTBRMDEGCSqGSIb3DQEFDDAkBBAm2QGHlzGOPNqAyOCZ
zWOrAgIIADAMBggqhkiG9w0CCQUAMBwGCCqBHM9VAWgCBBC0yGrxfFu2t51rF8RX
N/4+BIGQkIa24K2nOv+fkmohJHaya9b+LJUs6VR50K+2n3QuJokRvxlGB9TknxDs
e3ZJfNKRoksL7V4Ttd82pgF6a68jBB0//iOSysc6d/ovx5oKrJ8kx+t/U5NbxWRV
8UrHPN50rzxS4l6niklnwUM2q36Lf6R+xYduTVmTfWDAAPFSRIlKUDmhgPlT8MHB
jxqPfZVO
-----END ENCRYPTED PRIVATE KEY-----`

var sm2InPKCS8WithAES = `-----BEGIN ENCRYPTED PRIVATE KEY-----
MIHzMF4GCSqGSIb3DQEFDTBRMDAGCSqGSIb3DQEFDDAjBBA19eEcvLDwQqrQx0Yo
4vKAAgFkMAwGCCqGSIb3DQIJBQAwHQYJYIZIAWUDBAEqBBCniW2M8JL78D06Hqxk
hQtcBIGQd7zfctW4ry2MqfNpnsx5L2kT6Sv11ecehBJt8e9C/d33YLjBuAA9GTLO
Aoz7Z9lb9ivf/TZL0EXBI7llNQitxV+NEx32jCpwO3rEoFUqoGZZh2jcRmLsufS2
pwq8iHhypwUx6EDLJXTXOFlMsqgHYC1ZV9LqnmdLAKyqXQeHtGN9QZgDQwy221yi
xI3CLucj
-----END ENCRYPTED PRIVATE KEY-----`

func TestParsePrivateKey(t *testing.T) {
	var cases = []struct {
		name   string
		key    string
		pass   []byte
		expect bool
	}{
		{"rsa key in pkcs#1, parse without passphrase", rsaInPKCS1, nil, true},
		{"rsa key in pkcs#1, parse with passphrase", rsaInPKCS1, []byte("123"), true},
		{"rsa key in pkcs#8, parse with correct passphrase", rsaInPKCS8, []byte("12345678"), true},
		{"rsa key in pkcs#8, parse with incorrect passphrase", rsaInPKCS8, []byte("1234567"), false},
		{"rsa key in pkcs#8, parse without passphrase", rsaInPKCS8, nil, false},
		{"sm2 key in pkcs#8 plain, parse without passphrase", sm2InPKCS8Plain, nil, true},
		{"sm2 key in pkcs#8 plain, parse with passphrase", sm2InPKCS8Plain, []byte("any"), true},
		{"sm2 key in pkcs#8 with sm4, parse with correct passphrase", sm2InPKCS8WithSM4, []byte("12345678"), true},
		{"sm2 key in pkcs#8 with sm4, parse with incorrect passphrase", sm2InPKCS8WithSM4, []byte("1234567"), false},
		{"sm2 key in pkcs#8 with sm4, parse without passphrase", sm2InPKCS8WithSM4, nil, false},
		{"sm2 key in pkcs#8 with aes, parse with correct passphrase", sm2InPKCS8WithAES, []byte("12345678"), true},
		{"sm2 key in pkcs#8 with aes, parse with incorrect passphrase", sm2InPKCS8WithAES, []byte("1234567"), false},
		{"sm2 key in pkcs#8 with aes, parse without passphrase", sm2InPKCS8WithAES, nil, false},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			_, err := ParsePrivateKeyFromPem([]byte(c.key), c.pass)
			require.Equal(t, c.expect, err == nil, "unexpected result: %v", err)
		})
	}
}

func genPrivateKey(typ string) any {
	switch typ {
	case "rsa":
		key, err := rsa.GenerateKey(rand.Reader, 2048)
		if err != nil {
			panic(err)
		}
		return key
	case "sm2":
		key, err := sm2.GenerateKey(rand.Reader)
		if err != nil {
			panic(err)
		}
		return key
	default:
		panic(fmt.Errorf("unknown key type: %s", typ))
	}
}

var rsaKey = genPrivateKey("rsa").(*rsa.PrivateKey)

func TestSM2(t *testing.T) {
	sm2Key := genPrivateKey("sm2").(*sm2.PrivateKey)
	sm2 := NewSM2Encryptor(sm2Key)
	cipherText, err := sm2.Encrypt([]byte("hello"))
	require.NoError(t, err)
	plainText, err := sm2.Decrypt(cipherText)
	require.NoError(t, err)
	require.Equal(t, []byte("hello"), plainText)
}

func TestRSA(t *testing.T) {
	c1 := NewRSAEncryptor(rsaKey)
	ciphertext, _ := c1.Encrypt([]byte("hello"))

	privPEM := ExportRsaPrivateKeyToPem(rsaKey, "abc")

	key2, _ := ParsePrivateKeyFromPem([]byte(privPEM), []byte("abc"))
	c2 := NewKeyEncryptor(key2)
	plaintext, _ := c2.Decrypt(ciphertext)
	if string(plaintext) != "hello" {
		t.Fail()
	}

	_, err := ParsePrivateKeyFromPem([]byte(privPEM), nil)
	if err == nil {
		t.Errorf("parse without passphrase should fail")
		t.Fail()
	}
	_, err = ParsePrivateKeyFromPem([]byte(privPEM), []byte("ab"))
	if err == nil {
		t.Errorf("parse with incorrect passphrase should return fail")
		t.Fail()
	}

	dir := t.TempDir()

	if err := genrsa(filepath.Join(dir, "private.pem"), ""); err != nil {
		t.Error(err)
		t.Fail()
	}
	if _, err = ParseRsaPrivateKeyFromPath(filepath.Join(dir, "private.pem"), ""); err != nil {
		t.Error(err)
		t.Fail()
	}

	if err := genrsa(filepath.Join(dir, "private.pem"), "abcd"); err != nil {
		t.Error(err)
		t.Fail()
	}
	if _, err = ParseRsaPrivateKeyFromPath(filepath.Join(dir, "private.pem"), "abcd"); err != nil {
		t.Error(err)
		t.Fail()
	}
}

func genrsa(path string, password string) error {
	key, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		return err
	}
	block := &pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(key),
	}
	if password != "" {
		// nolint:staticcheck
		block, err = x509.EncryptPEMBlock(rand.Reader, block.Type, block.Bytes, []byte(password), x509.PEMCipherAES256)
		if err != nil {
			return err
		}
	}
	if err := os.WriteFile(path, pem.EncodeToMemory(block), 0755); err != nil {
		return err
	}
	return nil
}

func BenchmarkKeyEncryptionKey(b *testing.B) {
	secret := make([]byte, 32)
	keyTypes := []string{"rsa", "sm2"}

	for _, typ := range keyTypes {
		ke := NewKeyEncryptor(genPrivateKey(typ))
		cipherText, _ := ke.Encrypt(secret)
		b.ResetTimer()
		b.Run(typ+"_encrypt", func(b *testing.B) {
			for n := 0; n < b.N; n++ {
				_, _ = ke.Encrypt(secret)
			}
		})
		b.Run(typ+"_decrypt", func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_, _ = ke.Decrypt(cipherText)
			}
		})
	}
}

func TestDataEncryptor(t *testing.T) {
	cases := []struct {
		name string
		kek  string
		algo string
	}{
		{"rsa_aesgcm", "rsa", AES256GCM_RSA},
		{"rsa_chacha20", "rsa", CHACHA20_RSA},
		{"sm2_sm4gcm", "sm2", SM4GCM},
	}
	data := []byte("hello")
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			ke := NewKeyEncryptor(genPrivateKey(c.kek))
			de, err := NewDataEncryptor(ke, c.algo)
			require.NoError(t, err, "failed to create data encryptor")
			cipherText, err := de.Encrypt(data)
			require.NoError(t, err, "failed to encrypt data")
			plainText, err := de.Decrypt(cipherText)
			require.NoError(t, err, "failed to decrypt data")
			require.Equal(t, data, plainText, "decrypted data not equal to original")
		})
	}
}

func BenchmarkDataEncryptor(b *testing.B) {
	cases := []struct {
		name string
		kek  string
		algo string
	}{
		{"rsa_aesgcm", "rsa", AES256GCM_RSA},
		{"rsa_chacha20", "rsa", CHACHA20_RSA},
		{"sm2_sm4gcm", "sm2", SM4GCM},
	}
	data := make([]byte, 4<<20)
	if _, err := rand.Read(data); err != nil {
		b.Fatalf("failed to generate random data: %v", err)
	}
	for _, c := range cases {
		ke := NewKeyEncryptor(genPrivateKey(c.kek))
		de, err := NewDataEncryptor(ke, c.algo)
		if err != nil {
			b.Fatalf("failed to create data encryptor: %v", err)
		}
		cipherText, err := de.Encrypt(data)
		if err != nil {
			b.Fatalf("failed to encrypt data: %v", err)
		}
		b.Run(c.name+"_encrypt", func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_, _ = de.Encrypt(data)
			}
		})
		b.Run(c.name+"_decrypt", func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				_, _ = de.Decrypt(cipherText)
			}
		})
	}
}

func TestEncryptedStore(t *testing.T) {
	ctx := context.Background()
	s, _ := CreateStorage("mem", "", "", "", "")
	kc := NewRSAEncryptor(rsaKey)
	dc, _ := NewDataEncryptor(kc, AES256GCM_RSA)
	es := NewEncrypted(s, dc)
	_ = es.Put(ctx, "a", bytes.NewReader([]byte("hello")))
	r, err := es.Get(ctx, "a", 1, 2)
	if err != nil {
		t.Errorf("Get a: %s", err)
		t.Fail()
	}
	d, _ := io.ReadAll(r)
	if string(d) != "el" {
		t.Fail()
	}

	r, _ = es.Get(ctx, "a", 0, -1)
	d, _ = io.ReadAll(r)
	if string(d) != "hello" {
		t.Fail()
	}
	_ = s.Put(ctx, "emptyfile", bytes.NewReader([]byte("")))
	_, err = es.Get(ctx, "emptyfile", 0, -1)
	if err == nil || !strings.Contains(err.Error(), "the object is corrupted") {
		t.Fail()
	}
}


================================================
FILE: pkg/object/eos.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"fmt"
	"net/url"
	"os"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type eos struct {
	s3client
}

func (s *eos) String() string {
	return fmt.Sprintf("eos://%s/", s.s3client.bucket)
}

func (s *eos) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              4 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func newEos(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("invalid endpoint %s: %s", endpoint, err)
	}
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	endpoint = uri.Scheme + "://" + uri.Host[len(bucket)+1:]
	region := "us-east-1"

	if accessKey == "" {
		accessKey = os.Getenv("EOS_ACCESS_KEY")
	}
	if secretKey == "" {
		secretKey = os.Getenv("EOS_SECRET_KEY")
	}
	if token == "" {
		token = os.Getenv("EOS_TOKEN")
	}
	cfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(cfg, func(options *s3.Options) {
		options.BaseEndpoint = aws.String(endpoint)
		options.Region = region
		options.EndpointOptions.DisableHTTPS = !ssl
		options.UsePathStyle = defaultPathStyle()
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})

	return &eos{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("eos", newEos)
}


================================================
FILE: pkg/object/etcd.go
================================================
//go:build !noetcd
// +build !noetcd

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/tls"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"strings"
	"time"

	etcd "go.etcd.io/etcd/client/v3"
	"go.etcd.io/etcd/pkg/transport"
)

type etcdClient struct {
	DefaultObjectStorage
	client *etcd.Client
	kv     etcd.KV
	addr   string
}

func (c *etcdClient) String() string {
	return fmt.Sprintf("etcd://%s/", c.addr)
}

func (c *etcdClient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	resp, err := c.kv.Get(ctx, key, etcd.WithLimit(1))
	if err != nil {
		return nil, err
	}
	for _, pair := range resp.Kvs {
		if string(pair.Key) == key {
			if off > int64(len(pair.Value)) {
				off = int64(len(pair.Value))
			}
			data := pair.Value[off:]
			if limit > 0 && limit < int64(len(data)) {
				data = data[:limit]
			}
			return io.NopCloser(bytes.NewBuffer(data)), nil
		}
	}
	return nil, os.ErrNotExist
}

func (c *etcdClient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	d, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	_, err = c.kv.Put(ctx, key, string(d))
	return err
}

func (c *etcdClient) Head(ctx context.Context, key string) (Object, error) {
	resp, err := c.kv.Get(ctx, key, etcd.WithLimit(1))
	if err != nil {
		return nil, err
	}
	for _, p := range resp.Kvs {
		if string(p.Key) == key {
			return &obj{
				key,
				int64(len(p.Value)),
				time.Now(),
				strings.HasSuffix(key, "/"),
				"",
			}, nil
		}
	}
	return nil, os.ErrNotExist
}

func (c *etcdClient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	_, err := c.kv.Delete(ctx, key)
	return err
}

func genNextKey(key string) string {
	next := make([]byte, len(key))
	copy(next, key)
	p := len(next) - 1
	next[p]++
	for next[p] == 0 {
		p--
		next[p]++
	}
	return string(next)
}

func (c *etcdClient) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "" {
		return nil, false, "", notSupported
	}
	if start == "" {
		start = prefix
	}
	var opts = []etcd.OpOption{etcd.WithLimit(limit), etcd.WithSort(etcd.SortByKey, etcd.SortAscend)}
	if len(prefix) > 0 && prefix[0] != 0xFF {
		opts = append(opts, etcd.WithRange(genNextKey(prefix)))
	} else {
		opts = append(opts, etcd.WithFromKey())
	}
	resp, err := c.client.Get(ctx, start, opts...)
	if err != nil {
		return nil, false, "", fmt.Errorf("get start %v: %s", start, err)
	}
	var objs []Object
	for _, kv := range resp.Kvs {
		k := string(kv.Key)
		if !strings.HasPrefix(k, prefix) {
			break
		}
		objs = append(objs, &obj{
			k,
			int64(len(kv.Value)),
			time.Now(),
			strings.HasSuffix(k, "/"),
			"",
		})
	}
	var nextMarker string
	if resp.More && len(objs) > 0 {
		nextMarker = objs[len(objs)-1].Key()
	}
	return objs, resp.More, nextMarker, nil
}

func buildTlsConfig(u *url.URL) (*tls.Config, error) {
	var tsinfo transport.TLSInfo
	q := u.Query()
	tsinfo.CAFile = q.Get("cacert")
	tsinfo.CertFile = q.Get("cert")
	tsinfo.KeyFile = q.Get("key")
	tsinfo.ServerName = q.Get("server-name")
	tsinfo.InsecureSkipVerify = q.Get("insecure-skip-verify") != ""
	if tsinfo.CAFile != "" || tsinfo.CertFile != "" || tsinfo.KeyFile != "" || tsinfo.ServerName != "" {
		return tsinfo.ClientConfig()
	}
	return nil, nil
}

func newEtcd(addr, user, passwd, token string) (ObjectStorage, error) {
	if !strings.HasPrefix(addr, "etcd://") {
		addr = "etcd://" + addr
	}
	u, err := url.Parse(addr)
	if err != nil {
		return nil, fmt.Errorf("parse %s: %s", addr, err)
	}
	hosts := strings.Split(u.Host, ",")
	for i, h := range hosts {
		h, _, err := net.SplitHostPort(h)
		if err != nil {
			hosts[i] = net.JoinHostPort(h, "2379")
		}
	}
	conf := etcd.Config{
		Endpoints:        hosts,
		Username:         user,
		Password:         passwd,
		AutoSyncInterval: time.Minute,
	}
	conf.TLS, err = buildTlsConfig(u)
	if err != nil {
		return nil, fmt.Errorf("build tls config from %s: %s", u.RawQuery, err)
	}
	c, err := etcd.New(conf)
	if err != nil {
		return nil, err
	}
	return &etcdClient{DefaultObjectStorage{}, c, c.KV, u.Host}, nil
}

func init() {
	Register("etcd", newEtcd)
}


================================================
FILE: pkg/object/file.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strings"

	"github.com/juicedata/juicefs/pkg/utils"
)

const (
	dirSuffix = "/"
)

var TryCFR bool // try copy_file_range
var PutInplace bool

type filestore struct {
	DefaultObjectStorage
	root string
}

func (d *filestore) Symlink(oldName, newName string) error {
	p := d.path(newName)
	if _, err := os.Stat(filepath.Dir(p)); err != nil && os.IsNotExist(err) {
		if err := os.MkdirAll(filepath.Dir(p), os.FileMode(0777)); err != nil {
			return err
		}
	} else if err != nil && !os.IsNotExist(err) {
		return err
	}
	return os.Symlink(oldName, p)
}

func (d *filestore) Readlink(name string) (string, error) {
	return os.Readlink(d.path(name))
}

func (d *filestore) String() string {
	if runtime.GOOS == "windows" {
		return "file:///" + d.root
	}
	return "file://" + d.root
}

func (d *filestore) path(key string) string {
	if strings.HasSuffix(d.root, dirSuffix) {
		return filepath.Join(d.root, key)
	}
	return filepath.Clean(d.root + key)
}

func (d *filestore) Head(ctx context.Context, key string) (Object, error) {
	p := d.path(key)
	fi, err := os.Lstat(p)
	if err != nil {
		return nil, err
	}
	isSymlink := fi.Mode()&os.ModeSymlink != 0
	if isSymlink {
		fi, err = os.Stat(p)
		if err != nil {
			return nil, err
		}
	}
	return toFile(key, fi, isSymlink, getOwnerGroup), nil
}

func toFile(key string, fi fs.FileInfo, isSymlink bool, ownerGetter func(fs.FileInfo) (string, string)) *file {
	size := fi.Size()
	if fi.IsDir() {
		size = 0
	}
	owner, group := ownerGetter(fi)
	return &file{
		obj{
			key,
			size,
			fi.ModTime(),
			fi.IsDir(),
			"",
		},
		owner,
		group,
		fi.Mode(),
		isSymlink,
	}
}

type SectionReaderCloser struct {
	*io.SectionReader
	io.Closer
}

func (d *filestore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	p := d.path(key)

	f, err := os.Open(p)
	if err != nil {
		return nil, err
	}

	finfo, err := f.Stat()
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	if finfo.IsDir() || off >= finfo.Size() {
		_ = f.Close()
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	if limit > 0 {
		return &SectionReaderCloser{
			SectionReader: io.NewSectionReader(f, off, limit),
			Closer:        f,
		}, nil
	}
	return f, nil
}

func (d *filestore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) (err error) {
	p := d.path(key)

	if strings.HasSuffix(key, dirSuffix) || key == "" && strings.HasSuffix(d.root, dirSuffix) {
		return os.MkdirAll(p, os.FileMode(0777))
	}

	var tmp string
	if PutInplace {
		tmp = p
	} else {
		name := filepath.Base(p)
		if len(name) > 200 {
			name = name[:200]
		}
		tmp = TmpFilePath(p, name)
		defer func() {
			if err != nil {
				_ = os.Remove(tmp)
			}
		}()
	}
	f, err := os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	if err != nil && os.IsNotExist(err) {
		if err := os.MkdirAll(filepath.Dir(p), os.FileMode(0777)); err != nil {
			return err
		}
		f, err = os.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	}
	if err != nil {
		return err
	}

	if TryCFR {
		_, err = io.Copy(f, in)
	} else {
		buf := bufPool.Get().(*[]byte)
		defer bufPool.Put(buf)
		_, err = io.CopyBuffer(onlyWriter{f}, in, *buf)
	}
	if err != nil {
		_ = f.Close()
		return err
	}
	err = f.Close()
	if err != nil {
		return err
	}
	if !PutInplace {
		err = os.Rename(tmp, p)
	}
	return err
}

func (d *filestore) Copy(ctx context.Context, dst, src string) error {
	r, err := d.Get(ctx, src, 0, -1)
	if err != nil {
		return err
	}
	defer r.Close()
	return d.Put(ctx, dst, r)
}

func (d *filestore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := os.Remove(d.path(key))
	if err != nil && os.IsNotExist(err) {
		err = nil
	}
	return err
}

type mEntry struct {
	os.FileInfo
	name      string
	fi        os.FileInfo
	isSymlink bool
}

func (m *mEntry) Name() string {
	return m.name
}

func (m *mEntry) Info() os.FileInfo {
	if m.fi != nil {
		return m.fi
	}
	return m.FileInfo
}

func (m *mEntry) IsDir() bool {
	if m.fi != nil {
		return m.fi.IsDir()
	}
	return m.FileInfo.IsDir()
}

// readDirSorted reads the directory named by dir and returns
// a sorted list of directory entries.
func readDirSorted(dir string, followLink bool) ([]*mEntry, error) {
	f, err := os.Open(dir)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	entries, err := f.Readdir(-1)
	if err != nil {
		return nil, err
	}

	mEntries := make([]*mEntry, len(entries))
	for i, e := range entries {
		isSymlink := e.Mode()&os.ModeSymlink != 0
		if e.IsDir() {
			mEntries[i] = &mEntry{e, e.Name() + dirSuffix, nil, false}
		} else if isSymlink && followLink {
			fi, err := os.Stat(filepath.Join(dir, e.Name()))
			if err != nil {
				mEntries[i] = &mEntry{e, e.Name(), nil, true}
				continue
			}
			name := e.Name()
			if fi.IsDir() {
				name = e.Name() + dirSuffix
			}
			mEntries[i] = &mEntry{e, name, fi, false}
		} else {
			mEntries[i] = &mEntry{e, e.Name(), nil, isSymlink}
		}
	}
	sort.Slice(mEntries, func(i, j int) bool { return mEntries[i].Name() < mEntries[j].Name() })
	return mEntries, err
}

func (d *filestore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}
	var dir string = d.root + prefix
	var objs []Object
	if !strings.HasSuffix(dir, dirSuffix) {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := d.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	entries, err := readDirSorted(dir, followLink)
	if err != nil {
		if os.IsPermission(err) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		if os.IsNotExist(err) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		return nil, false, "", err
	}
	for _, e := range entries {
		p := path.Join(dir, e.Name())
		if e.IsDir() {
			p = p + "/"
		}
		if !strings.HasPrefix(p, d.root) {
			continue
		}
		key := p[len(d.root):]
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		info := e.Info()
		f := toFile(key, info, e.isSymlink, getOwnerGroup)
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func (d *filestore) Chmod(key string, mode os.FileMode) error {
	p := d.path(key)
	return os.Chmod(p, mode)
}

func (d *filestore) Chown(key string, owner, group string) error {
	p := d.path(key)
	uid := utils.LookupUser(owner)
	gid := utils.LookupGroup(group)
	if uid == -1 || gid == -1 {
		return fmt.Errorf("user(%s):group(%s) not found", owner, group)
	}
	return os.Lchown(p, uid, gid)
}

func newDisk(root, accesskey, secretkey, token string) (ObjectStorage, error) {
	// For Windows, the path looks like /C:/a/b/c/
	if runtime.GOOS == "windows" {
		root = strings.TrimPrefix(root, "/")
	}
	return &filestore{root: root}, nil
}

func init() {
	Register("file", newDisk)
}


================================================
FILE: pkg/object/file_darwin.go
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"syscall"
	"time"

	"golang.org/x/sys/unix"
)

// nolint:unused
func getAtime(fi os.FileInfo) time.Time {
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return time.Unix(sst.Atimespec.Unix())
	} else {
		return fi.ModTime()
	}
}

func lchtimes(name string, atime time.Time, mtime time.Time) error {
	var ts = make([]unix.Timespec, 2)
	///Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include//sys/stat.h
	// define UTIME_NOW       -1
	// define UTIME_OMIT      -2
	// only change mtime
	ts[0] = unix.Timespec{Sec: -2, Nsec: -2}
	ts[1] = unix.NsecToTimespec(mtime.UnixNano())
	if e := unix.UtimesNanoAt(unix.AT_FDCWD, name, ts, unix.AT_SYMLINK_NOFOLLOW); e != nil {
		return &os.PathError{Op: "lchtimes", Path: name, Err: e}
	}
	return nil
}


================================================
FILE: pkg/object/file_linux.go
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"syscall"
	"time"

	"golang.org/x/sys/unix"
)

// nolint:unused
func getAtime(fi os.FileInfo) time.Time {
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return time.Unix(sst.Atim.Unix())
	}
	return fi.ModTime()
}

func lchtimes(name string, atime time.Time, mtime time.Time) error {
	var ts = make([]unix.Timespec, 2)
	// only change mtime
	ts[0] = unix.Timespec{Sec: unix.UTIME_OMIT, Nsec: unix.UTIME_OMIT}
	ts[1] = unix.NsecToTimespec(mtime.UnixNano())

	if e := unix.UtimesNanoAt(unix.AT_FDCWD, name, ts, unix.AT_SYMLINK_NOFOLLOW); e != nil {
		return &os.PathError{Op: "lchtimes", Path: name, Err: e}
	}
	return nil
}


================================================
FILE: pkg/object/file_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/sftp"
)

func getOwnerGroup(info os.FileInfo) (string, string) {
	var owner, group string
	switch st := info.Sys().(type) {
	case *syscall.Stat_t:
		owner = utils.UserName(int(st.Uid))
		group = utils.GroupName(int(st.Gid))
	case *sftp.FileStat:
		owner = utils.UserName(int(st.UID))
		group = utils.GroupName(int(st.GID))
	}
	return owner, group
}

func (d *filestore) Chtimes(key string, mtime time.Time) error {
	p := d.path(key)
	return lchtimes(p, time.Time{}, mtime)
}


================================================
FILE: pkg/object/file_unix_test.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"testing"
	"time"
)

func TestLChtimes(t *testing.T) {
	filePath := "/tmp/LChtimesTestAfile1"
	linkPath := "/tmp/LChtimesTestLink1"
	os.Remove(filePath)
	os.Remove(linkPath)
	_, err := os.Create(filePath)
	if err != nil {
		t.Fatalf("create file failed: %s", err)
	}
	err = os.Symlink(filePath, linkPath)
	if err != nil {
		t.Fatalf("symlink file failed: %s", err)
	}
	oldStat, err := os.Lstat(linkPath)
	if err != nil {
		t.Fatalf("lstat file failed: %s", err)
	}

	oldAtime := getAtime(oldStat)
	newMtime := oldStat.ModTime().Add(-time.Hour)
	err = lchtimes(linkPath, time.Time{}, newMtime)
	if err != nil {
		t.Fatalf("lchtimes file failed: %s", err)
	}
	newStat, err := os.Lstat(linkPath)
	if err != nil {
		t.Fatalf("lstat file failed: %s", err)
	}
	if newStat.ModTime() != newMtime {
		t.Fatalf("mtime change failed")
	}
	newAtime := getAtime(newStat)
	if newAtime != oldAtime {
		t.Fatalf("atime change failed")
	}
}


================================================
FILE: pkg/object/file_windows.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"time"
)

func getOwnerGroup(info os.FileInfo) (string, string) {
	return "", ""
}

func lookupUser(name string) int {
	return 0
}

func lookupGroup(name string) int {
	return 0
}

func (d *filestore) Chtimes(key string, mtime time.Time) error {
	p := d.path(key)
	return os.Chtimes(p, time.Time{}, mtime)
}


================================================
FILE: pkg/object/filesystem_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"strings"
	"testing"
)

func testKeysEqual(objs []Object, expectedKeys []string) error {
	gottenKeys := make([]string, len(objs))
	for idx, obj := range objs {
		gottenKeys[idx] = obj.Key()
	}
	if len(gottenKeys) != len(expectedKeys) {
		return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
			strings.Join(gottenKeys, ", "))
	}

	for idx, key := range gottenKeys {
		if key != expectedKeys[idx] {
			return fmt.Errorf("Expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
				strings.Join(gottenKeys, ", "))
		}
	}
	return nil
}

func TestDisk2(t *testing.T) {
	diskPath := "/tmp/abc/"
	_ = os.RemoveAll(diskPath)
	s, _ := newDisk(diskPath, "", "", "")
	s = WithPrefix(s, "prefix/")
	testFileSystem(t, s)
}

func TestSftp2(t *testing.T) { //skip mutate
	if os.Getenv("SFTP_HOST") == "" {
		t.SkipNow()
	}
	sftp, err := newSftp(os.Getenv("SFTP_HOST"), os.Getenv("SFTP_USER"), os.Getenv("SFTP_PASS"), "")
	if err != nil {
		t.Fatalf("sftp: %s", err)
	}
	testFileSystem(t, sftp)
}

func TestCifs2(t *testing.T) { //skip mutate
	if os.Getenv("CIFS_ADDR") == "" {
		fmt.Println("skip CIFS test")
		t.SkipNow()
	}
	cifs, err := newCifs(os.Getenv("CIFS_ADDR"), os.Getenv("CIFS_USER"), os.Getenv("CIFS_PASSWORD"), "")
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testFileSystem(t, cifs)
}

func TestHDFS2(t *testing.T) { //skip mutate
	if os.Getenv("HDFS_ADDR") == "" {
		t.Skip()
	}
	dfs, _ := newHDFS(os.Getenv("HDFS_ADDR"), "testUser1", "", "")
	testFileSystem(t, dfs)
}

func TestNFS2(t *testing.T) { //skip mutate
	if os.Getenv("NFS_ADDR") == "" {
		t.SkipNow()
	}
	b, err := newNFSStore(os.Getenv("NFS_ADDR"), os.Getenv("NFS_ACCESS_KEY"), os.Getenv("NFS_SECRET_KEY"), "")
	if err != nil {
		t.Fatal(err)
	}
	testFileSystem(t, b)
}

func testFileSystem(t *testing.T, s ObjectStorage) {
	ctx := context.Background()
	keys := []string{
		"x/",
		"x/x.txt",
		"xy.txt",
		"xyz/",
		"xyz/xyz.txt",
	}
	// initialize directory tree
	for _, key := range keys {
		if err := s.Put(ctx, key, bytes.NewReader([]byte{'a', 'b'})); err != nil {
			t.Fatalf("PUT object `%s` failed: %q", key, err)
		}
	}
	if o, err := s.Head(ctx, "x/"); err != nil {
		t.Fatalf("Head x/: %s", err)
	} else if f, ok := o.(File); !ok {
		t.Fatalf("Head should return File")
	} else if !f.IsDir() {
		t.Fatalf("x/ should be a dir")
	}
	// cleanup
	defer func() {
		// delete reversely, directory only can be deleted when it's empty
		objs, err := listAll(ctx, s, "", "", 100, true)
		if err != nil {
			t.Fatalf("listall failed: %s", err)
		}
		gottenKeys := make([]string, len(objs))
		for idx, obj := range objs {
			gottenKeys[idx] = obj.Key()
		}
		idx := len(gottenKeys) - 1
		for ; idx >= 0; idx-- {
			if err := s.Delete(ctx, gottenKeys[idx]); err != nil {
				t.Fatalf("DELETE object `%s` failed: %q", gottenKeys[idx], err)
			}
		}
	}()
	objs, err := listAll(ctx, s, "x/", "", 100, true)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys := []string{"x/", "x/x.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	objs, err = listAll(ctx, s, "x", "", 100, true)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	if ss, ok := s.(FileSystem); ok {
		for _, mode := range []uint32{0022, 0122, 0422} {
			t.Logf("test mode %o", os.FileMode(mode))
			err := ss.Chmod("x/", os.FileMode(mode))
			if err != nil {
				t.Fatalf("chmod %ofailed: %s", mode, err)
			}

			objs, err = listAll(ctx, s, "x", "", 100, true)
			if err != nil {
				t.Fatalf("list failed: %s mode %o", err, mode)
			}
			expectedKeys = []string{"x/", "xy.txt", "xyz/", "xyz/xyz.txt"}
			if _, ok := ss.(*nfsStore); ok {
				expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"}
			}
			if _, ok := ss.(*cifsStore); ok {
				expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"}
			}
			if mode == 0422 {
				if strings.HasPrefix(s.String(), "gluster://") {
					expectedKeys = []string{"x/", "x/x.txt", "xy.txt", "xyz/", "xyz/xyz.txt"}
				}
			}
			if err = testKeysEqual(objs, expectedKeys); err != nil {
				t.Fatalf("testKeysEqual fail: %s mode %o", err, mode)
			}
			err = ss.Chmod("x/", os.FileMode(0777))
			if err != nil {
				t.Fatalf("chmod %o failed: %s", mode, err)
			}
		}
	}

	objs, err = listAll(ctx, s, "xy", "", 100, true)
	if err != nil {
		t.Fatalf("list failed: %s", err)
	}
	expectedKeys = []string{"xy.txt", "xyz/", "xyz/xyz.txt"}
	if err = testKeysEqual(objs, expectedKeys); err != nil {
		t.Fatalf("testKeysEqual fail: %s", err)
	}

	if ss, ok := s.(SupportSymlink); ok {
		// a< a- < a/ < a0    <    b< b- < b/ < b0
		_ = s.Put(ctx, "a-", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "a0", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "b-", bytes.NewReader([]byte{}))
		_ = s.Put(ctx, "b0", bytes.NewReader(make([]byte, 10)))
		_ = s.Put(ctx, "xyz/ol1/p.txt", bytes.NewReader([]byte{}))

		err = ss.Symlink("../b0", "bb/b1")
		if err != nil {
			t.Fatalf("symlink: %s", err)
		}
		if target, err := ss.Readlink("bb/b1"); err != nil {
			t.Fatalf("readlink: %s", err)
		} else if target != "../b0" {
			t.Fatalf("target should be ../b0, but got %s", target)
		}
		if fi, err := s.Head(ctx, "bb/b1"); err != nil || !fi.IsSymlink() || fi.Size() != 10 {
			t.Fatalf("haed of symlink: err=%s, size=%d isSymlink=%v", err, fi.Size(), fi.IsSymlink())
		}
		err = ss.Symlink("../notExist", "bb/brokenLink")
		if err != nil {
			t.Fatalf("symlink: %s", err)
		}
		if _, err := s.Head(ctx, "bb/brokenLink"); !errors.Is(err, os.ErrNotExist) {
			t.Fatalf("head broken symlink: err=%s, should be os.ErrNotExist", err)
		}
		_ = s.Delete(ctx, "bb/brokenLink")
		if err = ss.Symlink("xyz/ol1/", "a"); err != nil {
			t.Fatalf("symlink: a: %s", err)
		}
		_ = ss.Symlink("xyz/notExist/", "b")

		objs, err = listAll(ctx, s, "", "", 100, true)
		if err != nil {
			t.Fatalf("listall failed: %s", err)
		}
		expectedKeys = []string{"", "a-", "a/", "a/p.txt", "a0", "b", "b-", "b0", "bb/", "bb/b1", "x/", "x/x.txt", "xy.txt", "xyz/", "xyz/ol1/", "xyz/ol1/p.txt", "xyz/xyz.txt"}
		if err = testKeysEqual(objs, expectedKeys); err != nil {
			t.Fatalf("testKeysEqual fail: %s", err)
		}
		if objs[2].Size() != 0 {
			t.Fatalf("size of target(dir) should be 0")
		}
		if objs[9].Size() != 10 {
			t.Fatalf("size of target(file) should be 10")
		}

		// test don't follow symlink
		if _, ok := s.(*hdfsclient); !ok {
			objs, err = listAll(ctx, s, "", "", 100, false)
			expectedKeys = []string{"", "a", "a-", "a0", "b", "b-", "b0", "bb/", "bb/b1", "x/", "x/x.txt", "xy.txt", "xyz/", "xyz/ol1/", "xyz/ol1/p.txt", "xyz/xyz.txt"}
			if err = testKeysEqual(objs, expectedKeys); err != nil {
				t.Fatalf("testKeysEqual fail: %s", err)
			}
		}
	}

	// put a file with very long name
	longName := strings.Repeat("a", 255)
	if err := s.Put(ctx, "dir/"+longName, bytes.NewReader([]byte{0})); err != nil {
		t.Fatalf("PUT a file with long name `%s` failed: %q", longName, err)
	}
}


================================================
FILE: pkg/object/gluster.go
================================================
//go:build gluster
// +build gluster

/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"io/fs"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"github.com/juicedata/gogfapi/gfapi"
)

type gluster struct {
	DefaultObjectStorage
	name string
	indx uint64
	vols []*gfapi.Volume
}

func (g *gluster) String() string {
	return fmt.Sprintf("gluster://%s/", g.name)
}

func (g *gluster) vol() *gfapi.Volume {
	if len(g.vols) == 1 {
		return g.vols[0]
	}
	n := atomic.AddUint64(&g.indx, 1)
	return g.vols[n%uint64(len(g.vols))]
}

func (g *gluster) Head(ctx context.Context, key string) (Object, error) {
	fi, err := g.vol().Stat(key)
	if err != nil {
		return nil, err
	}
	return g.toFile(key, fi, false), nil
}

func (g *gluster) toFile(key string, fi fs.FileInfo, isSymlink bool) *file {
	size := fi.Size()
	if fi.IsDir() {
		size = 0
	}
	owner, group := getOwnerGroup(fi)
	return &file{
		obj{
			key,
			size,
			fi.ModTime(),
			fi.IsDir(),
			"",
		},
		owner,
		group,
		fi.Mode(),
		isSymlink,
	}
}

func (g *gluster) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	f, err := g.vol().Open(key)
	if err != nil {
		return nil, err
	}

	finfo, err := f.Stat()
	if err != nil {
		_ = f.Close()
		return nil, err
	}
	if finfo.IsDir() || off > finfo.Size() {
		_ = f.Close()
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	if limit > 0 {
		return &SectionReaderCloser{
			SectionReader: io.NewSectionReader(f, off, limit),
			Closer:        f,
		}, nil
	}
	return f, nil
}

func (g *gluster) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	v := g.vol()
	if strings.HasSuffix(key, dirSuffix) {
		return v.MkdirAll(key, os.FileMode(0777))
	}
	f, err := v.OpenFile(key, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	if err != nil && os.IsNotExist(err) {
		if err := v.MkdirAll(filepath.Dir(key), os.FileMode(0777)); err != nil {
			return err
		}
		f, err = v.OpenFile(key, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0666)
	}
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			_ = v.Unlink(key)
		}
	}()

	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err = io.CopyBuffer(f, in, *buf)
	if err != nil {
		_ = f.Close()
		return err
	}
	if err = f.Sync(); err != nil {
		_ = f.Close()
		return err
	}
	err = f.Close()
	return err
}

func (g *gluster) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	v := g.vol()
	err := v.Unlink(key)
	if err != nil && strings.Contains(err.Error(), "is a directory") {
		err = v.Rmdir(key)
	}
	if os.IsNotExist(err) {
		err = nil
	}
	return err
}

// readDirSorted reads the directory named by dirname and returns
// a sorted list of directory entries.
func (g *gluster) readDirSorted(dirname string, followLink bool) ([]*mEntry, error) {
	v := g.vol()
	f, err := v.OpenDir(dirname)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	entries, err := f.Readdir(0)
	if err != nil {
		return nil, err
	}

	mEntries := make([]*mEntry, 0, len(entries))
	for _, e := range entries {
		name := e.Name()
		if name == "." || name == ".." {
			continue
		}
		isSymlink := e.Mode()&os.ModeSymlink != 0
		if e.IsDir() {
			mEntries = append(mEntries, &mEntry{nil, name + dirSuffix, e, false})
		} else if isSymlink && followLink {
			fi, err := v.Stat(filepath.Join(dirname, name))
			if err != nil {
				mEntries = append(mEntries, &mEntry{nil, name, e, true})
				continue
			}
			if fi.IsDir() {
				name += dirSuffix
			}
			mEntries = append(mEntries, &mEntry{nil, name, fi, false})
		} else {
			mEntries = append(mEntries, &mEntry{nil, name, e, isSymlink})
		}
	}
	sort.Slice(mEntries, func(i, j int) bool { return mEntries[i].Name() < mEntries[j].Name() })
	return mEntries, err
}

func (g *gluster) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}
	var dir string = prefix
	var objs []Object
	if !strings.HasSuffix(dir, dirSuffix) {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := g.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	entries, err := g.readDirSorted(dir, followLink)
	if err != nil {
		if os.IsPermission(err) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		if os.IsNotExist(err) {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}
	for _, e := range entries {
		p := filepath.Join(dir, e.Name())
		if e.IsDir() {
			p = filepath.ToSlash(p + "/")
		}
		key := p
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		info := e.Info()
		f := g.toFile(key, info, e.isSymlink)
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func (g *gluster) Chtimes(path string, mtime time.Time) error {
	return notSupported
}

func (g *gluster) Chmod(path string, mode os.FileMode) error {
	return g.vol().Chmod(path, mode)
}

func (g *gluster) Chown(path string, owner, group string) error {
	return notSupported
}

func newGluster(endpoint, ak, sk, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("gluster://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	ps := strings.Split(uri.Path, "/")
	if len(ps) == 1 {
		return nil, fmt.Errorf("no volume provided")
	}
	name := ps[1]
	// multiple clients for possible performance improvement
	var size int
	if ssize := os.Getenv("JFS_NUM_GLUSTER_CLIENTS"); ssize != "" {
		size, _ = strconv.Atoi(ssize)
		if size > 8 {
			size = 8
		}
	}
	if size < 1 {
		size = 1
	}
	// logging
	level := gfapi.LogInfo
	if slevel := os.Getenv("JFS_GLUSTER_LOG_LEVEL"); slevel != "" {
		switch strings.ToUpper(slevel) {
		case "ERROR":
			level = gfapi.LogError
		case "WARN", "WARNING":
			level = gfapi.LogWarning
		case "INFO":
			level = gfapi.LogInfo
		case "DEBUG":
			level = gfapi.LogDebug
		case "TRACE":
			level = gfapi.LogTrace
		}
	}
	logPath := os.Getenv("JFS_GLUSTER_LOG_PATH")
	hosts := strings.Split(uri.Host, ",")
	pid := os.Getpid()
	ostore := gluster{
		name: name,
		vols: make([]*gfapi.Volume, size),
	}
	for i := range ostore.vols {
		v := &gfapi.Volume{}
		// TODO: support port in host
		err = v.Init(name, hosts...)
		if err != nil {
			return nil, fmt.Errorf("init %s: %s", name, err)
		}
		if logPath == "" {
			err = v.SetLogging(fmt.Sprintf("/var/log/glusterfs/%s-%s-%d-%d.log", hosts[0], name, pid, i), level)
		} else {
			err = v.SetLogging(logPath, level)
		}
		if err != nil {
			logger.Warnf("Set gluster logging for vol %s: %s", name, err)
		}
		err = v.Mount()
		if err != nil {
			return nil, fmt.Errorf("mount %s: %s", name, err)
		}
		ostore.vols[i] = v
	}
	return &ostore, nil
}

func init() {
	Register("gluster", newGluster)
}


================================================
FILE: pkg/object/gluster_test.go
================================================
//go:build gluster
// +build gluster

/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"os"
	"testing"
)

func TestGluster(t *testing.T) {
	if os.Getenv("GLUSTER_VOLUME") == "" {
		t.SkipNow()
	}
	b, _ := newGluster(os.Getenv("GLUSTER_VOLUME"), "", "", "")
	testStorage(t, b)

}

func TestGluster2(t *testing.T) {
	if os.Getenv("GLUSTER_VOLUME") == "" {
		t.SkipNow()
	}
	b, _ := newGluster(os.Getenv("GLUSTER_VOLUME"), "", "", "")
	testFileSystem(t, b)
}


================================================
FILE: pkg/object/gs.go
================================================
//go:build !nogs
// +build !nogs

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"net/url"
	"os"
	"sort"
	"strconv"
	"strings"
	"sync/atomic"
	"time"

	"cloud.google.com/go/compute/metadata"
	"cloud.google.com/go/storage"
	"github.com/pkg/errors"
	"golang.org/x/oauth2/google"
	"google.golang.org/api/iterator"
)

type gs struct {
	DefaultObjectStorage
	clients []*storage.Client
	index   uint64
	bucket  string
	region  string
	sc      string
}

func (g *gs) String() string {
	return fmt.Sprintf("gs://%s/", g.bucket)
}

func (g *gs) getClient() *storage.Client {
	if len(g.clients) == 1 {
		return g.clients[0]
	}
	n := atomic.AddUint64(&g.index, 1)
	return g.clients[n%(uint64(len(g.clients)))]
}

func (g *gs) Create(ctx context.Context) error {
	// check if the bucket is already exists
	if objs, _, _, err := g.List(ctx, "", "", "", "", 1, true); err == nil && len(objs) > 0 {
		return nil
	}
	projectID := os.Getenv("GOOGLE_CLOUD_PROJECT")
	if projectID == "" {
		projectID, _ = metadata.ProjectIDWithContext(ctx)
	}
	if projectID == "" {
		cred, err := google.FindDefaultCredentials(ctx)
		if err == nil {
			projectID = cred.ProjectID
		}
	}
	if projectID == "" {
		return errors.New("GOOGLE_CLOUD_PROJECT environment variable must be set")
	}
	// Guess region when region is not provided
	if g.region == "" {
		zone, err := metadata.ZoneWithContext(ctx)
		if err == nil && len(zone) > 2 {
			g.region = zone[:len(zone)-2]
		}
	}

	err := g.getClient().Bucket(g.bucket).Create(ctx, projectID, &storage.BucketAttrs{
		Name:         g.bucket,
		StorageClass: g.sc,
		Location:     g.region,
	})
	if err != nil && strings.Contains(err.Error(), "You already own this bucket") {
		return nil
	}
	return err
}

func (g *gs) Head(ctx context.Context, key string) (Object, error) {
	attrs, err := g.getClient().Bucket(g.bucket).Object(key).Attrs(ctx)
	if err != nil {
		if err == storage.ErrObjectNotExist {
			err = os.ErrNotExist
		}
		return nil, err
	}

	return &obj{
		key,
		attrs.Size,
		attrs.Updated,
		strings.HasSuffix(key, "/"),
		attrs.StorageClass,
	}, nil
}

func (g *gs) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	reader, err := g.getClient().Bucket(g.bucket).Object(key).NewRangeReader(ctx, off, limit)
	if err != nil {
		return nil, err
	}
	// TODO fire another attr request to get the actual storage class
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(g.sc)
	return reader, nil
}

func (g *gs) Put(ctx context.Context, key string, data io.Reader, getters ...AttrGetter) error {
	writer := g.getClient().Bucket(g.bucket).Object(key).NewWriter(ctx)
	writer.StorageClass = g.sc

	// If you upload small objects (< 16MiB), you should set ChunkSize
	// to a value slightly larger than the objects' sizes to avoid memory bloat.
	// This is especially important if you are uploading many small objects concurrently.
	writer.ChunkSize = 5 << 20

	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err := io.CopyBuffer(writer, data, *buf)
	if err != nil {
		return err
	}
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(g.sc)
	return writer.Close()
}

func (g *gs) Copy(ctx context.Context, dst, src string) error {
	client := g.getClient()
	srcObj := client.Bucket(g.bucket).Object(src)
	dstObj := client.Bucket(g.bucket).Object(dst)
	copier := dstObj.CopierFrom(srcObj)
	if g.sc != "" {
		copier.StorageClass = g.sc
	}
	_, err := copier.Run(ctx)
	return err
}

func (g *gs) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	if err := g.getClient().Bucket(g.bucket).Object(key).Delete(ctx); err != storage.ErrObjectNotExist {
		return err
	}
	return nil
}

func (g *gs) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	objectIterator := g.getClient().Bucket(g.bucket).Objects(ctx, &storage.Query{Prefix: prefix, Delimiter: delimiter, StartOffset: start})
	pager := iterator.NewPager(objectIterator, int(limit), token)
	var entries []*storage.ObjectAttrs
	nextPageToken, err := pager.NextPage(&entries)
	if err != nil {
		return nil, false, "", err
	}
	n := len(entries)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		item := entries[i]
		if delimiter != "" && item.Prefix != "" {
			objs[i] = &obj{item.Prefix, 0, time.Unix(0, 0), true, item.StorageClass}
		} else {
			objs[i] = &obj{item.Name, item.Size, item.Updated, strings.HasSuffix(item.Name, "/"), item.StorageClass}
		}
	}
	if delimiter != "" {
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, nextPageToken != "", nextPageToken, nil
}

func (g *gs) SetStorageClass(sc string) error {
	g.sc = sc
	return nil
}

func newGS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("gs://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, errors.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	var region string
	if len(hostParts) > 1 {
		region = hostParts[1]
	}

	var size int
	if ssize := os.Getenv("JFS_NUM_GOOGLE_CLIENTS"); ssize != "" {
		if size, err = strconv.Atoi(ssize); err != nil {
			return nil, err
		}
	}
	if size < 1 {
		size = 5
	}
	clis := make([]*storage.Client, size)
	for i := 0; i < size; i++ {
		client, err := storage.NewClient(ctx)
		if err != nil {
			return nil, err
		}
		clis[i] = client
	}

	return &gs{clients: clis, bucket: bucket, region: region}, nil
}

func init() {
	Register("gs", newGS)
}


================================================
FILE: pkg/object/hdfs.go
================================================
//go:build !nohdfs
// +build !nohdfs

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"os/user"
	"path"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/colinmarc/hdfs/v2"
	"github.com/colinmarc/hdfs/v2/hadoopconf"
)

var superuser = "hdfs"
var supergroup = "supergroup"

type hdfsclient struct {
	DefaultObjectStorage
	addr           string
	basePath       string
	c              *hdfs.Client
	dfsReplication int
	umask          os.FileMode
	closeTimeout   time.Duration
	closeMaxDelay  time.Duration
}

func (h *hdfsclient) String() string {
	return fmt.Sprintf("hdfs://%s%s", h.addr, h.basePath)
}

func (h *hdfsclient) path(key string) string {
	return h.basePath + key
}

func (h *hdfsclient) Head(ctx context.Context, key string) (Object, error) {
	info, err := h.c.Stat(h.path(key))
	if err != nil {
		return nil, err
	}

	return h.toFile(key, info), nil
}

func (h *hdfsclient) toFile(key string, info os.FileInfo) *file {
	hinfo := info.(*hdfs.FileInfo)
	f := &file{
		obj{
			key,
			info.Size(),
			info.ModTime(),
			info.IsDir(),
			"",
		},
		hinfo.Owner(),
		hinfo.OwnerGroup(),
		info.Mode(),
		false,
	}
	if f.owner == superuser {
		f.owner = "root"
	}
	if f.group == supergroup {
		f.group = "root"
	}
	// stickybit from HDFS is different than golang
	if f.mode&01000 != 0 {
		f.mode &= ^os.FileMode(01000)
		f.mode |= os.ModeSticky
	}
	if info.IsDir() {
		f.size = 0
		if !strings.HasSuffix(f.key, "/") && f.key != "" {
			f.key += "/"
		}
	}
	return f
}

func (h *hdfsclient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	f, err := h.c.Open(h.path(key))
	if err != nil {
		return nil, err
	}

	finfo := f.Stat()
	if finfo.IsDir() || off >= finfo.Size() {
		_ = f.Close()
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	if limit > 0 {
		return &SectionReaderCloser{
			SectionReader: io.NewSectionReader(f, off, limit),
			Closer:        f,
		}, nil
	}
	return f, nil
}

func (h *hdfsclient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) (err error) {
	p := h.path(key)
	if strings.HasSuffix(p, dirSuffix) {
		return h.c.MkdirAll(p, 0777&^h.umask)
	}
	var tmp string
	if PutInplace {
		tmp = p
	} else {
		name := path.Base(p)
		if len(name) > 200 {
			name = name[:200]
		}
		tmp = TmpFilePath(p, name)
		defer func() {
			if err != nil {
				_ = h.c.Remove(tmp)
			}
		}()
	}
	f, err := h.c.CreateFile(tmp, h.dfsReplication, 128<<20, 0666&^h.umask)
	if err != nil {
		if pe, ok := err.(*os.PathError); ok && pe.Err == os.ErrNotExist {
			_ = h.c.MkdirAll(path.Dir(p), 0777&^h.umask)
			f, err = h.c.CreateFile(tmp, h.dfsReplication, 128<<20, 0666&^h.umask)
		}
		if pe, ok := err.(*os.PathError); ok && errors.Is(pe.Err, os.ErrExist) {
			_ = h.c.Remove(tmp)
			f, err = h.c.CreateFile(tmp, h.dfsReplication, 128<<20, 0666&^h.umask)
		}
		if err != nil {
			return err
		}
	}
	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err = io.CopyBuffer(f, in, *buf)
	if err != nil {
		_ = f.Close()
		return err
	}
	start := time.Now()
	sleeptime := 400 * time.Millisecond
	for {
		err = f.Close()
		if IsErrReplicating(err) && start.Add(h.closeTimeout).After(time.Now()) {
			time.Sleep(sleeptime)
			sleeptime = min(2*sleeptime, h.closeMaxDelay)
			continue
		} else {
			break
		}
	}
	if err != nil {
		return err
	}
	if !PutInplace {
		err = h.c.Rename(tmp, p)
	}
	return err
}

func min(a, b time.Duration) time.Duration {
	if a < b {
		return a
	}
	return b
}

func IsErrReplicating(err error) bool {
	pe, ok := err.(*os.PathError)
	return ok && pe.Err == hdfs.ErrReplicating
}

func (h *hdfsclient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := h.c.Remove(h.path(key))
	if err != nil && os.IsNotExist(err) {
		err = nil
	}
	return err
}

func (h *hdfsclient) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}
	dir := h.path(prefix)
	var objs []Object
	if !strings.HasSuffix(dir, dirSuffix) {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := h.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}

	file, err := h.c.Open(dir)
	var entries []os.FileInfo
	if file != nil {
		entries, err = file.Readdir(0)
	}
	if err != nil {
		if os.IsPermission(err) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		if os.IsNotExist(err) {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}

	// make sure they are ordered in full path
	entryMap := make(map[string]fs.FileInfo)
	names := make([]string, len(entries))
	for i, info := range entries {
		if info.IsDir() {
			names[i] = info.Name() + "/"
		} else {
			names[i] = info.Name()
		}
		entryMap[names[i]] = info
	}
	sort.Strings(names)

	for _, name := range names {
		p := dir + name
		if !strings.HasPrefix(p, h.basePath) {
			continue
		}
		key := p[len(h.basePath):]
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		f := h.toFile(key, entryMap[name])
		objs = append(objs, f)
		if len(objs) >= int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func (h *hdfsclient) Chtimes(key string, mtime time.Time) error {
	// fixme: need set the atime in hdfs.SetTimesRequestProto to -1 to avoid updating the atime
	// ref: https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/fs/FileSystem.html#setTimes-org.apache.hadoop.fs.Path-long-long-
	return h.c.Chtimes(h.path(key), mtime, mtime)
}

func (h *hdfsclient) Chmod(key string, mode os.FileMode) error {
	return h.c.Chmod(h.path(key), mode)
}

func (h *hdfsclient) Chown(key string, owner, group string) error {
	if owner == "root" {
		owner = superuser
	}
	if group == "root" {
		group = supergroup
	}
	return h.c.Chown(h.path(key), owner, group)
}

func newHDFS(addr, username, sk, token string) (ObjectStorage, error) {
	conf, err := hadoopconf.LoadFromEnvironment()
	if err != nil {
		return nil, fmt.Errorf("Problem loading configuration: %s", err)
	}

	rpcAddr, basePath := parseHDFSAddr(addr, conf)
	options := hdfs.ClientOptionsFromConf(conf)
	if addr != "" {
		options.Addresses = rpcAddr
		logger.Infof("HDFS Addresses: %s, basePath: %s", rpcAddr, basePath)
	}

	if options.KerberosClient != nil {
		options.KerberosClient, err = getKerberosClient()
		if err != nil {
			return nil, fmt.Errorf("Problem with kerberos authentication: %s", err)
		}
	} else {
		if username == "" {
			username = os.Getenv("HADOOP_USER_NAME")
		}
		if username == "" {
			current, err := user.Current()
			if err != nil {
				return nil, fmt.Errorf("get current user: %s", err)
			}
			username = current.Username
		}
		options.User = username
	}

	c, err := hdfs.NewClient(options)
	if err != nil {
		return nil, fmt.Errorf("new HDFS client %s: %s", rpcAddr, err)
	}
	if os.Getenv("HADOOP_SUPER_USER") != "" {
		superuser = os.Getenv("HADOOP_SUPER_USER")
	}
	if os.Getenv("HADOOP_SUPER_GROUP") != "" {
		supergroup = os.Getenv("HADOOP_SUPER_GROUP")
	}

	var replication = 3
	if v, found := conf["dfs.replication"]; found {
		if x, err := strconv.Atoi(v); err == nil {
			replication = x
		}
	}
	var umask uint16 = 022
	if v, found := conf["fs.permissions.umask-mode"]; found {
		if x, err := strconv.ParseUint(v, 8, 16); err == nil {
			umask = uint16(x)
		}
	}
	var closeTimeout = 120 * time.Second
	if v, found := conf["ipc.client.rpc-timeout.ms"]; found {
		if x, err := strconv.Atoi(v); err == nil {
			closeTimeout = time.Duration(x) * time.Millisecond
		}
	}
	var closeMaxDelay = 60 * time.Second
	if v, found := conf["dfs.client.block.write.locateFollowingBlock.max.delay.ms"]; found {
		if x, err := strconv.Atoi(v); err == nil {
			closeMaxDelay = time.Duration(x) * time.Millisecond
		}
	}

	return &hdfsclient{
		addr:           strings.Join(rpcAddr, ","),
		basePath:       basePath,
		c:              c,
		dfsReplication: replication,
		umask:          os.FileMode(umask),
		closeTimeout:   closeTimeout,
		closeMaxDelay:  closeMaxDelay,
	}, nil
}

// addr can be hdfs://nameservice e.g. hdfs://example, hdfs://example/user/juicefs
// convert the nameservice as a comma separated list of host:port by referencing hadoop conf
func parseHDFSAddr(addr string, conf hadoopconf.HadoopConf) (rpcAddresses []string, basePath string) {
	addr = strings.TrimPrefix(addr, "hdfs://")
	sp := strings.SplitN(addr, "/", 2)
	authority := sp[0]

	// check if it is a nameservice
	var nns []string
	confParam := "dfs.namenode.rpc-address." + authority
	for key, value := range conf {
		if key == confParam || strings.HasPrefix(key, confParam+".") {
			nns = append(nns, value)
		}
	}
	if len(nns) > 0 {
		rpcAddresses = nns
	} else {
		rpcAddresses = strings.Split(authority, ",")
	}
	basePath = "/"
	if len(sp) > 1 && len(sp[1]) > 0 {
		basePath += strings.TrimRight(sp[1], "/") + "/"
	}
	return
}

func init() {
	Register("hdfs", newHDFS)
}


================================================
FILE: pkg/object/hdfs_kerberos.go
================================================
//go:build !nohdfs
// +build !nohdfs

// Copyright 2014 Colin Marc (colinmarc@gmail.com)
// borrowed from https://github.com/colinmarc/hdfs/blob/master/cmd/hdfs/kerberos.go

package object

import (
	"encoding/base64"
	"fmt"
	"github.com/jcmturner/gokrb5/v8/keytab"
	"os"
	"os/user"
	"strings"

	krb "github.com/jcmturner/gokrb5/v8/client"
	"github.com/jcmturner/gokrb5/v8/config"
	"github.com/jcmturner/gokrb5/v8/credentials"
)

func getKerberosClient() (*krb.Client, error) {
	configPath := os.Getenv("KRB5_CONFIG")
	if configPath == "" {
		configPath = "/etc/krb5.conf"
	}

	cfg, err := config.Load(configPath)
	if err != nil {
		return nil, err
	}

	// Try to authenticate with keytab file first.
	keytabPath := os.Getenv("KRB5KEYTAB")
	keytabBase64 := os.Getenv("KRB5KEYTAB_BASE64")
	principal := os.Getenv("KRB5PRINCIPAL")

	var kt *keytab.Keytab
	if keytabBase64 != "" {
		decodedKeytab, err := base64.StdEncoding.DecodeString(keytabBase64)
		if err != nil {
			return nil, fmt.Errorf("error decoding Base64 encoded data %s", err)
		}
		kt = new(keytab.Keytab)
		err = kt.Unmarshal(decodedKeytab)
		if err != nil {
			return nil, err
		}
	} else if keytabPath != "" {
		kt, err = keytab.Load(keytabPath)
		if err != nil {
			return nil, err
		}
	}
	if kt != nil {
		// e.g. KRB5PRINCIPAL="primary/instance@realm"
		sp := strings.Split(principal, "@")
		if len(sp) != 2 {
			return nil, fmt.Errorf("unusable kerberos principal: %s", principal)
		}
		username, realm := sp[0], sp[1]
		logger.Infof("username: %s, realm: %s", username, realm)
		client := krb.NewWithKeytab(username, realm, kt, cfg)
		return client, nil
	}

	// Determine the ccache location from the environment, falling back to the
	// default location.
	ccachePath := os.Getenv("KRB5CCNAME")
	if strings.Contains(ccachePath, ":") {
		if strings.HasPrefix(ccachePath, "FILE:") {
			ccachePath = strings.SplitN(ccachePath, ":", 2)[1]
		} else {
			return nil, fmt.Errorf("unusable ccache: %s", ccachePath)
		}
	} else if ccachePath == "" {
		u, err := user.Current()
		if err != nil {
			return nil, err
		}

		ccachePath = fmt.Sprintf("/tmp/krb5cc_%s", u.Uid)
	}

	ccache, err := credentials.LoadCCache(ccachePath)
	if err != nil {
		return nil, err
	}

	client, err := krb.NewFromCCache(ccache, cfg)
	if err != nil {
		return nil, err
	}

	return client, nil
}


================================================
FILE: pkg/object/ibmcos.go
================================================
//go:build !noibmcos
// +build !noibmcos

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/IBM/ibm-cos-sdk-go/aws"
	"github.com/IBM/ibm-cos-sdk-go/aws/awserr"
	"github.com/IBM/ibm-cos-sdk-go/aws/credentials/ibmiam"
	"github.com/IBM/ibm-cos-sdk-go/aws/request"
	"github.com/IBM/ibm-cos-sdk-go/aws/session"
	"github.com/IBM/ibm-cos-sdk-go/service/s3"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
)

type ibmcos struct {
	bucket string
	s3     *s3.S3
	sc     string
}

func (s *ibmcos) String() string {
	return fmt.Sprintf("ibmcos://%s/", s.bucket)
}

func (s *ibmcos) Create(ctx context.Context) error {
	input := &s3.CreateBucketInput{Bucket: &s.bucket}
	// https://cloud.ibm.com/docs/cloud-object-storage?topic=cloud-object-storage-classes&code=go
	if s.sc != "" {
		input.CreateBucketConfiguration = &s3.CreateBucketConfiguration{
			LocationConstraint: &s.sc,
		}
	}
	_, err := s.s3.CreateBucket(input)
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (s *ibmcos) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (s *ibmcos) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key}
	if off > 0 || limit > 0 {
		var r string
		if limit > 0 {
			r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1)
		} else {
			r = fmt.Sprintf("bytes=%d-", off)
		}
		params.Range = &r
	}
	var reqID string
	resp, err := s.s3.GetObjectWithContext(ctx, params, request.WithGetResponseHeader(s3RequestIDKey, &reqID))
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqID)
	if err != nil {
		return nil, err
	}
	if resp.StorageClass != nil {
		attrs.SetStorageClass(*resp.StorageClass)
	}
	return resp.Body, nil
}

func (s *ibmcos) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var body io.ReadSeeker
	if b, ok := in.(io.ReadSeeker); ok {
		body = b
	} else {
		data, err := io.ReadAll(in)
		if err != nil {
			return err
		}
		body = bytes.NewReader(data)
	}
	mimeType := utils.GuessMimeType(key)
	params := &s3.PutObjectInput{
		Bucket:      &s.bucket,
		Key:         &key,
		Body:        body,
		ContentType: &mimeType,
	}
	if s.sc != "" {
		params.SetStorageClass(s.sc)
	}
	var reqID string
	_, err := s.s3.PutObjectWithContext(ctx, params, request.WithGetResponseHeader(s3RequestIDKey, &reqID))
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqID).SetStorageClass(s.sc)
	return err
}

func (s *ibmcos) Copy(ctx context.Context, dst, src string) error {
	src = s.bucket + "/" + src
	params := &s3.CopyObjectInput{
		Bucket:     &s.bucket,
		Key:        &dst,
		CopySource: &src,
	}
	if s.sc != "" {
		params.SetStorageClass(s.sc)
	}
	_, err := s.s3.CopyObjectWithContext(ctx, params)
	return err
}

func (s *ibmcos) Head(ctx context.Context, key string) (Object, error) {
	param := s3.HeadObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	r, err := s.s3.HeadObjectWithContext(ctx, &param)
	if err != nil {
		if e, ok := err.(awserr.RequestFailure); ok && e.StatusCode() == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		*r.ContentLength,
		*r.LastModified,
		strings.HasSuffix(key, "/"),
		*r.StorageClass,
	}, nil
}

func (s *ibmcos) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	param := s3.DeleteObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	var reqID string
	_, err := s.s3.DeleteObjectWithContext(ctx, &param, request.WithGetResponseHeader(s3RequestIDKey, &reqID))
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqID)
	return err
}

func (s *ibmcos) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	param := s3.ListObjectsInput{
		Bucket:       &s.bucket,
		Prefix:       &prefix,
		Marker:       &start,
		MaxKeys:      &limit,
		EncodingType: aws.String("url"),
	}
	if delimiter != "" {
		param.Delimiter = &delimiter
	}
	resp, err := s.s3.ListObjectsWithContext(ctx, &param)
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := resp.Contents[i]
		oKey, err := decodeKey(*o.Key, resp.EncodingType)
		if err != nil {
			return nil, false, "", errors.WithMessagef(err, "failed to decode key %s", *o.Key)
		}
		objs[i] = &obj{oKey, *o.Size, *o.LastModified, strings.HasSuffix(oKey, "/"), *o.StorageClass}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			prefix, err := decodeKey(*p.Prefix, resp.EncodingType)
			if err != nil {
				return nil, false, "", errors.WithMessagef(err, "failed to decode commonPrefixes %s", *p.Prefix)
			}
			objs = append(objs, &obj{prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, *resp.IsTruncated, *resp.NextMarker, nil
}

func (s *ibmcos) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (s *ibmcos) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	params := &s3.CreateMultipartUploadInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	if s.sc != "" {
		params.SetStorageClass(s.sc)
	}
	resp, err := s.s3.CreateMultipartUploadWithContext(ctx, params)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: *resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (s *ibmcos) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	n := int64(num)
	params := &s3.UploadPartInput{
		Bucket:     &s.bucket,
		Key:        &key,
		UploadId:   &uploadID,
		Body:       bytes.NewReader(body),
		PartNumber: &n,
	}
	resp, err := s.s3.UploadPartWithContext(ctx, params)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: *resp.ETag}, nil
}

func (s *ibmcos) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	return nil, notSupported
}

func (s *ibmcos) AbortUpload(ctx context.Context, key string, uploadID string) {
	params := &s3.AbortMultipartUploadInput{
		Bucket:   &s.bucket,
		Key:      &key,
		UploadId: &uploadID,
	}
	_, _ = s.s3.AbortMultipartUploadWithContext(ctx, params)
}

func (s *ibmcos) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	var s3Parts []*s3.CompletedPart
	for i := range parts {
		n := new(int64)
		*n = int64(parts[i].Num)
		s3Parts = append(s3Parts, &s3.CompletedPart{ETag: &parts[i].ETag, PartNumber: n})
	}
	params := &s3.CompleteMultipartUploadInput{
		Bucket:          &s.bucket,
		Key:             &key,
		UploadId:        &uploadID,
		MultipartUpload: &s3.CompletedMultipartUpload{Parts: s3Parts},
	}
	_, err := s.s3.CompleteMultipartUploadWithContext(ctx, params)
	return err
}

func (s *ibmcos) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &s3.ListMultipartUploadsInput{
		Bucket:    aws.String(s.bucket),
		KeyMarker: aws.String(marker),
	}
	// FIXME: parsing time "2018-08-23T12:23:26.046+08:00" as "2006-01-02T15:04:05Z"
	result, err := s.s3.ListMultipartUploadsWithContext(ctx, input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{*u.Key, *u.UploadId, *u.Initiated}
	}
	var nextMarker string
	if result.NextKeyMarker != nil {
		nextMarker = *result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (s *ibmcos) SetStorageClass(sc string) error {
	s.sc = sc
	return nil
}

func newIBMCOS(endpoint, apiKey, serviceInstanceID, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, _ := url.ParseRequestURI(endpoint)
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	region := hostParts[2]
	authEndpoint := "https://iam.cloud.ibm.com/identity/token"
	serviceEndpoint := "https://" + strings.SplitN(uri.Host, ".", 2)[1]
	conf := aws.NewConfig().
		WithRegion(region).
		WithEndpoint(serviceEndpoint).
		WithCredentials(ibmiam.NewStaticCredentials(aws.NewConfig(),
			authEndpoint, apiKey, serviceInstanceID)).
		WithS3ForcePathStyle(defaultPathStyle())
	sess := session.Must(session.NewSession())
	client := s3.New(sess, conf)
	return &ibmcos{bucket: bucket, s3: client}, nil
}

func init() {
	Register("ibmcos", newIBMCOS)
}


================================================
FILE: pkg/object/interface.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"io"
	"time"
)

type Object interface {
	Key() string
	Size() int64
	Mtime() time.Time
	IsDir() bool
	IsSymlink() bool
	StorageClass() string
}

type obj struct {
	key   string
	size  int64
	mtime time.Time
	isDir bool
	sc    string
}

func (o *obj) Key() string          { return o.key }
func (o *obj) Size() int64          { return o.size }
func (o *obj) Mtime() time.Time     { return o.mtime }
func (o *obj) IsDir() bool          { return o.isDir }
func (o *obj) IsSymlink() bool      { return false }
func (o *obj) StorageClass() string { return o.sc }

type MultipartUpload struct {
	MinPartSize int
	MaxCount    int
	UploadID    string
}

type Part struct {
	Num  int
	Size int
	ETag string
}

type PendingPart struct {
	Key      string
	UploadID string
	Created  time.Time
}

type Limits struct {
	IsSupportMultipartUpload bool
	IsSupportUploadPartCopy  bool
	MinPartSize              int
	MaxPartSize              int64
	MaxPartCount             int
}

// ObjectStorage is the interface for object storage.
// all of these API should be idempotent.
type ObjectStorage interface {
	// Description of the object storage.
	String() string
	// Limits of the object storage.
	Limits() Limits
	// Create the bucket if not existed.
	Create(ctx context.Context) error
	// Get the data for the given object specified by key.
	Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error)
	// Put data read from a reader to an object specified by key.
	Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error
	// Copy an object from src to dst.
	Copy(ctx context.Context, dst, src string) error
	// Delete a object.
	Delete(ctx context.Context, key string, getters ...AttrGetter) error

	// Head returns some information about the object or an error if not found.
	Head(ctx context.Context, key string) (Object, error)
	// List returns a list of objects using ListObjectV2.
	List(ctx context.Context, prefix, startAfter, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error)
	// ListAll returns all the objects as a channel.
	ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error)

	// CreateMultipartUpload starts to upload a large object part by part.
	CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error)
	// UploadPart upload a part of an object.
	UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error)
	// UploadPartCopy Uploads a part by copying data from an existing object as data source.
	UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error)
	// AbortUpload abort a multipart upload.
	AbortUpload(ctx context.Context, key string, uploadID string)
	// CompleteUpload finish a multipart upload.
	CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error
	// ListUploads lists existing multipart uploads.
	ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error)
}

type Shutdownable interface {
	Shutdown()
}

func Shutdown(o ObjectStorage) {
	fn := func(o ObjectStorage) {
		if s, ok := o.(Shutdownable); ok {
			s.Shutdown()
		}
	}

	switch o := o.(type) {
	case *encrypted:
		fn(o.ObjectStorage)
	case *withPrefix:
		fn(o.os)
	case *sharded:
		for _, s := range o.stores {
			fn(s)
		}
	default:
		fn(o)
	}
}


================================================
FILE: pkg/object/ks3.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/ks3sdklib/aws-sdk-go/aws"
	"github.com/ks3sdklib/aws-sdk-go/aws/awserr"
	"github.com/ks3sdklib/aws-sdk-go/aws/credentials"
	"github.com/ks3sdklib/aws-sdk-go/service/s3"
)

const s3StorageClassHdr = "X-Amz-Storage-Class"

type ks3 struct {
	bucket string
	s3     *s3.S3
	sc     string
}

func (s *ks3) String() string {
	return fmt.Sprintf("ks3://%s/", s.bucket)
}

func (s *ks3) Create(ctx context.Context) error {
	_, err := s.s3.CreateBucketWithContext(ctx, &s3.CreateBucketInput{Bucket: &s.bucket})
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (s *ks3) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (s *ks3) Head(ctx context.Context, key string) (Object, error) {
	param := s3.HeadObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}

	r, err := s.s3.HeadObjectWithContext(ctx, &param)
	if err != nil {
		if e, ok := err.(awserr.RequestFailure); ok && e.StatusCode() == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}

	var sc string
	if val, ok := r.Metadata[s3StorageClassHdr]; ok {
		sc = *val
	} else {
		sc = "STANDARD"
	}
	return &obj{
		key,
		*r.ContentLength,
		*r.LastModified,
		strings.HasSuffix(key, "/"),
		sc,
	}, nil
}

func (s *ks3) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key}
	if off > 0 || limit > 0 {
		var r string
		if limit > 0 {
			r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1)
		} else {
			r = fmt.Sprintf("bytes=%d-", off)
		}
		params.Range = &r
	}
	resp, err := s.s3.GetObjectWithContext(ctx, params)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(resp.Metadata[s3RequestIDKey]))
		attrs.SetStorageClass(aws.ToString(resp.Metadata[s3StorageClassHdr]))
	}
	if err != nil {
		return nil, err
	}
	return resp.Body, nil
}

func (s *ks3) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var body io.ReadSeeker
	if b, ok := in.(io.ReadSeeker); ok {
		body = b
	} else {
		data, err := io.ReadAll(in)
		if err != nil {
			return err
		}
		body = bytes.NewReader(data)
	}
	mimeType := utils.GuessMimeType(key)
	params := &s3.PutObjectInput{
		Bucket:      &s.bucket,
		Key:         &key,
		Body:        body,
		ContentType: &mimeType,
	}
	if s.sc != "" {
		params.StorageClass = aws.String(s.sc)
	}
	resp, err := s.s3.PutObjectWithContext(ctx, params)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(resp.Metadata[s3RequestIDKey])).SetStorageClass(s.sc)
	}
	return err
}
func (s *ks3) Copy(ctx context.Context, dst, src string) error {
	src = s.bucket + "/" + src
	params := &s3.CopyObjectInput{
		Bucket:     &s.bucket,
		Key:        &dst,
		CopySource: &src,
	}
	if s.sc != "" {
		params.StorageClass = aws.String(s.sc)
	}
	_, err := s.s3.CopyObjectWithContext(ctx, params)
	return err
}

func (s *ks3) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	param := s3.DeleteObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	resp, err := s.s3.DeleteObjectWithContext(ctx, &param)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(resp.Metadata[s3RequestIDKey]))
	}
	if e, ok := err.(awserr.RequestFailure); ok && e.StatusCode() == http.StatusNotFound {
		return nil
	}
	return err
}

func (s *ks3) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	param := s3.ListObjectsInput{
		Bucket:       &s.bucket,
		Prefix:       &prefix,
		Marker:       &start,
		MaxKeys:      &limit,
		EncodingType: aws.String("url"),
	}
	if delimiter != "" {
		param.Delimiter = &delimiter
	}
	resp, err := s.s3.ListObjectsWithContext(ctx, &param)
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := resp.Contents[i]
		oKey, err := decodeKey(*o.Key, resp.EncodingType)
		if err != nil {
			return nil, false, "", errors.WithMessagef(err, "failed to decode key %s", *o.Key)
		}
		objs[i] = &obj{oKey, *o.Size, *o.LastModified, strings.HasSuffix(oKey, "/"), *o.StorageClass}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			prefix, err := decodeKey(*p.Prefix, resp.EncodingType)
			if err != nil {
				return nil, false, "", errors.WithMessagef(err, "failed to decode commonPrefixes %s", *p.Prefix)
			}
			objs = append(objs, &obj{prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	var nextMarker string
	if resp.NextMarker != nil {
		nextMarker = *resp.NextMarker
	}
	return objs, *resp.IsTruncated, nextMarker, nil
}

func (s *ks3) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (s *ks3) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	params := &s3.CreateMultipartUploadInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	if s.sc != "" {
		params.StorageClass = aws.String(s.sc)
	}
	resp, err := s.s3.CreateMultipartUploadWithContext(ctx, params)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: *resp.UploadID, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (s *ks3) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	n := int64(num)
	params := &s3.UploadPartInput{
		Bucket:     &s.bucket,
		Key:        &key,
		UploadID:   &uploadID,
		Body:       bytes.NewReader(body),
		PartNumber: &n,
	}
	resp, err := s.s3.UploadPartWithContext(ctx, params)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: *resp.ETag}, nil
}

func (s *ks3) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	resp, err := s.s3.UploadPartCopyWithContext(ctx, &s3.UploadPartCopyInput{
		Bucket:          aws.String(s.bucket),
		CopySource:      aws.String(s.bucket + "/" + srcKey),
		CopySourceRange: aws.String(fmt.Sprintf("bytes=%d-%d", off, off+size-1)),
		Key:             aws.String(key),
		PartNumber:      aws.Long(int64(num)),
		UploadID:        aws.String(uploadID),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: *resp.CopyPartResult.ETag}, nil
}

func (s *ks3) AbortUpload(ctx context.Context, key string, uploadID string) {
	params := &s3.AbortMultipartUploadInput{
		Bucket:   &s.bucket,
		Key:      &key,
		UploadID: &uploadID,
	}
	_, _ = s.s3.AbortMultipartUploadWithContext(ctx, params)
}

func (s *ks3) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	var s3Parts []*s3.CompletedPart
	for i := range parts {
		n := new(int64)
		*n = int64(parts[i].Num)
		s3Parts = append(s3Parts, &s3.CompletedPart{ETag: &parts[i].ETag, PartNumber: n})
	}
	params := &s3.CompleteMultipartUploadInput{
		Bucket:          &s.bucket,
		Key:             &key,
		UploadID:        &uploadID,
		MultipartUpload: &s3.CompletedMultipartUpload{Parts: s3Parts},
	}
	_, err := s.s3.CompleteMultipartUploadWithContext(ctx, params)
	return err
}

func (s *ks3) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &s3.ListMultipartUploadsInput{
		Bucket:    aws.String(s.bucket),
		KeyMarker: aws.String(marker),
	}
	// FIXME: parsing time "2018-08-23T12:23:26.046+08:00" as "2006-01-02T15:04:05Z"
	result, err := s.s3.ListMultipartUploadsWithContext(ctx, input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{*u.Key, *u.UploadID, *u.Initiated}
	}
	var nextMarker string
	if result.NextKeyMarker != nil {
		nextMarker = *result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (s *ks3) SetStorageClass(sc string) error {
	s.sc = sc
	return nil
}

var ks3Regions = map[string]string{
	"cn-beijing":   "BEIJING",
	"cn-shanghai":  "SHANGHAI",
	"cn-guangzhou": "GUANGZHOU",
	"cn-qingdao":   "QINGDAO",
	"jr-beijing":   "JR_BEIJING",
	"jr-shanghai":  "JR_SHANGHAI",
	"":             "HANGZHOU",
	"cn-hk-1":      "HONGKONG",
	"rus":          "RUSSIA",
	"sgp":          "SINGAPORE",
}

func newKS3(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, _ := url.ParseRequestURI(endpoint)
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	if len(hostParts) < 2 {
		return nil, fmt.Errorf("invalid endpoint: %s", endpoint)
	}
	bucket := hostParts[0]
	region := hostParts[1][3:]
	region = strings.TrimLeft(region, "-")
	var pathStyle bool = defaultPathStyle()
	if strings.HasSuffix(uri.Host, "ksyun.com") || strings.HasSuffix(uri.Host, "ksyuncs.com") {
		region = strings.TrimSuffix(region, "-internal")
		region = ks3Regions[region]
		pathStyle = false
	} else if envRegion := os.Getenv("AWS_REGION"); envRegion != "" {
		region = envRegion
	}
	if region == "" {
		region = "us-east-1"
	}

	var err error
	accessKey, err = url.PathUnescape(accessKey)
	if err != nil {
		return nil, fmt.Errorf("unescape access key: %s", err)
	}
	secretKey, err = url.PathUnescape(secretKey)
	if err != nil {
		return nil, fmt.Errorf("unescape secret key: %s", err)
	}
	awsConfig := &aws.Config{
		Region:           region,
		Endpoint:         strings.SplitN(uri.Host, ".", 2)[1],
		DisableSSL:       !ssl,
		HTTPClient:       httpClient,
		S3ForcePathStyle: pathStyle,
		Credentials:      credentials.NewStaticCredentials(accessKey, secretKey, token),
	}

	return &ks3{bucket: bucket, s3: s3.New(awsConfig)}, nil
}

func init() {
	Register("ks3", newKS3)
}


================================================
FILE: pkg/object/mem.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"sort"
	"strings"
	"sync"
	"time"
)

type mobj struct {
	data  []byte
	mtime time.Time
	mode  os.FileMode
	owner string
	group string
}

type memStore struct {
	sync.Mutex
	DefaultObjectStorage
	name    string
	objects map[string]*mobj
}

func (m *memStore) String() string {
	return fmt.Sprintf("mem://%s/", m.name)
}

func (m *memStore) Head(ctx context.Context, key string) (Object, error) {
	m.Lock()
	defer m.Unlock()
	// Minimum length is 1.
	if key == "" {
		return nil, errors.New("object key cannot be empty")
	}
	o, ok := m.objects[key]
	if !ok {
		return nil, os.ErrNotExist
	}
	f := &file{
		obj{
			key,
			int64(len(o.data)),
			o.mtime,
			strings.HasSuffix(key, "/"),
			"",
		},
		o.owner,
		o.group,
		o.mode,
		false,
	}
	return f, nil
}

func (m *memStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	m.Lock()
	defer m.Unlock()
	// Minimum length is 1.
	if key == "" {
		return nil, errors.New("object key cannot be empty")
	}
	d, ok := m.objects[key]
	if !ok {
		return nil, errors.New("not exists")
	}
	if off > int64(len(d.data)) {
		off = int64(len(d.data))
	}
	data := d.data[off:]
	if limit > 0 && limit < int64(len(data)) {
		data = data[:limit]
	}
	return io.NopCloser(bytes.NewBuffer(data)), nil
}

func (m *memStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	m.Lock()
	defer m.Unlock()
	// Minimum length is 1.
	if key == "" {
		return errors.New("object key cannot be empty")
	}
	_, ok := m.objects[key]
	if ok {
		logger.Debugf("overwrite %s", key)
	}
	data, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	m.objects[key] = &mobj{data: data, mtime: time.Now()}
	return nil
}

func (m *memStore) Copy(ctx context.Context, dst, src string) error {
	d, err := m.Get(ctx, src, 0, -1)
	if err != nil {
		return err
	}
	return m.Put(ctx, dst, d)
}

func (m *memStore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	m.Lock()
	defer m.Unlock()
	delete(m.objects, key)
	return nil
}

func (m *memStore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	m.Lock()
	defer m.Unlock()

	objs := make([]Object, 0)
	commonPrefixsMap := make(map[string]bool, 0)
	for k := range m.objects {
		if strings.HasPrefix(k, prefix) && k > marker {
			o := m.objects[k]
			if delimiter != "" {
				remainString := strings.TrimPrefix(k, prefix)
				if pos := strings.Index(remainString, delimiter); pos != -1 {
					commonPrefix := remainString[0 : pos+1]
					if _, ok := commonPrefixsMap[commonPrefix]; ok {
						continue
					}
					f := &file{
						obj{
							prefix + commonPrefix,
							0,
							time.Unix(0, 0),
							strings.HasSuffix(commonPrefix, "/"),
							"",
						},
						o.owner,
						o.group,
						o.mode,
						false,
					}
					objs = append(objs, f)
					commonPrefixsMap[commonPrefix] = true
					continue
				}
			}

			f := &file{
				obj{
					k,
					int64(len(o.data)),
					o.mtime,
					strings.HasSuffix(k, "/"),
					"",
				},
				o.owner,
				o.group,
				o.mode,
				false,
			}
			objs = append(objs, f)
		}
	}
	sort.Slice(objs, func(i, j int) bool {
		return objs[i].Key() < objs[j].Key()
	})
	if int64(len(objs)) > limit {
		objs = objs[:limit]
	}
	return generateListResult(objs, limit)
}

func newMem(endpoint, accesskey, secretkey, token string) (ObjectStorage, error) {
	store := &memStore{name: endpoint}
	store.objects = make(map[string]*mobj)
	return store, nil
}

func init() {
	Register("mem", newMem)
}


================================================
FILE: pkg/object/minio.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"fmt"
	"net/url"
	"os"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type minio struct {
	s3client
}

func (m *minio) String() string {
	if m.s3.Options().BaseEndpoint != nil {
		endpoint := *m.s3.Options().BaseEndpoint
		if idx := strings.Index(endpoint, "://"); idx >= 0 {
			endpoint = endpoint[idx+3:]
		}
		return fmt.Sprintf("minio://%s/%s/", endpoint, m.bucket)
	}
	return fmt.Sprintf("minio://%s/", m.bucket)
}

func (m *minio) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func newMinio(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("http://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	ssl := strings.ToLower(uri.Scheme) == "https"
	region := uri.Query().Get("region")
	if region == "" {
		region = os.Getenv("MINIO_REGION")
	}
	if region == "" {
		region = awsDefaultRegion
	}
	if accessKey == "" {
		accessKey = os.Getenv("MINIO_ACCESS_KEY")
	}
	if secretKey == "" {
		secretKey = os.Getenv("MINIO_SECRET_KEY")
	}
	var cfg aws.Config
	if accessKey != "" {
		cfg, err = config.LoadDefaultConfig(ctx,
			config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	} else {
		cfg, err = config.LoadDefaultConfig(ctx)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(cfg, func(options *s3.Options) {
		options.Region = region
		options.BaseEndpoint = aws.String(uri.Scheme + "://" + uri.Host)
		options.EndpointOptions.DisableHTTPS = !ssl
		options.UsePathStyle = defaultPathStyle()
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	if len(uri.Path) < 2 {
		return nil, fmt.Errorf("no bucket name provided in %s", endpoint)
	}
	bucket := uri.Path[1:]
	if strings.Contains(bucket, "/") && strings.HasPrefix(bucket, "minio/") {
		bucket = bucket[len("minio/"):]
	}
	bucket = strings.Split(bucket, "/")[0]
	return &minio{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("minio", newMinio)
}


================================================
FILE: pkg/object/nfs.go
================================================
//go:build !nonfs
// +build !nonfs

/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"os/user"
	"path"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"github.com/vmware/go-nfs-client/nfs"
	"github.com/vmware/go-nfs-client/nfs/rpc"
)

var _ ObjectStorage = (*nfsStore)(nil)

type nfsStore struct {
	DefaultObjectStorage
	username string
	host     string
	root     string
	fmode    os.FileMode
	dmode    os.FileMode

	target *nfs.Target
}

type nfsEntry struct {
	*nfs.EntryPlus
	name      string
	fi        os.FileInfo
	isSymlink bool
}

func (e *nfsEntry) Name() string {
	return e.name
}

func (e *nfsEntry) Size() int64 {
	if e.fi != nil {
		return e.fi.Size()
	}
	return e.EntryPlus.Size()
}

func (e *nfsEntry) Info() (os.FileInfo, error) {
	if e.fi != nil {
		return e.fi, nil
	}
	return e.EntryPlus, nil
}

func (e *nfsEntry) IsDir() bool {
	if e.fi != nil {
		return e.fi.IsDir()
	}
	return e.EntryPlus.IsDir()
}

func (n *nfsStore) String() string {
	return fmt.Sprintf("nfs://%s@%s:%s", n.username, n.host, n.root)
}

func (n *nfsStore) path(key string) string {
	if key == "" {
		return "./"
	}
	return key
}

func (n *nfsStore) Head(ctx context.Context, key string) (Object, error) {
	p := n.path(key)
	fi, _, err := n.target.Lookup(p)
	if err != nil {
		return nil, err
	}
	if attr, ok := fi.(*nfs.Fattr); ok && attr.Type == nfs.NF3Lnk {
		src, err := n.Readlink(p)
		if err != nil {
			return nil, err
		}
		dir, _ := path.Split(p)
		ff, err := n.Head(ctx, path.Join(dir, src))
		if err != nil {
			return nil, err
		}
		if f2, ok := ff.(*file); ok {
			f2.isSymlink = true
		}
		return ff, nil
	}
	return n.fileInfo(key, fi), nil
}

func (n *nfsStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	p := n.path(key)
	if strings.HasSuffix(p, "/") {
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	ff, err := n.target.Open(p)
	if err != nil {
		return nil, errors.Wrapf(err, "open %s", p)
	}

	if limit > 0 {
		return &SectionReaderCloser{
			SectionReader: io.NewSectionReader(ff, off, limit),
			Closer:        ff,
		}, nil
	}
	return ff, err
}

func (n *nfsStore) mkdirAll(p string) error {
	p = strings.TrimSuffix(p, "/")
	fi, _, err := n.target.Lookup(p)
	if err == nil {
		if fi.IsDir() {
			logger.Tracef("nfs mkdir: path %s already exists", p)
			return nil
		} else {
			return syscall.ENOTDIR
		}
	} else if !os.IsNotExist(err) {
		return err
	}

	dir, _ := path.Split(p)
	if dir != "." {
		if err = n.mkdirAll(dir); err != nil {
			return err
		}
	}
	_, err = n.target.Mkdir(p, n.dmode)
	return err
}

func (n *nfsStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) (err error) {
	p := n.path(key)
	if strings.HasSuffix(p, dirSuffix) {
		return n.mkdirAll(p)
	}
	var tmp string
	if PutInplace {
		tmp = p
	} else {
		name := path.Base(p)
		if len(name) > 200 {
			name = name[:200]
		}
		tmp = TmpFilePath(p, name)
		defer func() {
			if err != nil {
				_ = n.target.Remove(tmp)
			}
		}()
	}
	_, err = n.target.Create(tmp, n.fmode)
	if os.IsNotExist(err) {
		_ = n.mkdirAll(path.Dir(p))
		_, err = n.target.Create(tmp, n.fmode)
	}
	if os.IsExist(err) {
		_ = n.target.Remove(tmp)
		_, err = n.target.Create(tmp, n.fmode)
	}
	if err != nil {
		return errors.Wrapf(err, "create %s", tmp)
	}
	ff, err := n.target.Open(tmp)
	if err != nil {
		return err
	}

	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err = io.CopyBuffer(ff, in, *buf)
	if err != nil {
		_ = ff.Close()
		return err
	}
	err = ff.Close()
	if err != nil {
		return err
	}
	if !PutInplace {
		// overwrite dst
		err = n.target.Rename(tmp, p)
	}
	return err
}

func (n *nfsStore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	path := n.path(key)
	if path == "./" {
		return nil
	}
	fi, _, err := n.target.Lookup(path)
	if err != nil {
		if nfs.IsNotDirError(err) || os.IsNotExist(err) {
			return nil
		}
		return err
	}
	p := strings.TrimSuffix(path, "/")
	if fi.IsDir() {
		err = n.target.RmDir(p)
	} else {
		err = n.target.Remove(p)
	}
	if err != nil && os.IsNotExist(err) {
		err = nil
	}
	return err
}

func (n *nfsStore) fileInfo(key string, fi os.FileInfo) Object {
	owner, group := n.getOwnerGroup(fi)
	isSymlink := fi.Mode()&os.ModeSymlink != 0
	ff := &file{
		obj{key, fi.Size(), fi.ModTime(), fi.IsDir(), ""},
		owner,
		group,
		fi.Mode(),
		isSymlink,
	}
	if fi.IsDir() {
		if key != "" && !strings.HasSuffix(key, "/") {
			ff.key += "/"
		}
		ff.size = 0
	}
	return ff
}

func (n *nfsStore) readDirSorted(ctx context.Context, dir string, followLink bool) ([]*nfsEntry, error) {
	o, err := n.Head(ctx, strings.TrimSuffix(dir, "/"))
	if err != nil {
		return nil, err
	}
	dirname := o.Key()
	entries, err := n.target.ReadDirPlus(dirname)
	if err != nil {
		return nil, errors.Wrapf(err, "readdir %s", dirname)
	}
	nfsEntries := make([]*nfsEntry, len(entries))
	for i, e := range entries {
		if e.IsDir() {
			nfsEntries[i] = &nfsEntry{e, e.Name() + dirSuffix, nil, false}
		} else if e.Attr.Attr.Type == nfs.NF3Lnk && followLink {
			// follow symlink
			nfsEntries[i] = &nfsEntry{e, e.Name(), nil, true}
			src, err := n.Readlink(path.Join(dirname, e.Name()))
			if err != nil {
				logger.Errorf("readlink %s: %s", e.Name(), err)
				continue
			}
			srcPath := path.Clean(path.Join(dirname, src))
			fi, _, err := n.target.Lookup(srcPath)
			if err != nil {
				logger.Warnf("follow link `%s`: lookup `%s`: %s", path.Join(dirname, e.Name()), srcPath, err)
				continue
			}
			name := e.Name()
			if fi.IsDir() {
				name = e.Name() + dirSuffix
			}
			nfsEntries[i] = &nfsEntry{e, name, fi, false}
		} else {
			nfsEntries[i] = &nfsEntry{e, e.Name(), nil, e.Attr.Attr.Type == nfs.NF3Lnk}
		}
	}
	sort.Slice(nfsEntries, func(i, j int) bool { return nfsEntries[i].Name() < nfsEntries[j].Name() })
	return nfsEntries, err
}

func (n *nfsStore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}
	dir := prefix
	var objs []Object
	if dir != "" && !strings.HasSuffix(dir, dirSuffix) {
		dir = path.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := n.Head(ctx, dir)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	entries, err := n.readDirSorted(ctx, dir, followLink)
	if err != nil {
		if os.IsPermission(err) || errors.Is(err, nfs.NFS3Error(nfs.NFS3ErrAcces)) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		if os.IsNotExist(err) {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}
	for _, e := range entries {
		p := path.Join(dir, e.Name())
		if e.IsDir() && !e.isSymlink {
			p = p + "/"
		}
		if !strings.HasPrefix(p, prefix) || (marker != "" && p <= marker) {
			continue
		}
		f := toFile(p, e, e.isSymlink, n.getOwnerGroup)
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func (n *nfsStore) setAttr(path string, attrSet func(attr *nfs.Fattr) nfs.Sattr3) error {
	p := n.path(path)
	fi, fh, err := n.target.Lookup(p)
	if err != nil {
		return err
	}
	fattr := fi.(*nfs.Fattr)
	_, err = n.target.SetAttr(fh, attrSet(fattr))
	return err
}

func (n *nfsStore) Chtimes(path string, mtime time.Time) error {
	return n.setAttr(path, func(attr *nfs.Fattr) nfs.Sattr3 {
		return nfs.Sattr3{
			Mtime: nfs.SetTime{
				SetIt: nfs.SetToClientTime,
				Time: nfs.NFS3Time{
					Seconds:  uint32(mtime.Unix()),
					Nseconds: uint32(mtime.Nanosecond()),
				},
			},
		}
	})
}

func (n *nfsStore) Chmod(path string, mode os.FileMode) error {
	return n.setAttr(path, func(attr *nfs.Fattr) nfs.Sattr3 {
		return nfs.Sattr3{
			Mode: nfs.SetMode{
				SetIt: true,
				Mode:  uint32(mode),
			},
		}
	})
}

func (n *nfsStore) Chown(path string, owner, group string) error {
	uid := utils.LookupUser(owner)
	gid := utils.LookupGroup(group)
	if uid == -1 || gid == -1 {
		return fmt.Errorf("user(%s):group(%s) not found", owner, group)
	}
	return n.setAttr(path, func(attr *nfs.Fattr) nfs.Sattr3 {
		return nfs.Sattr3{
			UID: nfs.SetUID{
				SetIt: true,
				UID:   uint32(uid),
			},
			GID: nfs.SetUID{
				SetIt: true,
				UID:   uint32(gid),
			},
		}
	})
}

func (n *nfsStore) Symlink(oldName, newName string) error {
	newName = strings.TrimRight(newName, "/")
	p := n.path(newName)
	dir := path.Dir(p)
	if _, _, err := n.target.Lookup(dir); err != nil && os.IsNotExist(err) {
		if _, err := n.target.Mkdir(dir, n.dmode); err != nil && !os.IsExist(err) {
			return errors.Wrapf(err, "mkdir %s", dir)
		}
	} else if err != nil && !os.IsNotExist(err) {
		return err
	}
	return n.target.Symlink(n.path(oldName), n.path(newName))
}

func (n *nfsStore) Readlink(name string) (string, error) {
	f, err := n.target.Open(n.path(name))
	if err != nil {
		return "", errors.Wrapf(err, "open %s", name)
	}
	return f.Readlink()
}

func (n *nfsStore) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (n *nfsStore) findOwnerGroup(attr *nfs.Fattr) (string, string) {
	return utils.UserName(int(attr.UID)), utils.GroupName(int(attr.GID))
}

func (n *nfsStore) getOwnerGroup(info os.FileInfo) (string, string) {
	if st, match := info.(*nfs.Fattr); match {
		return n.findOwnerGroup(st)
	}
	if st, match := info.Sys().(*nfs.Fattr); match {
		return n.findOwnerGroup(st)
	}
	return "", ""
}

func newNFSStore(addr, username, pass, token string) (ObjectStorage, error) {
	if username == "" {
		u, err := user.Current()
		if err != nil {
			return nil, fmt.Errorf("current user: %s", err)
		}
		username = u.Username
	}
	b := strings.Split(addr, ":")
	if len(b) != 2 {
		return nil, fmt.Errorf("invalid NFS address %s", addr)
	}
	host := b[0]
	path := b[1]
	mount, err := nfs.DialMount(host, time.Second*3)
	if err != nil {
		return nil, fmt.Errorf("unable to dial MOUNT service %s: %v", addr, err)
	}
	auth := rpc.NewAuthUnix(username, uint32(utils.GetCurrentUID()), uint32(utils.GetCurrentGID()))
	target, err := mount.Mount(path, auth.Auth())
	target.Config.DirCount = 1 << 17
	// Readdir returns up to 1M at a time, even if MaxCount is set larger
	target.Config.MaxCount = 1 << 20
	if err != nil {
		return nil, fmt.Errorf("unable to mount %s: %v", addr, err)
	}
	umask := utils.GetUmask()
	return &nfsStore{
		username: username,
		host:     host,
		root:     path,
		fmode:    os.FileMode(0666 &^ umask),
		dmode:    os.FileMode(0777 &^ umask),
		target:   target}, nil
}

func init() {
	Register("nfs", newNFSStore)
}


================================================
FILE: pkg/object/object_storage.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"math/rand"
	"net/url"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
)

var ctx = context.Background()
var logger = utils.GetLogger("juicefs")

var UserAgent = "JuiceFS"

type MtimeChanger interface {
	Chtimes(path string, mtime time.Time) error
}

type SupportSymlink interface {
	// Symlink create a symbolic link
	Symlink(oldName, newName string) error
	// Readlink read a symbolic link
	Readlink(name string) (string, error)
}

type File interface {
	Object
	Owner() string
	Group() string
	Mode() os.FileMode
}

type onlyWriter struct {
	io.Writer
}

type file struct {
	obj
	owner     string
	group     string
	mode      os.FileMode
	isSymlink bool
}

func (f *file) Owner() string     { return f.owner }
func (f *file) Group() string     { return f.group }
func (f *file) Mode() os.FileMode { return f.mode }
func (f *file) IsSymlink() bool   { return f.isSymlink }

func MarshalObject(o Object) map[string]interface{} {
	m := make(map[string]interface{})
	m["key"] = o.Key()
	m["size"] = o.Size()
	m["mtime"] = strconv.FormatInt(o.Mtime().UnixNano(), 10)
	m["isdir"] = o.IsDir()
	if f, ok := o.(File); ok {
		m["mode"] = f.Mode()
		m["owner"] = f.Owner()
		m["group"] = f.Group()
		m["isSymlink"] = f.IsSymlink()
	}
	return m
}

func UnmarshalObject(m map[string]interface{}) Object {
	mtime_int64, _ := strconv.ParseInt(m["mtime"].(string), 10, 64)
	mtime := time.Unix(0, mtime_int64)
	o := obj{
		key:   m["key"].(string),
		size:  int64(m["size"].(float64)),
		mtime: mtime,
		isDir: m["isdir"].(bool)}
	if _, ok := m["mode"]; ok {
		f := file{o, m["owner"].(string), m["group"].(string), os.FileMode(m["mode"].(float64)), m["isSymlink"].(bool)}
		return &f
	}
	return &o
}

type FileSystem interface {
	MtimeChanger
	Chmod(path string, mode os.FileMode) error
	Chown(path string, owner, group string) error
}

var notSupported = utils.ENOTSUP

type DefaultObjectStorage struct{}

func (s DefaultObjectStorage) Create(ctx context.Context) error {
	return nil
}

func (s DefaultObjectStorage) Limits() Limits {
	return Limits{IsSupportMultipartUpload: false, IsSupportUploadPartCopy: false}
}

func (s DefaultObjectStorage) Head(key string) (Object, error) {
	return nil, notSupported
}

func (s DefaultObjectStorage) Copy(ctx context.Context, dst, src string) error {
	return notSupported
}

func (s DefaultObjectStorage) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	return nil, notSupported
}

func (s DefaultObjectStorage) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	return nil, notSupported
}

func (s DefaultObjectStorage) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	return nil, notSupported
}

func (s DefaultObjectStorage) AbortUpload(ctx context.Context, key string, uploadID string) {}

func (s DefaultObjectStorage) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	return notSupported
}

func (s DefaultObjectStorage) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	return nil, "", nil
}

func (s DefaultObjectStorage) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	return nil, false, "", notSupported
}

func (s DefaultObjectStorage) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

type Creator func(bucket, accessKey, secretKey, token string) (ObjectStorage, error)

var storages = make(map[string]Creator)

func Register(name string, register Creator) {
	storages[name] = register
}

func CreateStorage(name, endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	f, ok := storages[name]
	if ok {
		logger.Debugf("Creating %s storage at endpoint %s", name, endpoint)
		return f(endpoint, accessKey, secretKey, token)
	}
	return nil, fmt.Errorf("invalid storage: %s", name)
}

var bufPool = sync.Pool{
	New: func() interface{} {
		// Default io.Copy uses 32KB buffer, here we choose a larger one (1MiB io-size increases throughput by ~20%)
		buf := make([]byte, 1<<20)
		return &buf
	},
}

type listThread struct {
	sync.Mutex
	cond      *utils.Cond
	ready     bool
	err       error
	entries   []Object
	nextToken string
	hasMore   bool
}

func (l *listThread) reset() {
	l.err = nil
	l.entries = nil
	l.nextToken = ""
	l.hasMore = false
}

func ListAllWithDelimiter(ctx context.Context, store ObjectStorage, prefix, start, end string, followLink bool) (<-chan Object, error) {
	entries, _, _, err := store.List(ctx, prefix, start, "", "/", 1e9, followLink)
	if err != nil {
		logger.Errorf("list %s: %s", prefix, err)
		return nil, err
	}

	listed := make(chan Object, 10240)
	var walk func(string, []Object) error
	walk = func(prefix string, entries []Object) error {
		var concurrent = 10
		var err error
		threads := make([]listThread, concurrent)
		for c := 0; c < concurrent; c++ {
			t := &threads[c]
			t.cond = utils.NewCond(t)
			go func(c int) {
				for i := c; i < len(entries); i += concurrent {
					key := entries[i].Key()
					if end != "" && key >= end {
						break
					}
					if key < start && !strings.HasPrefix(start, key) {
						continue
					}
					if !entries[i].IsDir() || key == prefix {
						continue
					}
					t.entries, t.hasMore, t.nextToken, t.err = store.List(ctx, key, "\x00", t.nextToken, "/", 1000, followLink) // exclude itself
					t.Lock()
					t.ready = true
					t.cond.Signal()
					for t.ready {
						t.cond.WaitWithTimeout(time.Second)
						if err != nil {
							t.Unlock()
							return
						}
					}
					t.Unlock()
				}
			}(c)
		}

		for i, e := range entries {
			key := e.Key()
			if end != "" && key >= end {
				return nil
			}
			if key >= start {
				listed <- e
			} else if !strings.HasPrefix(start, key) {
				continue
			}
			if !e.IsDir() || key == prefix {
				continue
			}

			t := &threads[i%concurrent]
			t.Lock()
			for !t.ready {
				t.cond.WaitWithTimeout(time.Millisecond * 10)
			}
			if t.err != nil {
				err = t.err
				t.Unlock()
				return err
			}
			for t.hasMore {
				var more []Object
				startAfter := t.entries[len(t.entries)-1].Key()
				more, t.hasMore, t.nextToken, t.err = store.List(ctx, key, startAfter, t.nextToken, "/", 1e9, followLink)
				if t.err != nil {
					err = t.err
					t.Unlock()
					return err
				}
				t.entries = append(t.entries, more...)
			}
			t.ready = false
			t.cond.Signal()
			children := t.entries
			t.reset()
			t.Unlock()

			err = walk(key, children)
			if err != nil {
				return err
			}
		}
		return nil
	}

	go func() {
		defer close(listed)
		err := walk(prefix, entries)
		if err != nil {
			listed <- nil
		}
	}()
	return listed, nil
}

func generateListResult(objs []Object, limit int64) ([]Object, bool, string, error) {
	var nextMarker string
	if len(objs) > 0 {
		nextMarker = objs[len(objs)-1].Key()
	}
	return objs, len(objs) == int(limit), nextMarker, nil
}

func decodeKey(value string, typ *string) (string, error) {
	if typ != nil && *typ == "url" {
		return url.QueryUnescape(value)
	}
	return value, nil
}

func TmpFilePath(parent, name string) string {
	return filepath.Join(filepath.Dir(parent), ".jfs."+name+".tmp."+strconv.Itoa(rand.Int()))
}


================================================
FILE: pkg/object/object_storage_test.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/rand"
	"crypto/rsa"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"reflect"
	"regexp"
	"sort"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/baidubce/bce-sdk-go/services/bos/api"
	"github.com/huaweicloud/huaweicloud-sdk-go-obs/obs"

	"github.com/colinmarc/hdfs/v2/hadoopconf"
	"github.com/juicedata/juicefs/pkg/utils"

	blob2 "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"

	"github.com/volcengine/ve-tos-golang-sdk/v2/tos/enum"

	"github.com/redis/go-redis/v9"
)

func get(s ObjectStorage, k string, off, limit int64, getters ...AttrGetter) (string, error) {
	r, err := s.Get(context.Background(), k, off, limit, getters...)
	if err != nil {
		return "", err
	}
	defer r.Close()
	data, err := io.ReadAll(r)
	if err != nil {
		return "", err
	}
	return string(data), nil
}

func listAll(ctx context.Context, s ObjectStorage, prefix, marker string, limit int64, followLink bool) ([]Object, error) {
	ch, err := ListAll(ctx, s, prefix, marker, followLink, true)
	if err == nil {
		objs := make([]Object, 0)
		for obj := range ch {
			if len(objs) < int(limit) {
				objs = append(objs, obj)
			}
		}
		return objs, nil
	}
	return nil, err
}

func setStorageClass(o ObjectStorage) string {
	if osc, ok := o.(SupportStorageClass); ok {
		var sc = "STANDARD_IA"
		switch o.(type) {
		case *wasb:
			sc = string(blob2.AccessTierCool)
		case *gs:
			sc = "NEARLINE"
		case *ossClient:
			sc = string(oss.StorageClassIA)
		case *tosClient:
			sc = string(enum.StorageClassIa)
		case *obsClient:
			sc = string(obs.StorageClassStandard)
		case *bosclient:
			sc = api.STORAGE_CLASS_STANDARD
		case *minio:
			sc = "REDUCED_REDUNDANCY"
		case *scw:
			sc = "ONEZONE_IA" // STANDARD, ONEZONE_IA, GLACIER
		}
		err := osc.SetStorageClass(sc)
		if err != nil {
			sc = ""
		}
		return sc
	}
	return ""
}

// nolint:errcheck
func testStorage(t *testing.T, s ObjectStorage) {
	ctx := context.Background()
	sc := setStorageClass(s)
	if err := s.Create(ctx); err != nil {
		t.Fatalf("Can't create bucket %s: %s", s, err)
	}
	if err := s.Create(ctx); err != nil {
		t.Fatalf("err should be nil when creating a bucket with the same name")
	}
	prefix := "unit-test/"
	s = WithPrefix(s, prefix)
	defer func() {
		if err := s.Delete(ctx, "test"); err != nil {
			t.Fatalf("delete failed: %s", err)
		}
	}()
	all, err := listAll(ctx, s, "", "", 10000, true)
	var dels []string
	for _, object := range all {
		dels = append(dels, object.Key())
	}
	for i := len(dels) - 1; i >= 0; i-- {
		_ = s.Delete(ctx, dels[i])
	}

	var scPut string
	key := "测试编码文件" + `{"name":"juicefs"}` + string('\u001F') + "%uFF081%uFF09.jpg"
	if err := s.Put(ctx, key, bytes.NewReader(nil), WithStorageClass(&scPut)); err != nil {
		t.Logf("PUT testEncodeFile failed: %s", err.Error())
	} else {
		if scPut != sc {
			t.Fatalf("Storage class should be %q, got %q", sc, scPut)
		}
		if resp, _, _, err := s.List(ctx, "测试编码文件", "", "", "", 1, true); err != nil && err != notSupported {
			t.Logf("List testEncodeFile Failed: %s", err)
		} else if len(resp) == 1 && resp[0].Key() != key {
			t.Logf("List testEncodeFile Failed: expect key %s, but got %s", key, resp[0].Key())
		}
	}
	_ = s.Delete(ctx, key)

	_, err = s.Get(ctx, "not_exists", 0, -1)
	if err == nil {
		t.Fatalf("Get should failed: %s", err)
	}
	if _, err := s.Head(ctx, string(make([]byte, 8<<10))); err == nil {
		t.Logf("Head should failed: %s", err)
	}

	br := []byte("hello")
	if err := s.Put(ctx, "test", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}

	var scGet string
	// get all
	if d, e := get(s, "test", 0, -1, WithStorageClass(&scGet)); e != nil || d != "hello" {
		t.Fatalf("expect hello, but got %v, error: %s", d, e)
	}
	if scGet != sc { // Relax me when testing against a storage that doesn't use specified storage class
		t.Fatalf("Storage class should be %q, got %q", sc, scGet)
	}

	if d, e := get(s, "test", 0, 5); e != nil || d != "hello" {
		t.Fatalf("expect hello, but got %v, error: %s", d, e)
	}
	// get first
	if d, e := get(s, "test", 0, 1); e != nil || d != "h" {
		t.Fatalf("expect h, but got %v, error: %s", d, e)
	}
	// get last
	if d, e := get(s, "test", 4, 1); e != nil || d != "o" {
		t.Fatalf("expect o, but got %v, error: %s", d, e)
	}
	// get last 3
	if d, e := get(s, "test", 2, 3); e != nil || d != "llo" {
		t.Fatalf("expect llo, but got %v, error: %s", d, e)
	}
	// get middle
	if d, e := get(s, "test", 2, 2); e != nil || d != "ll" {
		t.Fatalf("expect ll, but got %v, error: %s", d, e)
	}
	// get the end out of range
	if d, e := get(s, "test", 4, 2); e != nil || d != "o" {
		t.Logf("out-of-range get: 'o', but got %v, error: %s", len(d), e)
	}
	// get the off out of range
	if d, e := get(s, "test", 6, 2); e != nil || d != "" {
		t.Logf("out-of-range get: '', but got %v, error: %s", len(d), e)
	}
	switch s.(*withPrefix).os.(type) {
	case FileSystem:
		objs, err2 := listAll(ctx, s, "", "", 2, true)
		if err2 == nil {
			if len(objs) != 2 {
				t.Fatalf("List should return 2 keys, but got %d", len(objs))
			}
			if objs[0].Key() != "" {
				t.Fatalf("First key should be empty string, but got %s", objs[0].Key())
			}
			if objs[0].Size() != 0 {
				t.Fatalf("First object size should be 0, but got %d", objs[0].Size())
			}
			if objs[1].Key() != "test" {
				t.Fatalf("Second key should be test, but got %s", objs[1].Key())
			}
			if !strings.Contains(s.String(), "encrypted") && objs[1].Size() != 5 {
				t.Fatalf("Size of first key shold be 5, but got %v", objs[1].Size())
			}
			now := time.Now()
			if objs[1].Mtime().Before(now.Add(-30*time.Second)) || objs[1].Mtime().After(now.Add(time.Second*30)) {
				t.Fatalf("Mtime of key should be within 30 seconds, but got %s", objs[1].Mtime().Sub(now))
			}
		} else {
			t.Fatalf("list failed: %s", err2.Error())
		}

		objs, err2 = listAll(ctx, s, "", "test2", 1, true)
		if err2 != nil {
			t.Fatalf("list3 failed: %s", err2.Error())
		} else if len(objs) != 0 {
			t.Fatalf("list3 should not return anything, but got %d", len(objs))
		}
	default:
		objs, err2 := listAll(ctx, s, "", "", 1, true)
		if err2 == nil {
			if len(objs) != 1 {
				t.Fatalf("List should return 1 keys, but got %d", len(objs))
			}
			if objs[0].Key() != "test" {
				t.Fatalf("First key should be test, but got %s", objs[0].Key())
			}
			if !strings.Contains(s.String(), "encrypted") && objs[0].Size() != 5 {
				t.Fatalf("Size of first key shold be 5, but got %v", objs[0].Size())
			}
			now := time.Now()
			if objs[0].Mtime().Before(now.Add(-30*time.Second)) || objs[0].Mtime().After(now.Add(time.Second*30)) {
				t.Fatalf("Mtime of key should be within 30 seconds, but got %s", objs[0].Mtime().Sub(now))
			}
		} else {
			t.Fatalf("list failed: %s", err2.Error())
		}

		objs, err2 = listAll(ctx, s, "", "test2", 1, true)
		if err2 != nil {
			t.Fatalf("list3 failed: %s", err2.Error())
		} else if len(objs) != 0 {
			t.Fatalf("list3 should not return anything, but got %d", len(objs))
		}
	}

	defer s.Delete(ctx, "a/")
	defer s.Delete(ctx, "a/a")
	if err := s.Put(ctx, "a/a", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}
	defer s.Delete(ctx, "a/a1")
	if err := s.Put(ctx, "a/a1", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}
	defer s.Delete(ctx, "b/")
	defer s.Delete(ctx, "b/b")
	if err := s.Put(ctx, "b/b", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}
	defer s.Delete(ctx, "b/b1")
	if err := s.Put(ctx, "b/b1", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}
	defer s.Delete(ctx, "c/")
	//tikv will appear empty value is not supported
	if err1 := s.Put(ctx, "c/", bytes.NewReader(nil)); err1 != nil {
		//minio will appear XMinioObjectExistsAsDirectory: Object name already exists as a directory. status code:  409
		if err2 := s.Put(ctx, "c/", bytes.NewReader(br)); err2 != nil {
			t.Fatalf("PUT failed err1: %s, err2: %s", err1.Error(), err2.Error())
		}
	}
	defer s.Delete(ctx, "a1")
	if err := s.Put(ctx, "a1", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}
	defer s.Delete(ctx, "a/b/c/d/e/f")
	if err := s.Put(ctx, "a/b/c/d/e/f", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}

	br = []byte("hello2")
	if err := s.Put(ctx, "a1", bytes.NewReader(br)); err != nil {
		t.Fatalf("PUT failed: %s", err.Error())
	}

	if obs, more, nextMarker, err := s.List(ctx, "", "", "", "/", 4, true); err != nil {
		if !errors.Is(err, notSupported) {
			t.Fatalf("list: %s", err)
		} else {
			t.Logf("list is not supported")
		}
	} else {
		if _, ok := s.(*withPrefix).os.(FileSystem); !ok {
			keys := []string{"a/", "a1", "b/", "c/"}
			if len(obs) != 4 {
				t.Fatalf("list should return 4 results but got %d", len(obs))
			}
			for i, o := range obs {
				if o.Key() != keys[i] {
					t.Fatalf("should get key %s but got %s", keys[i], o.Key())
				}
			}
			if !more {
				t.Fatalf("should have more results")
			}
			if nextMarker == "" {
				t.Fatalf("next marker should not be empty")
			}
			obs, more, nextMarker, err = s.List(ctx, "", obs[len(obs)-1].Key(), nextMarker, "/", 4, true)
			if err != nil {
				t.Fatalf("list with marker: %s", err)
			}
			if len(obs) != 1 {
				t.Fatalf("list should return 1 results but got %d", len(obs))
			}
			if obs[0].Key() != "test" {
				t.Fatalf("should get key test but got %s", obs[0].Key())
			}
			_, more, nextMarker, err = s.List(ctx, "", obs[len(obs)-1].Key(), nextMarker, "/", 4, true)
			if more {
				t.Fatalf("should no more results")
			}
			if nextMarker != "" {
				t.Fatalf("next marker should not be empty")
			}
		}
	}

	if obs, _, _, err := s.List(ctx, "", "", "", "/", 10, true); err != nil {
		if !errors.Is(err, notSupported) {
			t.Fatalf("list with delimiter: %s", err)
		} else {
			t.Logf("list with delimiter is not supported")
		}
	} else {
		switch s.(*withPrefix).os.(type) {
		case FileSystem:
			if len(obs) == 0 || obs[0].Key() != "" {
				t.Fatalf("list should return itself")
			} else {
				obs = obs[1:] // ignore itself
			}
		}
		if len(obs) != 5 {
			t.Fatalf("list with delimiter should return five results but got %d", len(obs))
		}
		keys := []string{"a/", "a1", "b/", "c/", "test"}
		for i, o := range obs {
			if o.Key() != keys[i] {
				t.Fatalf("should get key %s but got %s", keys[i], o.Key())
			}
		}
	}

	if obs, _, _, err := s.List(ctx, "a", "", "", "/", 10, true); err != nil {
		if !errors.Is(err, notSupported) {
			t.Fatalf("list with delimiter: %s", err)
		}
	} else {
		if len(obs) != 2 {
			t.Fatalf("list with delimiter should return two results but got %d", len(obs))
		}
		keys := []string{"a/", "a1"}
		for i, o := range obs {
			if o.Key() != keys[i] {
				t.Fatalf("should get key %s but got %s", keys[i], o.Key())
			}
		}
	}

	if obs, _, _, err := s.List(ctx, "a/", "", "", "/", 10, true); err != nil {
		if !errors.Is(err, notSupported) {
			t.Fatalf("list with delimiter: %s", err)
		} else {
			t.Logf("list with delimiter is not supported")
		}
	} else {
		switch s.(*withPrefix).os.(type) {
		case FileSystem:
			if len(obs) == 0 || obs[0].Key() != "a/" {
				t.Fatalf("list should return itself")
			} else {
				obs = obs[1:] // ignore itself
			}
		}
		if len(obs) != 3 {
			t.Fatalf("list with delimiter should return three results but got %d", len(obs))
		}
		keys := []string{"a/a", "a/a1", "a/b/"}
		for i, o := range obs {
			if o.Key() != keys[i] {
				t.Fatalf("should get key %s but got %s", keys[i], o.Key())
			}
		}
	}

	// test redis cluster list all api
	keyTotal := 100
	var sortedKeys []string
	for i := 0; i < keyTotal; i++ {
		k := fmt.Sprintf("hashKey%d", i)
		sortedKeys = append(sortedKeys, k)
		if err := s.Put(ctx, k, bytes.NewReader(br)); err != nil {
			t.Fatalf("PUT failed: %s", err.Error())
		}
	}
	sort.Strings(sortedKeys)
	defer func() {
		for i := 0; i < keyTotal; i++ {
			_ = s.Delete(ctx, fmt.Sprintf("hashKey%d", i))
		}
	}()
	objs, err := listAll(ctx, s, "hashKey", "", int64(keyTotal), true)
	if err != nil {
		t.Fatalf("list4 failed: %s", err.Error())
	} else {
		for i := 0; i < keyTotal; i++ {
			if objs[i].Key() != sortedKeys[i] {
				t.Fatal("The result for list4 is incorrect")
			}
			if sc != "" && objs[i].StorageClass() != sc {
				t.Fatal("storage class is not correct")
			}
		}
	}

	f, _ := os.CreateTemp("", "test")
	f.Write([]byte("this is a file"))
	f.Seek(0, 0)
	os.Remove(f.Name())
	defer f.Close()
	if err := s.Put(ctx, "file", f); err != nil {
		t.Fatalf("failed to put from file")
	} else if _, err := s.Head(ctx, "file"); err != nil {
		t.Fatalf("file should exists")
	} else {
		if err := s.Delete(ctx, "file"); err != nil {
			t.Fatalf("delete failed %s", err)
		}
	}

	if _, err := s.Head(ctx, "not-exist-file"); !os.IsNotExist(err) {
		t.Fatal("err should be os.ErrNotExist")
	}

	if o, err := s.Head(ctx, "test"); err != nil {
		t.Fatalf("check exists failed: %s", err.Error())
	} else if sc != "" && o.StorageClass() != sc {
		t.Fatalf("storage class should be %s but got %s", sc, o.StorageClass())
	}

	dstKey := "test-copy"
	defer s.Delete(ctx, dstKey)
	err = s.Copy(ctx, fmt.Sprintf("%s%s", prefix, dstKey), fmt.Sprintf("%stest", prefix))
	if err != nil && err != notSupported {
		t.Fatalf("copy failed: %s", err.Error())
	}
	if err == nil {
		if o, err := s.Head(ctx, dstKey); err != nil {
			t.Fatalf("check exists failed: %s", err.Error())
		} else if sc != "" && o.StorageClass() != sc {
			t.Fatalf("storage class should be %s but got %s", sc, o.StorageClass())
		}
	}

	if err := s.Delete(ctx, "test"); err != nil {
		t.Fatalf("delete failed: %s", err)
	}

	if err := s.Delete(ctx, "test"); err != nil {
		t.Fatalf("delete non exists: %v", err)
	}

	getMockData := func(seed []byte, idx int) []byte {
		size := len(seed)
		if size == 0 {
			return nil
		}
		content := make([]byte, size)
		if idx == 0 {
			content = seed
		} else {
			i := idx % size
			copy(content[:size-i], seed[i:size])
			copy(content[size-i:size], seed[:i])
		}
		return content
	}
	k := "large"
	defer s.Delete(ctx, k)

	if upload, err := s.CreateMultipartUpload(ctx, k); err == nil {
		total := 3
		seed := make([]byte, upload.MinPartSize)
		utils.RandRead(seed)
		parts := make([]*Part, total)
		content := make([][]byte, total)
		for i := 0; i < total; i++ {
			content[i] = getMockData(seed, i)
		}
		pool := make(chan struct{}, 4)
		errCh := make(chan error, total)
		var wg sync.WaitGroup
		for i := 1; i <= total; i++ {
			pool <- struct{}{}
			wg.Add(1)
			num := i
			go func() {
				defer func() {
					<-pool
					wg.Done()
				}()
				parts[num-1], err = s.UploadPart(ctx, k, upload.UploadID, num, content[num-1])
				if err != nil {
					errCh <- fmt.Errorf("multipart upload error: %v", err)
				}
			}()
		}
		wg.Wait()
		close(errCh)

		for err := range errCh {
			t.Fatalf("Test failed: %v", err)
		}

		// overwrite the first part
		firstPartContent := append(getMockData(seed, 0), getMockData(seed, 0)...)
		if len(firstPartContent) < int(s.Limits().MaxPartSize) {
			firstPartContent = getMockData(seed, 0)
			firstPartContent[0] = 'a'
		}
		oldPart := parts[0]
		if parts[0], err = s.UploadPart(ctx, k, upload.UploadID, 1, firstPartContent); err != nil {
			t.Logf("overwrite the first part error: %v", err)
			parts[0] = oldPart
		} else {
			content[0] = firstPartContent
		}

		// overwrite the last part
		lastPartContent := []byte("hello")
		oldPart = parts[total-1]
		if parts[total-1], err = s.UploadPart(ctx, k, upload.UploadID, total, lastPartContent); err != nil {
			t.Logf("overwrite the last part error: %v", err)
			parts[total-1] = oldPart
		} else {
			content[total-1] = lastPartContent
		}

		if err = s.CompleteUpload(ctx, k, upload.UploadID, parts); err != nil {
			t.Fatalf("failed to complete multipart upload: %v", err)
		}
		if meta, err := s.Head(ctx, k); err != nil {
			t.Fatalf("failed to head object: %v", err)
		} else if sc != "" && meta.StorageClass() != sc {
			t.Fatalf("storage class should be %s but got %s", sc, meta.StorageClass())
		}
		checkContent := func(key string, content []byte) {
			r, err := s.Get(ctx, key, 0, -1)
			if err != nil {
				t.Fatalf("failed to get multipart upload file: %v", err)
			}
			defer r.Close()
			cnt, err := io.ReadAll(r)
			if err != nil {
				t.Fatalf("failed to get multipart upload file: %v", err)
			}
			if !bytes.Equal(cnt, content) {
				t.Fatal("the content of the multipart upload file is incorrect")
			}
		}
		checkContent(k, bytes.Join(content, nil))

		if s.Limits().IsSupportUploadPartCopy {
			var copyUpload *MultipartUpload
			var dstKey = "dstUploadPartCopyKey"
			defer s.Delete(ctx, dstKey)
			if copyUpload, err = s.CreateMultipartUpload(ctx, dstKey); err != nil {
				t.Fatalf("failed to create multipart upload: %v", err)
			}
			copyParts := make([]*Part, total)
			var startIdx = 0
			for i, c := range content {
				copyParts[i], err = s.UploadPartCopy(ctx, dstKey, copyUpload.UploadID, i+1, k, int64(startIdx), int64(len(c)))
				if err != nil {
					t.Fatalf("failed to upload part copy: %v", err)
				}
				startIdx += len(c)
			}
			if err = s.CompleteUpload(ctx, dstKey, copyUpload.UploadID, copyParts); err != nil {
				t.Fatalf("failed to complete multipart upload: %v", err)
			}
			checkContent(dstKey, bytes.Join(content, nil))
		}
	} else {
		t.Logf("%s does not support multipart upload: %s", s, err.Error())
	}

	// Copy empty objects
	defer func() {
		if err := s.Delete(ctx, "empty"); err != nil {
			t.Logf("delete empty file failed: %s", err)
		}
	}()

	if err := s.Put(ctx, "empty", bytes.NewReader([]byte{})); err != nil {
		t.Logf("PUT empty object failed: %s", err.Error())
	}

	// Copy `/` suffixed object
	defer func() {
		if err := s.Delete(ctx, "slash/"); err != nil {
			t.Logf("delete slash/ failed %s", err)
		}
	}()
	if err := s.Put(ctx, "slash/", bytes.NewReader([]byte{})); err != nil {
		t.Logf("PUT `/` suffixed object failed: %s", err.Error())
	}
}

func TestMem(t *testing.T) {
	m, _ := newMem("", "", "", "")
	testStorage(t, m)
}

func TestDisk(t *testing.T) {
	_ = os.RemoveAll("/tmp/abc/")
	s, _ := newDisk("/tmp/abc/", "", "", "")
	testStorage(t, s)
}

func TestQingStor(t *testing.T) { //skip mutate
	if os.Getenv("QY_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	s, _ := newQingStor(os.Getenv("QY_ENDPOINT"),
		os.Getenv("QY_ACCESS_KEY"), os.Getenv("QY_SECRET_KEY"), "")
	testStorage(t, s)

	//private cloud
	if os.Getenv("PRIVATE_QY_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	s2, _ := newQingStor("http://test.jn1.is.shanhe.com",
		os.Getenv("PRIVATE_QY_ACCESS_KEY"), os.Getenv("PRIVATE_QY_SECRET_KEY"), "")
	testStorage(t, s2)
}

func TestS3(t *testing.T) { //skip mutate
	if os.Getenv("AWS_ACCESS_KEY_ID") == "" {
		t.SkipNow()
	}
	s, _ := newS3(os.Getenv("AWS_ENDPOINT"),
		os.Getenv("AWS_ACCESS_KEY_ID"),
		os.Getenv("AWS_SECRET_ACCESS_KEY"),
		os.Getenv("AWS_SESSION_TOKEN"))
	testStorage(t, s)
}

func TestOracleCompileRegexp(t *testing.T) {
	ep := "axntujn0ebj1.compat.objectstorage.ap-singapore-1.oraclecloud.com"
	oracleCompile := regexp.MustCompile(oracleCompileRegexp)
	if oracleCompile.MatchString(ep) {
		if submatch := oracleCompile.FindStringSubmatch(ep); len(submatch) >= 2 {
			if submatch[1] != "ap-singapore-1" {
				t.Fatalf("oracle endpoint parse failed")
			}
		} else {
			t.Fatalf("oracle endpoint parse failed")
		}
	} else {
		t.Fatalf("oracle endpoint parse failed")
	}
}

func TestOVHCompileRegexp(t *testing.T) {
	for _, ep := range []string{"s3.gra.cloud.ovh.net", "s3.gra.perf.cloud.ovh.net", "s3.gra.io.cloud.ovh.net"} {
		ovhCompile := regexp.MustCompile(OVHCompileRegexp)
		if ovhCompile.MatchString(ep) {
			if submatch := ovhCompile.FindStringSubmatch(ep); len(submatch) >= 2 {
				if submatch[1] != "gra" {
					t.Fatalf("ovh endpoint parse failed")
				}
			} else {
				t.Fatalf("ovh endpoint parse failed")
			}
		} else {
			t.Fatalf("ovh endpoint parse failed")
		}
	}
}

func TestOSS(t *testing.T) { //skip mutate
	if os.Getenv("ALICLOUD_ACCESS_KEY_ID") == "" {
		t.SkipNow()
	}
	s, _ := newOSS(os.Getenv("ALICLOUD_ENDPOINT"),
		os.Getenv("ALICLOUD_ACCESS_KEY_ID"),
		os.Getenv("ALICLOUD_ACCESS_KEY_SECRET"), "")
	testStorage(t, s)
}

func TestUFile(t *testing.T) { //skip mutate
	if os.Getenv("UCLOUD_PUBLIC_KEY") == "" {
		t.SkipNow()
	}
	ufile, _ := newUFile(os.Getenv("UCLOUD_ENDPOINT"),
		os.Getenv("UCLOUD_PUBLIC_KEY"), os.Getenv("UCLOUD_PRIVATE_KEY"), "")
	testStorage(t, ufile)
}

func TestGS(t *testing.T) { //skip mutate
	if os.Getenv("GOOGLE_APPLICATION_CREDENTIALS") == "" {
		t.SkipNow()
	}
	gs, _ := newGS(os.Getenv("GOOGLE_ENDPOINT"), "", "", "")
	testStorage(t, gs)
}

func TestQiniu(t *testing.T) { //skip mutate
	if os.Getenv("QINIU_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	qiniu, _ := newQiniu(os.Getenv("QINIU_ENDPOINT"),
		os.Getenv("QINIU_ACCESS_KEY"), os.Getenv("QINIU_SECRET_KEY"), "")
	testStorage(t, qiniu)
	//qiniu, _ = newQiniu("https://test.cn-north-1-s3.qiniu.com",
	//	os.Getenv("QINIU_ACCESS_KEY"), os.Getenv("QINIU_SECRET_KEY"))
	//testStorage(t, qiniu)
}

func TestKS3(t *testing.T) { //skip mutate
	if os.Getenv("KS3_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	ks3, _ := newKS3(os.Getenv("KS3_ENDPOINT"),
		os.Getenv("KS3_ACCESS_KEY"), os.Getenv("KS3_SECRET_KEY"), "")
	testStorage(t, ks3)
}

func TestCOS(t *testing.T) { //skip mutate
	if os.Getenv("COS_SECRETID") == "" {
		t.SkipNow()
	}
	cos, _ := newCOS(
		os.Getenv("COS_ENDPOINT"),
		os.Getenv("COS_SECRETID"), os.Getenv("COS_SECRETKEY"), "")
	testStorage(t, cos)
}

func TestAzure(t *testing.T) { //skip mutate
	if os.Getenv("AZURE_STORAGE_ACCOUNT") == "" {
		t.SkipNow()
	}
	//https://containersName.core.windows.net
	abs, _ := newWasb(os.Getenv("AZURE_ENDPOINT"),
		os.Getenv("AZURE_STORAGE_ACCOUNT"), os.Getenv("AZURE_STORAGE_KEY"), "")
	testStorage(t, abs)
}

func TestB2(t *testing.T) { //skip mutate
	if os.Getenv("B2_ACCOUNT_ID") == "" {
		t.SkipNow()
	}
	b, err := newB2(os.Getenv("B2_ENDPOINT"), os.Getenv("B2_ACCOUNT_ID"), os.Getenv("B2_APP_KEY"), "")
	if err != nil {
		t.Fatalf("create B2: %s", err)
	}
	testStorage(t, b)
}

func TestSpace(t *testing.T) { //skip mutate
	if os.Getenv("SPACE_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	b, _ := newSpace(os.Getenv("SPACE_ENDPOINT"), os.Getenv("SPACE_ACCESS_KEY"), os.Getenv("SPACE_SECRET_KEY"), "")
	testStorage(t, b)
}

func TestBOS(t *testing.T) { //skip mutate
	if os.Getenv("BDCLOUD_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	b, _ := newBOS(os.Getenv("BDCLOUD_ENDPOINT"),
		os.Getenv("BDCLOUD_ACCESS_KEY"), os.Getenv("BDCLOUD_SECRET_KEY"), "")
	testStorage(t, b)
}

func TestSftp(t *testing.T) { //skip mutate
	if os.Getenv("SFTP_HOST") == "" {
		t.SkipNow()
	}
	b, _ := newSftp(os.Getenv("SFTP_HOST"), os.Getenv("SFTP_USER"), os.Getenv("SFTP_PASS"), "")
	testStorage(t, b)
}

func TestOBS(t *testing.T) { //skip mutate
	if os.Getenv("HWCLOUD_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	b, _ := newOBS(os.Getenv("HWCLOUD_ENDPOINT"),
		os.Getenv("HWCLOUD_ACCESS_KEY"), os.Getenv("HWCLOUD_SECRET_KEY"), "")
	testStorage(t, b)
}

func TestNFS(t *testing.T) { //skip mutate
	if os.Getenv("NFS_ADDR") == "" {
		t.SkipNow()
	}
	b, err := newNFSStore(os.Getenv("NFS_ADDR"), os.Getenv("NFS_ACCESS_KEY"), os.Getenv("NFS_SECRET_KEY"), "")
	if err != nil {
		t.Fatal(err)
	}
	testStorage(t, b)
}

func TestHDFS(t *testing.T) { //skip mutate
	conf := make(hadoopconf.HadoopConf)
	conf["dfs.namenode.rpc-address.ns.namenode1"] = "hadoop01:8020"
	conf["dfs.namenode.rpc-address.ns.namenode2"] = "hadoop02:8020"

	checkAddr := func(addr string, expected []string, base string) {
		addresses, basePath := parseHDFSAddr(addr, conf)
		sort.Strings(addresses)
		if !reflect.DeepEqual(addresses, expected) {
			t.Fatalf("expected addrs is %+v but got %+v from %s", expected, addresses, addr)
		}
		if basePath != base {
			t.Fatalf("expected path is %s but got %s from %s", base, basePath, addr)
		}
	}

	checkAddr("hadoop01:8020", []string{"hadoop01:8020"}, "/")
	checkAddr("hdfs://hadoop01:8020/", []string{"hadoop01:8020"}, "/")
	checkAddr("hadoop01:8020/user/juicefs/", []string{"hadoop01:8020"}, "/user/juicefs/")
	checkAddr("hadoop01:8020/user/juicefs", []string{"hadoop01:8020"}, "/user/juicefs/")
	checkAddr("hdfs://hadoop01:8020/user/juicefs/", []string{"hadoop01:8020"}, "/user/juicefs/")

	// for HA
	checkAddr("hadoop01:8020,hadoop02:8020", []string{"hadoop01:8020", "hadoop02:8020"}, "/")
	checkAddr("hadoop01:8020,hadoop02:8020/user/juicefs/", []string{"hadoop01:8020", "hadoop02:8020"}, "/user/juicefs/")
	checkAddr("hdfs://ns/user/juicefs", []string{"hadoop01:8020", "hadoop02:8020"}, "/user/juicefs/")
	checkAddr("ns/user/juicefs/", []string{"hadoop01:8020", "hadoop02:8020"}, "/user/juicefs/")

	if os.Getenv("HDFS_ADDR") == "" {
		t.SkipNow()
	}
	dfs, _ := newHDFS(os.Getenv("HDFS_ADDR"), "", "", "")
	testStorage(t, dfs)
}

func TestOOS(t *testing.T) { //skip mutate
	if os.Getenv("OOS_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	b, _ := newOOS(os.Getenv("OOS_ENDPOINT"),
		os.Getenv("OOS_ACCESS_KEY"), os.Getenv("OOS_SECRET_KEY"), "")
	testStorage(t, b)
}

func TestScw(t *testing.T) { //skip mutate
	if os.Getenv("SCW_ACCESS_KEY") == "" {
		t.SkipNow()
	}
	b, _ := newScw(os.Getenv("SCW_ENDPOINT"), os.Getenv("SCW_ACCESS_KEY"), os.Getenv("SCW_SECRET_KEY"), "")
	testStorage(t, b)
}

func TestMinIO(t *testing.T) {
	if os.Getenv("MINIO_TEST_BUCKET") == "" {
		t.SkipNow()
	}
	b, _ := newMinio(os.Getenv("MINIO_TEST_BUCKET"), os.Getenv("MINIO_ACCESS_KEY"), os.Getenv("MINIO_SECRET_KEY"), "")
	testStorage(t, b)
}

// func TestUpYun(t *testing.T) {
// 	s, _ := newUpyun("http://jfstest", "test", "")
// 	testStorage(t, s)
// }

func TestTiKV(t *testing.T) { //skip mutate
	if os.Getenv("TIKV_ADDR") == "" {
		t.SkipNow()
	}
	s, err := newTiKV(os.Getenv("TIKV_ADDR"), "", "", "")
	if err != nil {
		t.Fatal(err)
	}
	testStorage(t, s)
}

func TestRedis(t *testing.T) {
	if os.Getenv("REDIS_ADDR") == "" {
		t.SkipNow()
	}

	opt, _ := redis.ParseURL(os.Getenv("REDIS_ADDR"))
	rdb := redis.NewClient(opt)
	_ = rdb.FlushDB(context.Background())

	s, err := newRedis(os.Getenv("REDIS_ADDR"), "", "", "")
	if err != nil {
		t.Fatal(err)
	}
	testStorage(t, s)
}

func TestSwift(t *testing.T) { //skip mutate
	if os.Getenv("SWIFT_ADDR") == "" {
		t.SkipNow()
	}
	s, err := newSwiftOSS(os.Getenv("SWIFT_ADDR"), "", "", "")
	if err != nil {
		t.Fatal(err)
	}
	testStorage(t, s)
}

func TestWebDAV(t *testing.T) { //skip mutate
	if os.Getenv("WEBDAV_TEST_BUCKET") == "" {
		t.SkipNow()
	}
	s, _ := newWebDAV(os.Getenv("WEBDAV_TEST_BUCKET"), "", "", "")
	testStorage(t, s)
}

func TestEncrypted(t *testing.T) {
	s, _ := CreateStorage("mem", "", "", "", "")
	privkey, _ := rsa.GenerateKey(rand.Reader, 2048)
	kc := NewRSAEncryptor(privkey)
	dc, _ := NewDataEncryptor(kc, AES256GCM_RSA)
	es := NewEncrypted(s, dc)
	testStorage(t, es)
}

func TestMarsharl(t *testing.T) {
	ctx := context.Background()
	if os.Getenv("HDFS_ADDR") == "" {
		t.SkipNow()
	}
	s, _ := newHDFS(os.Getenv("HDFS_ADDR"), "", "", "")
	if err := s.Put(ctx, "hello", bytes.NewReader([]byte("world"))); err != nil {
		t.Fatalf("PUT failed: %s", err)
	}
	fs := s.(FileSystem)
	_ = fs.Chown("hello", "user", "group")
	_ = fs.Chmod("hello", 0764)
	o, err := s.Head(ctx, "hello")
	if err != nil {
		t.Fatalf("HEAD failed: %s", err)
	}

	m := MarshalObject(o)
	d, _ := json.Marshal(m)
	var m2 map[string]interface{}
	if err := json.Unmarshal(d, &m2); err != nil {
		t.Fatalf("unmarshal: %s", err)
	}
	o2 := UnmarshalObject(m2)
	if math.Abs(float64(o2.Mtime().UnixNano()-o.Mtime().UnixNano())) > 1000 {
		t.Fatalf("mtime %s != %s", o2.Mtime(), o.Mtime())
	}
	o2.(*file).mtime = o.Mtime()
	if !reflect.DeepEqual(o, o2) {
		t.Fatalf("%+v != %+v", o2, o)
	}
}

func TestSharding(t *testing.T) {
	s, _ := NewSharded("mem", "%d", "", "", "", 10)
	testStorage(t, s)
}

func TestSQLite(t *testing.T) {
	s, err := newSQLStore("sqlite3", "/tmp/teststore.db", "", "")
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, s)
}

func TestPG(t *testing.T) { //skip mutate
	if os.Getenv("PG_ADDR") == "" {
		t.SkipNow()
	}
	s, err := newSQLStore("postgres", os.Getenv("PG_ADDR"), os.Getenv("PG_USER"), os.Getenv("PG_PASSWORD"))
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, s)

}
func TestPGWithSearchPath(t *testing.T) { //skip mutate
	_, err := newSQLStore("postgres", "127.0.0.1:5432/test?sslmode=disable&search_path=juicefs,public", "", "")
	if !strings.Contains(err.Error(), "currently, only one schema is supported in search_path") {
		t.Fatalf("TestPGWithSearchPath error: %s", err)
	}
}

func TestMySQL(t *testing.T) { //skip mutate
	if os.Getenv("MYSQL_ADDR") == "" {
		t.SkipNow()
	}
	s, err := newSQLStore("mysql", os.Getenv("MYSQL_ADDR"), os.Getenv("MYSQL_USER"), os.Getenv("MYSQL_PASSWORD"))
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, s)
}

func TestNameString(t *testing.T) {
	s, _ := newMem("test", "", "", "")
	s = WithPrefix(s, "a/")
	s = WithPrefix(s, "b/")
	if s.String() != "mem://test/a/b/" {
		t.Fatalf("name with two prefix does not match: %s", s.String())
	}
}

func TestEtcd(t *testing.T) { //skip mutate
	if os.Getenv("ETCD_ADDR") == "" {
		t.SkipNow()
	}
	s, _ := newEtcd(os.Getenv("ETCD_ADDR"), "", "", "")
	testStorage(t, s)
}

//func TestCeph(t *testing.T) {
//	if os.Getenv("CEPH_ENDPOINT") == "" {
//		t.SkipNow()
//	}
//	s, _ := newCeph(os.Getenv("CEPH_ENDPOINT"), os.Getenv("CEPH_CLUSTER"), os.Getenv("CEPH_USER"))
//	testStorage(t, s)
//}

func TestEOS(t *testing.T) { //skip mutate
	if os.Getenv("EOS_ENDPOINT") == "" {
		t.SkipNow()
	}
	s, _ := newEos(os.Getenv("EOS_ENDPOINT"), os.Getenv("EOS_ACCESS_KEY"), os.Getenv("EOS_SECRET_KEY"), "")
	testStorage(t, s)
}

func TestWASABI(t *testing.T) { //skip mutate
	if os.Getenv("WASABI_ENDPOINT") == "" {
		t.SkipNow()
	}
	s, _ := newWasabi(os.Getenv("WASABI_ENDPOINT"), os.Getenv("WASABI_ACCESS_KEY"), os.Getenv("WASABI_SECRET_KEY"), "")
	testStorage(t, s)
}

func TestIBMCOS(t *testing.T) { //skip mutate
	if os.Getenv("IBMCOS_ENDPOINT") == "" {
		t.SkipNow()
	}
	s, _ := newIBMCOS(os.Getenv("IBMCOS_ENDPOINT"), os.Getenv("IBMCOS_ACCESS_KEY"), os.Getenv("IBMCOS_SECRET_KEY"), "")
	testStorage(t, s)
}

func TestTOS(t *testing.T) { //skip mutate
	if os.Getenv("TOS_ENDPOINT") == "" {
		t.SkipNow()
	}
	tos, err := newTOS(os.Getenv("TOS_ENDPOINT"), os.Getenv("TOS_ACCESS_KEY"), os.Getenv("TOS_SECRET_KEY"), "")
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, tos)
}

func TestDragonfly(t *testing.T) { //skip mutate
	if os.Getenv("DRAGONFLY_ENDPOINT") == "" {
		t.SkipNow()
	}
	dragonfly, err := newDragonfly(os.Getenv("DRAGONFLY_ENDPOINT"), "", "", "")
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, dragonfly)
}

func TestCifs(t *testing.T) { //skip mutate
	if os.Getenv("CIFS_ADDR") == "" {
		fmt.Println("skip CIFS test")
		t.SkipNow()
	}
	cifs, err := newCifs(os.Getenv("CIFS_ADDR"), os.Getenv("CIFS_USER"), os.Getenv("CIFS_PASSWORD"), "")
	if err != nil {
		t.Fatalf("create: %s", err)
	}
	testStorage(t, cifs)
}

// func TestBunny(t *testing.T) { //skip mutate
// 	if os.Getenv("BUNNY_ENDPOINT") == "" {
// 		t.SkipNow()
// 	}
// 	bunny, err := newBunny(os.Getenv("BUNNY_ENDPOINT"), "", os.Getenv("BUNNY_SECRET_KEY"), "")
// 	if err != nil {
// 		t.Fatalf("create: %s", err)
// 	}
// 	testStorage(t, bunny)
// }

func TestMain(m *testing.M) {
	if envFile := os.Getenv("JUICEFS_ENV_FILE_FOR_TEST"); envFile != "" {
		// schema: S3 AWS_ENDPOINT=xxxxx
		if _, err := os.Stat(envFile); err == nil {
			file, _ := os.ReadFile(envFile)
			for _, line := range strings.Split(strings.TrimSpace(string(file)), "\n") {
				if envkv := strings.SplitN(line, "=", 2); len(envkv) == 2 {
					if err := os.Setenv(envkv[0], envkv[1]); err != nil {
						logger.Errorf("set env %s=%s error", envkv[0], envkv[1])
					}
				}
			}
		}
	}
	m.Run()
}


================================================
FILE: pkg/object/obs.go
================================================
//go:build !noobs
// +build !noobs

/*
 * JuiceFS, Copyright 2019 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/base64"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/huaweicloud/huaweicloud-sdk-go-obs/obs"
	"github.com/juicedata/juicefs/pkg/utils"
	"golang.org/x/net/http/httpproxy"
)

const obsDefaultRegion = "cn-north-1"

type obsClient struct {
	bucket    string
	region    string
	checkEtag bool
	sc        string
	c         *obs.ObsClient
}

func (s *obsClient) String() string {
	return fmt.Sprintf("obs://%s/", s.bucket)
}

func (s *obsClient) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              100 << 10,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (s *obsClient) Create(ctx context.Context) error {
	params := &obs.CreateBucketInput{}
	params.Bucket = s.bucket
	params.Location = s.region
	params.AvailableZone = "3az"
	params.StorageClass = obs.StorageClassType(s.sc)
	_, err := s.c.CreateBucket(params)
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}
func getStorageClassStr(sc obs.StorageClassType) string {
	if sc == "" {
		return string(obs.StorageClassStandard)
	} else {
		return string(sc)
	}
}
func (s *obsClient) Head(ctx context.Context, key string) (Object, error) {
	params := &obs.GetObjectMetadataInput{
		Bucket: s.bucket,
		Key:    key,
	}
	r, err := s.c.GetObjectMetadata(params)
	if err != nil {
		if e, ok := err.(obs.ObsError); ok && e.BaseModel.StatusCode == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}

	return &obj{
		key,
		r.ContentLength,
		r.LastModified,
		strings.HasSuffix(key, "/"),
		getStorageClassStr(r.StorageClass),
	}, nil
}

func (s *obsClient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	params := &obs.GetObjectInput{}
	params.Bucket = s.bucket
	params.Key = key
	var resp *obs.GetObjectOutput
	var err error
	rangeStr := getRange(off, limit)
	if rangeStr != "" {
		resp, err = s.c.GetObject(params, obs.WithHeader(obs.HEADER_RANGE, []string{rangeStr}))
	} else {
		resp, err = s.c.GetObject(params)
	}
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestId).SetStorageClass(getStorageClassStr(resp.StorageClass))
	}
	if err != nil {
		return nil, err
	}
	if err = checkGetStatus(resp.StatusCode, rangeStr != ""); err != nil {
		_ = resp.Body.Close()
		return nil, err
	}
	return resp.Body, nil
}

func (s *obsClient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var body io.ReadSeeker
	var vlen int64
	var sum []byte
	if b, ok := in.(io.ReadSeeker); ok {
		var err error
		h := md5.New()
		buf := bufPool.Get().(*[]byte)
		defer bufPool.Put(buf)
		vlen, err = io.CopyBuffer(h, in, *buf)
		if err != nil {
			return err
		}
		_, err = b.Seek(0, io.SeekStart)
		if err != nil {
			return err
		}
		sum = h.Sum(nil)
		body = b
	} else {
		data, err := io.ReadAll(in)
		if err != nil {
			return err
		}
		vlen = int64(len(data))
		s := md5.Sum(data)
		sum = s[:]
		body = bytes.NewReader(data)
	}
	mimeType := utils.GuessMimeType(key)
	params := &obs.PutObjectInput{}
	params.Bucket = s.bucket
	params.Key = key
	params.Body = body
	params.ContentLength = vlen
	params.ContentMD5 = base64.StdEncoding.EncodeToString(sum[:])
	params.ContentType = mimeType
	params.StorageClass = obs.StorageClassType(s.sc)
	resp, err := s.c.PutObject(params)
	if err == nil && s.checkEtag && strings.Trim(resp.ETag, "\"") != obs.Hex(sum) {
		err = fmt.Errorf("unexpected ETag: %s != %s", strings.Trim(resp.ETag, "\""), obs.Hex(sum))
	}
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestId).SetStorageClass(getStorageClassStr(resp.StorageClass))
	}
	return err
}

func (s *obsClient) Copy(ctx context.Context, dst, src string) error {
	params := &obs.CopyObjectInput{}
	params.Bucket = s.bucket
	params.Key = dst
	params.CopySourceBucket = s.bucket
	params.CopySourceKey = src
	params.StorageClass = obs.StorageClassType(s.sc)
	_, err := s.c.CopyObject(params)
	return err
}

func (s *obsClient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	params := obs.DeleteObjectInput{}
	params.Bucket = s.bucket
	params.Key = key
	resp, err := s.c.DeleteObject(&params)
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestId)
	}
	return err
}

func (s *obsClient) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	input := &obs.ListObjectsInput{
		Bucket: s.bucket,
		Marker: start,
	}
	input.Prefix = prefix
	input.MaxKeys = int(limit)
	input.Delimiter = delimiter
	input.EncodingType = "url"
	resp, err := s.c.ListObjects(input)
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		// Obs SDK listObjects method already decodes the object key.
		o := resp.Contents[i]
		objs[i] = &obj{o.Key, o.Size, o.LastModified, strings.HasSuffix(o.Key, "/"), string(o.StorageClass)}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			prefix, err := obs.UrlDecode(p)
			if err != nil {
				return nil, false, "", errors.WithMessagef(err, "failed to decode commonPrefixes %s", p)
			}
			objs = append(objs, &obj{prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, resp.IsTruncated, resp.NextMarker, nil
}

func (s *obsClient) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (s *obsClient) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	params := &obs.InitiateMultipartUploadInput{}
	params.Bucket = s.bucket
	params.Key = key
	params.StorageClass = obs.StorageClassType(s.sc)
	resp, err := s.c.InitiateMultipartUpload(params)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (s *obsClient) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	params := &obs.UploadPartInput{}
	params.Bucket = s.bucket
	params.Key = key
	params.UploadId = uploadID
	params.Body = bytes.NewReader(body)
	params.PartNumber = num
	params.PartSize = int64(len(body))
	sum := md5.Sum(body)
	params.ContentMD5 = base64.StdEncoding.EncodeToString(sum[:])
	resp, err := s.c.UploadPart(params)
	if err == nil && s.checkEtag && strings.Trim(resp.ETag, "\"") != obs.Hex(sum[:]) {
		err = fmt.Errorf("unexpected ETag: %s != %s", strings.Trim(resp.ETag, "\""), obs.Hex(sum[:]))
	}
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: resp.ETag}, err
}

func (s *obsClient) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	resp, err := s.c.CopyPart(&obs.CopyPartInput{
		Bucket:               s.bucket,
		Key:                  key,
		UploadId:             uploadID,
		PartNumber:           num,
		CopySourceBucket:     s.bucket,
		CopySourceKey:        srcKey,
		CopySourceRangeStart: off,
		CopySourceRangeEnd:   off + size - 1,
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: resp.ETag}, err
}

func (s *obsClient) AbortUpload(ctx context.Context, key string, uploadID string) {
	params := &obs.AbortMultipartUploadInput{}
	params.Bucket = s.bucket
	params.Key = key
	params.UploadId = uploadID
	_, _ = s.c.AbortMultipartUpload(params)
}

func (s *obsClient) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	params := &obs.CompleteMultipartUploadInput{}
	params.Bucket = s.bucket
	params.Key = key
	params.UploadId = uploadID
	for i := range parts {
		params.Parts = append(params.Parts, obs.Part{ETag: parts[i].ETag, PartNumber: parts[i].Num})
	}
	_, err := s.c.CompleteMultipartUpload(params)
	return err
}

func (s *obsClient) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &obs.ListMultipartUploadsInput{
		Bucket:    s.bucket,
		KeyMarker: marker,
	}

	result, err := s.c.ListMultipartUploads(input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{u.Key, u.UploadId, u.Initiated}
	}
	var nextMarker string
	if result.NextKeyMarker != "" {
		nextMarker = result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (s *obsClient) SetStorageClass(sc string) error {
	s.sc = sc
	return nil
}

func autoOBSEndpoint(bucketName, accessKey, secretKey, token string) (string, error) {
	region := obsDefaultRegion
	if r := os.Getenv("HWCLOUD_DEFAULT_REGION"); r != "" {
		region = r
	}
	endpoint := fmt.Sprintf("https://obs.%s.myhuaweicloud.com", region)

	obsCli, err := obs.New(accessKey, secretKey, endpoint, obs.WithSecurityToken(token))
	if err != nil {
		return "", err
	}
	defer obsCli.Close()

	result, err := obsCli.ListBuckets(&obs.ListBucketsInput{QueryLocation: true})
	if err != nil {
		return "", err
	}
	for _, bucket := range result.Buckets {
		if bucket.Name == bucketName {
			logger.Debugf("Get location of bucket %q: %s", bucketName, bucket.Location)
			return fmt.Sprintf("obs.%s.myhuaweicloud.com", bucket.Location), nil
		}
	}
	return "", fmt.Errorf("bucket %q does not exist", bucketName)
}

func newOBS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("invalid endpoint %s: %q", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)
	bucketName := hostParts[0]
	if len(hostParts) > 1 {
		endpoint = fmt.Sprintf("%s://%s", uri.Scheme, hostParts[1])
	}

	if accessKey == "" {
		accessKey = os.Getenv("HWCLOUD_ACCESS_KEY")
		secretKey = os.Getenv("HWCLOUD_SECRET_KEY")
	}

	var region string
	if len(hostParts) == 1 {
		if endpoint, err = autoOBSEndpoint(bucketName, accessKey, secretKey, token); err != nil {
			return nil, fmt.Errorf("cannot get location of bucket %s: %q", bucketName, err)
		}
		if !strings.HasPrefix(endpoint, "http") {
			endpoint = fmt.Sprintf("%s://%s", uri.Scheme, endpoint)
		}
	} else {
		region = strings.Split(hostParts[1], ".")[1]
	}

	// Use proxy setting from environment variables: HTTP_PROXY, HTTPS_PROXY, NO_PROXY
	if uri, err = url.ParseRequestURI(endpoint); err != nil {
		return nil, fmt.Errorf("invalid endpoint %s: %q", endpoint, err)
	}
	proxyURL, err := httpproxy.FromEnvironment().ProxyFunc()(uri)
	if err != nil {
		return nil, fmt.Errorf("get proxy url for endpoint: %s error: %q", endpoint, err)
	}
	var urlString string
	if proxyURL != nil {
		urlString = proxyURL.String()
	}

	// Empty proxy url string has no effect
	// there is a bug in the retry of PUT (did not call Seek(0,0) before retry), so disable the retry here
	c, err := obs.New(accessKey, secretKey, endpoint, obs.WithSecurityToken(token),
		obs.WithProxyUrl(urlString), obs.WithMaxRetryCount(0), obs.WithHttpTransport(httpClient.Transport.(*http.Transport)))
	if err != nil {
		return nil, fmt.Errorf("fail to initialize OBS: %q", err)
	}
	var checkEtag bool
	if _, err = c.GetBucketEncryption(bucketName); err != nil {
		if obsError, ok := err.(obs.ObsError); ok && obsError.Code == "NoSuchEncryptionConfiguration" {
			checkEtag = true
		} else if !ok || obsError.Code != "NoSuchBucket" {
			logger.Warnf("get bucket encryption: %q", err)
		}
	}
	return &obsClient{bucket: bucketName, region: region, checkEtag: checkEtag, c: c}, nil
}

func init() {
	Register("obs", newOBS)
}


================================================
FILE: pkg/object/oos.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"net/url"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type oos struct {
	s3client
}

func (s *oos) String() string {
	return fmt.Sprintf("oos://%s/", s.s3client.bucket)
}

func (s *oos) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (s *oos) Create(ctx context.Context) error {
	_, _, _, err := s.List(ctx, "", "", "", "", 1, true)
	if err != nil {
		return fmt.Errorf("please create bucket %s manually", s.s3client.bucket)
	}
	return err
}

func (s *oos) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	objs, hasMore, nextMarker, err := s.s3client.List(ctx, prefix, start, token, delimiter, limit, followLink)
	if start != "" && len(objs) > 0 && objs[0].Key() == start {
		objs = objs[1:]
	}
	return objs, hasMore, nextMarker, err
}

func newOOS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	region := hostParts[1][4:]
	endpoint = uri.Scheme + "://" + uri.Host[len(bucket)+1:]
	forcePathStyle := !strings.Contains(strings.ToLower(endpoint), "xstore.ctyun.cn")

	awsCfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(awsCfg, func(options *s3.Options) {
		options.EndpointOptions.DisableHTTPS = !ssl
		options.Region = region
		options.UsePathStyle = forcePathStyle
		options.HTTPClient = httpClient
		options.BaseEndpoint = aws.String(endpoint)
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	return &oos{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("oos", newOOS)
}


================================================
FILE: pkg/object/oss.go
================================================
//go:build !nooss
// +build !nooss

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	openapicred "github.com/aliyun/credentials-go/credentials"
)

const ossDefaultRegionID = "cn-hangzhou"

type ossClient struct {
	client *oss.Client
	bucket string
	sc     string
}

func (o *ossClient) String() string {
	return fmt.Sprintf("oss://%s/", o.bucket)
}

func (o *ossClient) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              int(oss.MinPartSize),
		MaxPartSize:              oss.MaxPartSize,
		MaxPartCount:             int(oss.MaxUploadParts),
	}
}

func (o *ossClient) Create(ctx context.Context) error {
	var configuration *oss.CreateBucketConfiguration
	if o.sc != "" {
		configuration = &oss.CreateBucketConfiguration{
			StorageClass: oss.StorageClassType(o.sc),
		}
	}
	_, err := o.client.PutBucket(ctx, &oss.PutBucketRequest{
		Bucket:                    &o.bucket,
		CreateBucketConfiguration: configuration,
	})
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (o *ossClient) Head(ctx context.Context, key string) (Object, error) {
	info, err := o.client.HeadObject(ctx, &oss.HeadObjectRequest{
		Bucket: &o.bucket,
		Key:    &key,
	})
	if err != nil {
		var svcErr *oss.ServiceError
		if errors.As(err, &svcErr) && svcErr.StatusCode == http.StatusNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		info.ContentLength,
		oss.ToTime(info.LastModified),
		strings.HasSuffix(key, "/"),
		oss.ToString(info.StorageClass),
	}, nil
}

func (o *ossClient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (resp io.ReadCloser, err error) {
	var result *oss.GetObjectResult
	var reqId string
	var sc string
	result, err = o.client.GetObject(ctx, &oss.GetObjectRequest{
		Bucket:        &o.bucket,
		Key:           &key,
		Range:         oss.HTTPRange{Offset: off, Count: limit}.FormatHTTPRange(),
		RangeBehavior: oss.Ptr("standard"),
	})
	if err != nil {
		var svcErr *oss.ServiceError
		if errors.As(err, &svcErr) {
			reqId = svcErr.RequestID
		}
	} else {
		reqId = result.ResultCommon.Headers.Get(oss.HeaderOssRequestID)
		sc = oss.ToString(result.StorageClass)
		if off > 0 || limit > 0 {
			resp = result.Body
		} else {
			resp = verifyChecksum(result.Body,
				result.Headers.Get(oss.HeaderOssMetaPrefix+checksumAlgr),
				result.ContentLength)
		}
	}

	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqId)
	attrs.SetStorageClass(sc)
	return
}

func (o *ossClient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	req := &oss.PutObjectRequest{
		Bucket:       &o.bucket,
		Key:          &key,
		StorageClass: oss.StorageClassType(o.sc),
		Body:         in,
	}
	if ins, ok := in.(io.ReadSeeker); ok {
		req.Metadata = make(map[string]string)
		req.Metadata[oss.HeaderOssMetaPrefix+checksumAlgr] = generateChecksum(ins)
	}
	var reqId string
	result, err := o.client.PutObject(ctx, req)
	if err != nil {
		var svcErr *oss.ServiceError
		if errors.As(err, &svcErr) {
			reqId = svcErr.RequestID
		}
	} else {
		reqId = result.Headers.Get(oss.HeaderOssRequestID)
	}
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqId).SetStorageClass(o.sc)
	return err
}

func (o *ossClient) Copy(ctx context.Context, dst, src string) error {
	var req = &oss.CopyObjectRequest{
		SourceBucket: &o.bucket,
		Bucket:       &o.bucket,
		SourceKey:    &src,
		Key:          &dst,
		StorageClass: oss.StorageClassType(o.sc),
	}
	_, err := o.client.CopyObject(ctx, req)
	return err
}

func (o *ossClient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	result, err := o.client.DeleteObject(ctx, &oss.DeleteObjectRequest{
		Bucket: &o.bucket,
		Key:    &key,
	})
	var reqId string
	if err != nil {
		var svcErr *oss.ServiceError
		if errors.As(err, &svcErr) {
			reqId = svcErr.RequestID
		}
	} else {
		reqId = result.Headers.Get(oss.HeaderOssRequestID)
	}
	attrs := ApplyGetters(getters...)
	attrs.SetRequestID(reqId)
	return err
}

func (o *ossClient) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	result, err := o.client.ListObjectsV2(ctx, &oss.ListObjectsV2Request{
		Bucket:            &o.bucket,
		Prefix:            &prefix,
		StartAfter:        &start,
		ContinuationToken: &token,
		Delimiter:         &delimiter,
		MaxKeys:           int32(limit),
	})
	if err != nil {
		return nil, false, "", err
	}
	n := len(result.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := result.Contents[i]
		objs[i] = &obj{oss.ToString(o.Key), o.Size, oss.ToTime(o.LastModified), strings.HasSuffix(oss.ToString(o.Key), "/"), oss.ToString(o.StorageClass)}
	}
	if delimiter != "" {
		for _, o := range result.CommonPrefixes {
			objs = append(objs, &obj{oss.ToString(o.Prefix), 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, result.IsTruncated, oss.ToString(result.NextContinuationToken), nil
}

func (o *ossClient) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (o *ossClient) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	result, err := o.client.InitiateMultipartUpload(ctx, &oss.InitiateMultipartUploadRequest{
		Bucket:       &o.bucket,
		Key:          &key,
		StorageClass: oss.StorageClassType(o.sc),
	})
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: oss.ToString(result.UploadId), MinPartSize: 4 << 20, MaxCount: 10000}, nil
}

func (o *ossClient) UploadPart(ctx context.Context, key string, uploadID string, num int, data []byte) (*Part, error) {
	r, err := o.client.UploadPart(ctx, &oss.UploadPartRequest{
		Bucket:     &o.bucket,
		UploadId:   &uploadID,
		Key:        &key,
		Body:       bytes.NewReader(data),
		PartNumber: int32(num),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: oss.ToString(r.ETag)}, nil
}

func (o *ossClient) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	partCopy, err := o.client.UploadPartCopy(ctx, &oss.UploadPartCopyRequest{
		SourceBucket: &o.bucket,
		Bucket:       &o.bucket,
		SourceKey:    &srcKey,
		Key:          &key,
		UploadId:     &uploadID,
		PartNumber:   int32(num),
		Range:        oss.HTTPRange{Offset: off, Count: size}.FormatHTTPRange(),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: oss.ToString(partCopy.ETag)}, nil
}

func (o *ossClient) AbortUpload(ctx context.Context, key string, uploadID string) {
	_, _ = o.client.AbortMultipartUpload(ctx, &oss.AbortMultipartUploadRequest{
		Bucket:   &o.bucket,
		UploadId: &uploadID,
		Key:      &key,
	})
}

func (o *ossClient) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	oparts := make([]oss.UploadPart, len(parts))
	for i, p := range parts {
		oparts[i].PartNumber = int32(p.Num)
		oparts[i].ETag = &p.ETag
	}
	_, err := o.client.CompleteMultipartUpload(ctx, &oss.CompleteMultipartUploadRequest{
		Bucket:   &o.bucket,
		Key:      &key,
		UploadId: &uploadID,
		CompleteMultipartUpload: &oss.CompleteMultipartUpload{
			Parts: oparts,
		},
	})
	return err
}

func (o *ossClient) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	result, err := o.client.ListParts(ctx, &oss.ListPartsRequest{
		Bucket: &o.bucket,
		Key:    &marker,
	})
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Parts))
	for i, u := range result.Parts {
		parts[i] = &PendingPart{oss.ToString(result.Key), oss.ToString(result.UploadId), oss.ToTime(u.LastModified)}
	}
	return parts, string(result.NextPartNumberMarker), nil
}

func (o *ossClient) SetStorageClass(sc string) error {
	o.sc = sc
	return nil
}

func autoOSSEndpoint(bucketName string, provider credentials.CredentialsProvider) (string, error) {
	var err error
	regionID := ossDefaultRegionID
	if rid := os.Getenv("ALICLOUD_REGION_ID"); rid != "" {
		regionID = rid
	}
	config := oss.NewConfig()
	config.CredentialsProvider = provider
	config.Region = &regionID
	client := oss.NewClient(config)
	var info *oss.GetBucketInfoResult
	info, err = client.GetBucketInfo(ctx, &oss.GetBucketInfoRequest{
		Bucket: &bucketName,
	})
	if err != nil {
		return "", err
	}
	// try oss internal endpoint
	client2 := oss.NewClient(oss.NewConfig().
		WithEndpoint(oss.ToString(info.BucketInfo.IntranetEndpoint)).
		WithCredentialsProvider(provider).
		WithRegion(regionID))
	if _, err := client2.GetBucketInfo(ctx, &oss.GetBucketInfoRequest{Bucket: &bucketName}); err == nil {
		return "http://" + oss.ToString(info.BucketInfo.IntranetEndpoint), err
	}
	return "https://" + oss.ToString(info.BucketInfo.ExtranetEndpoint), nil
}

func newOSS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)
	bucketName := hostParts[0]

	var domain string
	if len(hostParts) > 1 {
		domain = uri.Scheme + "://" + hostParts[1]
	}
	// try environment variable
	if accessKey == "" {
		accessKey = os.Getenv("ALICLOUD_ACCESS_KEY_ID")
		secretKey = os.Getenv("ALICLOUD_ACCESS_KEY_SECRET")
		token = os.Getenv("SECURITY_TOKEN")
	}
	var provider credentials.CredentialsProvider
	if accessKey == "" {
		// use default credential chain https://github.com/aliyun/credentials-go?tab=readme-ov-file#credential-provider-chain
		defaultCred, _ := openapicred.NewCredential(nil)
		provider = credentials.CredentialsProviderFunc(func(ctx context.Context) (credentials.Credentials, error) {
			// return the old certificate before its expiration and obtain a new certificate when the old certificate expires
			cred, err := defaultCred.GetCredential()
			if err != nil {
				return credentials.Credentials{}, err
			}
			return credentials.Credentials{
				AccessKeyID:     *cred.AccessKeyId,
				AccessKeySecret: *cred.AccessKeySecret,
				SecurityToken:   *cred.SecurityToken,
			}, nil
		})
	} else {
		provider = credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)
	}

	if domain == "" {
		if domain, err = autoOSSEndpoint(bucketName, provider); err != nil {
			return nil, fmt.Errorf("unable to get endpoint of bucket %s: %s", bucketName, err)
		}
		logger.Debugf("use endpoint %s", domain)
	}
	var regionID string
	var useV4 bool
	if regionID = os.Getenv("ALICLOUD_REGION_ID"); regionID == "" {
		index := strings.Index(domain, ".")
		if index <= 0 {
			return nil, fmt.Errorf("invalid endpoint: %q", domain)
		}
		if strings.HasSuffix(domain, ".aliyuncs.com") {
			if strings.Contains(domain, ".privatelink.") {
				// <id>.oss.<region>.privatelink.aliyuncs.com
				parts := strings.Split(domain, ".")
				if len(parts) < 3 {
					return nil, fmt.Errorf("invalid private link endpoint: %q", domain)
				}
				regionID = parts[2]
				useV4 = true
			} else {
				// oss-<region>.aliyuncs.com
				// oss-<region>-internal.aliyuncs.com
				old := strings.TrimPrefix(strings.TrimPrefix(domain[:index], "http://"), "https://")
				regionID = strings.TrimPrefix(old, "oss-")
				regionID = strings.TrimSuffix(regionID, "-internal")
				regionID = strings.TrimSuffix(regionID, "-vpc")
				useV4 = old != regionID
			}
		}
	}
	config := oss.LoadDefaultConfig()
	config.Endpoint = oss.Ptr(domain)
	if useV4 {
		config.WithSignatureVersion(oss.SignatureVersionV4)
		config.Region = oss.Ptr(regionID)
	} else {
		config.WithSignatureVersion(oss.SignatureVersionV1)
	}
	config.UsePathStyle = oss.Ptr(strings.Contains(domain, ".privatelink."))
	config.RetryMaxAttempts = oss.Ptr(1)
	config.ConnectTimeout = oss.Ptr(time.Second * 2)
	config.ReadWriteTimeout = oss.Ptr(time.Second * 5)
	enableChecksum := strings.EqualFold(uri.Query().Get("disable-checksum"), "false")
	if enableChecksum {
		logger.Infof("default CRC checksum is enabled")
	}
	config.DisableUploadCRC64Check = oss.Ptr(!enableChecksum)
	config.DisableDownloadCRC64Check = oss.Ptr(!enableChecksum)
	config.UserAgent = &UserAgent
	config.HttpClient = httpClient
	config.CredentialsProvider = provider
	client := oss.NewClient(config)
	o := &ossClient{client: client, bucket: bucketName}
	return o, nil
}

func init() {
	Register("oss", newOSS)
}


================================================
FILE: pkg/object/prefix.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"os"
	"time"
)

type withPrefix struct {
	os     ObjectStorage
	prefix string
}

// WithPrefix return an object storage that add a prefix to keys.
func WithPrefix(os ObjectStorage, prefix string) ObjectStorage {
	return &withPrefix{os, prefix}
}

func (s *withPrefix) SetStorageClass(sc string) error {
	if o, ok := s.os.(SupportStorageClass); ok {
		return o.SetStorageClass(sc)
	}
	return notSupported
}

func (s *withPrefix) Symlink(oldName, newName string) error {
	if w, ok := s.os.(SupportSymlink); ok {
		return w.Symlink(oldName, s.prefix+newName)
	}
	return notSupported
}

func (s *withPrefix) Readlink(name string) (string, error) {
	if w, ok := s.os.(SupportSymlink); ok {
		return w.Readlink(s.prefix + name)
	}
	return "", notSupported
}

func (p *withPrefix) String() string {
	return fmt.Sprintf("%s%s", p.os, p.prefix)
}

func (p *withPrefix) Limits() Limits {
	return p.os.Limits()
}

func (p *withPrefix) Create(ctx context.Context) error {
	return p.os.Create(ctx)
}

type withFile struct {
	File
	key string
}

func (f *withFile) Key() string { return f.key }

type withObj struct {
	Object
	key string
}

func (o *withObj) Key() string { return o.key }

func (p *withPrefix) updateKey(o Object) Object {
	key := o.Key()
	if len(key) < len(p.prefix) {
		return o
	}
	key = key[len(p.prefix):]
	switch po := o.(type) {
	case *obj:
		po.key = key
	case *file:
		po.key = key
	case File:
		o = &withFile{po, key}
	case Object:
		o = &withObj{po, key}
	}
	return o
}

func (p *withPrefix) Head(ctx context.Context, key string) (Object, error) {
	o, err := p.os.Head(ctx, p.prefix+key)
	if err != nil {
		return nil, err
	}
	return p.updateKey(o), nil
}

func (p *withPrefix) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if off > 0 && limit < 0 {
		return nil, fmt.Errorf("invalid range: %d-%d", off, limit)
	}
	return p.os.Get(ctx, p.prefix+key, off, limit, getters...)
}

func (p *withPrefix) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	return p.os.Put(ctx, p.prefix+key, in, getters...)
}

func (p *withPrefix) Copy(ctx context.Context, dst, src string) error {
	return p.os.Copy(ctx, dst, src)
}

func (p *withPrefix) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	return p.os.Delete(ctx, p.prefix+key, getters...)
}

func (p *withPrefix) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if start != "" {
		start = p.prefix + start
	}
	objs, hasMore, nextMarker, err := p.os.List(ctx, p.prefix+prefix, start, token, delimiter, limit, followLink)
	for i, o := range objs {
		objs[i] = p.updateKey(o)
	}
	return objs, hasMore, nextMarker, err
}

func (p *withPrefix) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	if marker != "" {
		marker = p.prefix + marker
	}
	r, err := p.os.ListAll(ctx, p.prefix+prefix, marker, followLink)
	if err != nil {
		return r, err
	}
	r2 := make(chan Object, 10240)
	go func() {
		for o := range r {
			if o != nil && o.Key() != "" {
				o = p.updateKey(o)
			}
			r2 <- o
		}
		close(r2)
	}()
	return r2, nil
}

func (p *withPrefix) Chmod(path string, mode os.FileMode) error {
	if fs, ok := p.os.(FileSystem); ok {
		return fs.Chmod(p.prefix+path, mode)
	}
	return notSupported
}

func (p *withPrefix) Chown(path string, owner, group string) error {
	if fs, ok := p.os.(FileSystem); ok {
		return fs.Chown(p.prefix+path, owner, group)
	}
	return notSupported
}

func (p *withPrefix) Chtimes(key string, mtime time.Time) error {
	if fs, ok := p.os.(FileSystem); ok {
		return fs.Chtimes(p.prefix+key, mtime)
	}
	return notSupported
}

func (p *withPrefix) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	return p.os.CreateMultipartUpload(ctx, p.prefix+key)
}

func (p *withPrefix) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	return p.os.UploadPart(ctx, p.prefix+key, uploadID, num, body)
}

func (s *withPrefix) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	return s.os.UploadPartCopy(ctx, s.prefix+key, uploadID, num, s.prefix+srcKey, off, size)
}

func (p *withPrefix) AbortUpload(ctx context.Context, key string, uploadID string) {
	p.os.AbortUpload(ctx, p.prefix+key, uploadID)
}

func (p *withPrefix) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	return p.os.CompleteUpload(ctx, p.prefix+key, uploadID, parts)
}

func (p *withPrefix) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	parts, nextMarker, err := p.os.ListUploads(ctx, marker)
	for _, part := range parts {
		part.Key = part.Key[len(p.prefix):]
	}
	return parts, nextMarker, err
}

var _ ObjectStorage = (*withPrefix)(nil)

func IsFileSystem(object ObjectStorage) bool {
	if o, ok := object.(*withPrefix); ok {
		object = o.os
	}
	_, ok := object.(FileSystem)
	return ok
}


================================================
FILE: pkg/object/qingstor.go
================================================
//go:build !noqingstore
// +build !noqingstore

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/qingstor/qingstor-sdk-go/v4/config"
	"github.com/qingstor/qingstor-sdk-go/v4/request/errors"
	qs "github.com/qingstor/qingstor-sdk-go/v4/service"
)

type qingstor struct {
	bucket *qs.Bucket
	sc     string
}

func (q *qingstor) String() string {
	return fmt.Sprintf("qingstor://%s/", *q.bucket.Properties.BucketName)
}

func (q *qingstor) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              4 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (q *qingstor) Create(ctx context.Context) error {
	_, err := q.bucket.PutWithContext(ctx)
	if err != nil && strings.Contains(err.Error(), "bucket_already_exists") {
		err = nil
	}
	return err
}

func (q *qingstor) Head(ctx context.Context, key string) (Object, error) {
	r, err := q.bucket.HeadObjectWithContext(ctx, key, nil)
	if err != nil {
		if e, ok := err.(*errors.QingStorError); ok && e.StatusCode == http.StatusNotFound {
			return nil, os.ErrNotExist
		}
	}
	return &obj{
		key,
		*r.ContentLength,
		*r.LastModified,
		strings.HasSuffix(key, "/"),
		*r.XQSStorageClass,
	}, nil
}

func (q *qingstor) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	input := &qs.GetObjectInput{}
	rangeStr := getRange(off, limit)
	if rangeStr != "" {
		input.Range = &rangeStr
	}
	output, err := q.bucket.GetObjectWithContext(ctx, key, input)
	if output != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(output.RequestID))
		if output.XQSStorageClass != nil {
			attrs.SetStorageClass(*output.XQSStorageClass)
		}
	}
	if err != nil {
		return nil, err
	}
	if err = checkGetStatus(*output.StatusCode, rangeStr != ""); err != nil {
		_ = output.Body.Close()
		return nil, err
	}
	return output.Body, nil
}

func findLen(in io.Reader) (io.Reader, int64, error) {
	var vlen int64
	switch v := in.(type) {
	case *bytes.Buffer:
		vlen = int64(v.Len())
	case *bytes.Reader:
		vlen = int64(v.Len())
	case *strings.Reader:
		vlen = int64(v.Len())
	case *os.File:
		st, err := v.Stat()
		if err != nil {
			return nil, 0, err
		}
		vlen = st.Size()
	case io.ReadSeeker:
		var err error
		vlen, err = v.Seek(0, 2)
		if err != nil {
			return nil, 0, err
		}
		if _, err = v.Seek(0, 0); err != nil {
			return nil, 0, err
		}
	default:
		d, err := io.ReadAll(in)
		if err != nil {
			return nil, 0, err
		}
		vlen = int64(len(d))
		in = bytes.NewBuffer(d)
	}
	return in, vlen, nil
}

func (q *qingstor) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	body, vlen, err := findLen(in)
	if err != nil {
		return err
	}
	mimeType := utils.GuessMimeType(key)
	input := &qs.PutObjectInput{
		Body:          body,
		ContentLength: &vlen,
		ContentType:   &mimeType,
	}
	if q.sc != "" {
		input.XQSStorageClass = &q.sc
	}
	out, err := q.bucket.PutObjectWithContext(ctx, key, input)
	if out != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(out.RequestID)).SetStorageClass(q.sc)
	}
	if err != nil {
		return err
	}
	if *out.StatusCode != 201 {
		return fmt.Errorf("unexpected code: %d", *out.StatusCode)
	}
	return nil
}

func (q *qingstor) Copy(ctx context.Context, dst, src string) error {
	source := fmt.Sprintf("/%s/%s", *q.bucket.Properties.BucketName, src)
	input := &qs.PutObjectInput{
		XQSCopySource: &source,
	}
	if q.sc != "" {
		input.XQSStorageClass = &q.sc
	}
	out, err := q.bucket.PutObjectWithContext(ctx, dst, input)
	if err != nil {
		return err
	}
	if *out.StatusCode != 201 {
		return fmt.Errorf("unexpected code: %d", *out.StatusCode)
	}
	return nil
}

func (q *qingstor) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	output, err := q.bucket.DeleteObjectWithContext(ctx, key)
	if output != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(aws.ToString(output.RequestID))
	}
	return err
}

func (q *qingstor) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	limit_ := int(limit)
	input := &qs.ListObjectsInput{
		Prefix: &prefix,
		Marker: &start,
		Limit:  &limit_,
	}
	if delimiter != "" {
		input.Delimiter = &delimiter
	}
	out, err := q.bucket.ListObjectsWithContext(ctx, input)
	if err != nil {
		return nil, false, "", err
	}
	n := len(out.Keys)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		k := out.Keys[i]
		objs[i] = &obj{
			*k.Key,
			*k.Size,
			time.Unix(int64(*k.Modified), 0),
			strings.HasSuffix(*k.Key, "/"),
			*k.StorageClass,
		}
	}
	if delimiter != "" {
		for _, p := range out.CommonPrefixes {
			objs = append(objs, &obj{*p, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, *out.HasMore, *out.NextMarker, nil
}

func (q *qingstor) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (q *qingstor) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	var input qs.InitiateMultipartUploadInput
	if q.sc != "" {
		input.XQSStorageClass = &q.sc
	}
	r, err := q.bucket.InitiateMultipartUploadWithContext(ctx, key, &input)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: *r.UploadID, MinPartSize: 4 << 20, MaxCount: 10000}, nil
}

func (q *qingstor) UploadPart(ctx context.Context, key string, uploadID string, num int, data []byte) (*Part, error) {
	input := &qs.UploadMultipartInput{
		UploadID:   &uploadID,
		PartNumber: &num,
		Body:       bytes.NewReader(data),
	}
	r, err := q.bucket.UploadMultipartWithContext(ctx, key, input)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, Size: len(data), ETag: strings.Trim(*r.ETag, "\"")}, nil
}

func (q *qingstor) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	input := &qs.UploadMultipartInput{
		UploadID:      &uploadID,
		PartNumber:    &num,
		XQSCopySource: aws.String(fmt.Sprintf("/%s/%s", *q.bucket.Properties.BucketName, srcKey)),
		XQSCopyRange:  aws.String(fmt.Sprintf("bytes=%d-%d", off, off+size-1)),
	}
	r, err := q.bucket.UploadMultipartWithContext(ctx, key, input)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, Size: int(size), ETag: strings.Trim(*r.ETag, "\"")}, nil
}

func (q *qingstor) AbortUpload(ctx context.Context, key string, uploadID string) {
	input := &qs.AbortMultipartUploadInput{
		UploadID: &uploadID,
	}
	_, _ = q.bucket.AbortMultipartUploadWithContext(ctx, key, input)
}

func (q *qingstor) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	oparts := make([]*qs.ObjectPartType, len(parts))
	for i := range parts {
		oparts[i] = &qs.ObjectPartType{
			PartNumber: &parts[i].Num,
			Etag:       &parts[i].ETag,
		}
	}
	input := &qs.CompleteMultipartUploadInput{
		UploadID:    &uploadID,
		ObjectParts: oparts,
	}
	_, err := q.bucket.CompleteMultipartUploadWithContext(ctx, key, input)
	return err
}

func (q *qingstor) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &qs.ListMultipartUploadsInput{
		KeyMarker: &marker,
	}
	result, err := q.bucket.ListMultipartUploadsWithContext(ctx, input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{*u.Key, *u.UploadID, *u.Created}
	}
	var nextMarker string
	if result.NextKeyMarker != nil {
		nextMarker = *result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (q *qingstor) SetStorageClass(sc string) error {
	q.sc = sc
	return nil
}

func newQingStor(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	var bucketName, zone, host string
	if !strings.HasSuffix(uri.Host, "qingstor.com") {
		// support private cloud
		hostParts := strings.SplitN(uri.Host, ".", 2)
		bucketName, zone, host = hostParts[0], "", hostParts[1]
	} else {
		hostParts := strings.SplitN(uri.Host, ".", 3)
		bucketName, zone, host = hostParts[0], hostParts[1], hostParts[2]
	}
	conf, err := config.New(accessKey, secretKey)
	if err != nil {
		return nil, fmt.Errorf("Can't load config: %s", err.Error())
	}
	conf.Host = host
	conf.Protocol = uri.Scheme
	if uri.Scheme == "http" {
		conf.Port = 80
	} else {
		conf.Port = 443
	}
	conf.Connection = httpClient
	qsService, _ := qs.Init(conf)
	bucket, _ := qsService.Bucket(bucketName, zone)
	return &qingstor{bucket: bucket}, nil
}

func init() {
	Register("qingstor", newQingStor)
}


================================================
FILE: pkg/object/qiniu.go
================================================
//go:build !noqiniu && !nos3
// +build !noqiniu,!nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
	"github.com/qiniu/go-sdk/v7/auth"
	"github.com/qiniu/go-sdk/v7/storage"
)

type qiniu struct {
	s3client
	bm     *storage.BucketManager
	cred   *auth.Credentials
	cfg    *storage.Config
	marker string
}

func (q *qiniu) String() string {
	return fmt.Sprintf("qiniu://%s/", q.bucket)
}

func (q *qiniu) SetStorageClass(_ string) error {
	return notSupported
}

func (q *qiniu) Limits() Limits {
	return Limits{}
}

func (q *qiniu) download(key string, off, limit int64) (io.ReadCloser, error) {
	deadline := time.Now().Add(time.Second * 3600).Unix()
	url := storage.MakePrivateURL(q.cred, os.Getenv("QINIU_DOMAIN"), key, deadline)
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	now := time.Now().UTC().Format(http.TimeFormat)
	req.Header.Add("Date", now)
	if off > 0 || limit > 0 {
		if limit > 0 {
			req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", off, off+limit-1))
		} else {
			req.Header.Add("Range", fmt.Sprintf("bytes=%d-", off))
		}
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != 200 && resp.StatusCode != 206 {
		return nil, fmt.Errorf("Status code: %d", resp.StatusCode)
	}
	return resp.Body, nil
}

var notexist = "no such file or directory"

func (q *qiniu) Head(ctx context.Context, key string) (Object, error) {
	r, err := q.bm.Stat(q.bucket, key)
	if err != nil {
		if strings.Contains(err.Error(), notexist) {
			err = os.ErrNotExist
		}
		return nil, err
	}

	mtime := time.Unix(0, r.PutTime*100)
	return &obj{
		key,
		r.Fsize,
		mtime,
		strings.HasSuffix(key, "/"),
		"",
	}, nil
}

func (q *qiniu) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if strings.HasPrefix(key, "/") && os.Getenv("QINIU_DOMAIN") != "" {
		return q.download(key, off, limit)
	}
	return q.s3client.Get(ctx, key, off, limit, getters...)
}

func (q *qiniu) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	body, vlen, err := findLen(in)
	if err != nil {
		return err
	}
	putPolicy := storage.PutPolicy{Scope: q.bucket + ":" + key}
	upToken := putPolicy.UploadToken(q.cred)
	formUploader := storage.NewFormUploader(q.cfg)
	var ret storage.PutRet
	return formUploader.Put(ctx, &ret, upToken, key, body, vlen, nil)
}

func (q *qiniu) Copy(ctx context.Context, dst, src string) error {
	return q.bm.Copy(q.bucket, src, q.bucket, dst, true)
}

func (q *qiniu) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	return nil, notSupported
}

func (q *qiniu) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := q.bm.Delete(q.bucket, key)
	if err != nil && strings.Contains(err.Error(), notexist) {
		return nil
	}
	return err
}

func (q *qiniu) List(ctx context.Context, prefix, startAfter, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	entries, prefixes, markerOut, hasNext, err := q.bm.ListFiles(q.bucket, prefix, delimiter, token, int(limit))
	if len(entries) > 0 || err == io.EOF {
		// ignore error if returned something
		err = nil
	}
	if err != nil {
		return nil, false, "", err
	}
	n := len(entries)
	objs := make([]Object, 0, n)
	for i := 0; i < n; i++ {
		entry := entries[i]
		if entry.Key <= startAfter {
			continue
		}
		mtime := entry.PutTime / 10000000
		objs = append(objs, &obj{entry.Key, entry.Fsize, time.Unix(mtime, 0), strings.HasSuffix(entry.Key, "/"), ""})
	}
	if delimiter != "" {
		for _, p := range prefixes {
			if p <= startAfter {
				continue
			}
			objs = append(objs, &obj{p, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	if len(objs) == 0 {
		hasNext = false
		markerOut = ""
	}
	return objs, hasNext, markerOut, nil
}

func newQiniu(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint: %v, error: %v", endpoint, err)
	}
	hostParts := strings.SplitN(uri.Host, ".", 2)
	bucket := hostParts[0]
	endpoint = hostParts[1]
	var region string
	if strings.HasPrefix(endpoint, "s3") {
		// private region
		region = endpoint[strings.Index(endpoint, "-")+1 : strings.Index(endpoint, ".")]
	} else if strings.HasPrefix(endpoint, "qvm-") {
		region = "cn-east-1" // internal
	} else if strings.HasPrefix(endpoint, "qvm-z1") {
		region = "cn-north-1"
	} else {
		region = endpoint[:strings.LastIndex(endpoint, "-")]
	}

	awsCfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(awsCfg, func(options *s3.Options) {
		options.BaseEndpoint = aws.String(uri.Scheme + "://" + endpoint)
		options.Region = region
		options.EndpointOptions.DisableHTTPS = uri.Scheme == "http"
		options.UsePathStyle = true
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	s3c := s3client{bucket: bucket, s3: client, region: region}
	cfg := storage.Config{
		UseHTTPS: uri.Scheme == "https",
	}
	zone, err := storage.GetZone(accessKey, bucket)
	if err != nil {
		domain := strings.SplitN(endpoint, "-", 2)[1]
		zone = &storage.Zone{
			RsHost:     "rs-" + domain,
			RsfHost:    "rsf-" + domain,
			ApiHost:    "api-" + domain,
			IovipHost:  "io-" + domain,
			SrcUpHosts: []string{"up-" + domain},
		}
	} else if strings.HasPrefix(endpoint, "qvm-z1") {
		zone.SrcUpHosts = []string{"free-qvm-z1-zz.qiniup.com"}
	} else if strings.HasPrefix(endpoint, "qvm-") {
		zone.SrcUpHosts = []string{"free-qvm-z0-xs.qiniup.com"}
	}
	cfg.Zone = zone
	cred := auth.New(accessKey, secretKey)
	bucketManager := storage.NewBucketManager(cred, &cfg)
	return &qiniu{s3c, bucketManager, cred, &cfg, ""}, nil
}

func init() {
	Register("qiniu", newQiniu)
}


================================================
FILE: pkg/object/redis.go
================================================
//go:build !noredis
// +build !noredis

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/redis/go-redis/v9"
)

// redisStore stores data chunks into Redis.
type redisStore struct {
	DefaultObjectStorage
	rdb redis.UniversalClient
	uri string
}

func (r *redisStore) String() string {
	return r.uri + "/"
}

func (r *redisStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	data, err := r.rdb.Get(ctx, key).Bytes()
	if err != nil {
		return nil, err
	}
	if off > int64(len(data)) {
		off = int64(len(data))
	}
	data = data[off:]
	if limit > 0 && limit < int64(len(data)) {
		data = data[:limit]
	}
	return io.NopCloser(bytes.NewBuffer(data)), nil
}

func (r *redisStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	data, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	return r.rdb.Set(ctx, key, data, 0).Err()
}

func (r *redisStore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	return r.rdb.Del(ctx, key).Err()
}

func (t *redisStore) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	var scanCli []redis.UniversalClient
	var m sync.Mutex
	if c, ok := t.rdb.(*redis.ClusterClient); ok {
		err := c.ForEachMaster(ctx, func(ctx context.Context, client *redis.Client) error {
			m.Lock()
			defer m.Unlock()
			scanCli = append(scanCli, client)
			return nil
		})
		if err != nil {
			return nil, err
		}
	} else {
		scanCli = append(scanCli, t.rdb)
	}
	batch := 1000
	var objs = make(chan Object, batch)
	var keyList []string
	var cursor uint64
	for _, mCli := range scanCli {
		for {
			// FIXME: this will be really slow for many objects
			keys, c, err := mCli.Scan(ctx, cursor, prefix+"*", int64(batch)).Result()
			if err != nil {
				logger.Warnf("redis scan error, coursor %d: %s", cursor, err)
				return nil, err
			}
			for _, key := range keys {
				if key > marker {
					keyList = append(keyList, key)
				}
			}
			if c == 0 {
				break
			}
			cursor = c
		}
	}
	sort.Strings(keyList)

	go func() {
		defer close(objs)
		lKeyList := len(keyList)
		for start := 0; start < lKeyList; start += batch {
			end := start + batch
			if end > lKeyList {
				end = lKeyList
			}

			p := t.rdb.Pipeline()
			for _, key := range keyList[start:end] {
				p.StrLen(ctx, key)
			}
			cmds, err := p.Exec(ctx)
			if err != nil {
				objs <- nil
				return
			}

			now := time.Now()
			for idx, cmd := range cmds {
				if intCmd, ok := cmd.(*redis.IntCmd); ok {
					size, err := intCmd.Result()
					if err != nil {
						objs <- nil
						return
					}
					if size == 0 {
						exist, err := t.rdb.Exists(ctx, keyList[start:end][idx]).Result()
						if err != nil {
							objs <- nil
							return
						}
						if exist == 0 {
							continue
						}
					}
					// FIXME: mtime
					objs <- &obj{keyList[start:end][idx], size, now, strings.HasSuffix(keyList[start:end][idx], "/"), ""}
				}
			}
		}
	}()
	return objs, nil
}

func (t *redisStore) Head(ctx context.Context, key string) (Object, error) {
	data, err := t.rdb.Get(ctx, key).Bytes()
	if err != nil {
		if err == redis.Nil {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		int64(len(data)),
		time.Now(),
		strings.HasSuffix(key, "/"),
		"",
	}, err
}

func newRedis(uri, user, passwd, token string) (ObjectStorage, error) {
	u, err := url.Parse(uri)
	if err != nil {
		return nil, fmt.Errorf("url parse %s: %s", uri, err)
	}
	hosts := u.Host
	opt, err := redis.ParseURL(u.String())
	if err != nil {
		return nil, fmt.Errorf("redis parse %s: %s", uri, err)
	}
	if user != "" {
		opt.Username = user
	}
	if passwd != "" {
		opt.Password = passwd
	}
	if opt.MaxRetries == 0 {
		opt.MaxRetries = -1 // Redis use -1 to disable retries
	}
	var rdb redis.UniversalClient
	if strings.Contains(hosts, ",") && strings.Index(hosts, ",") < strings.Index(hosts, ":") {
		var fopt redis.FailoverOptions
		ps := strings.Split(hosts, ",")
		fopt.MasterName = ps[0]
		fopt.SentinelAddrs = ps[1:]
		_, port, _ := net.SplitHostPort(fopt.SentinelAddrs[len(fopt.SentinelAddrs)-1])
		if port == "" {
			port = "26379"
		}
		for i, addr := range fopt.SentinelAddrs {
			h, p, e := net.SplitHostPort(addr)
			if e != nil {
				fopt.SentinelAddrs[i] = net.JoinHostPort(addr, port)
			} else if p == "" {
				fopt.SentinelAddrs[i] = net.JoinHostPort(h, port)
			}
		}
		fopt.SentinelPassword = os.Getenv("SENTINEL_PASSWORD_FOR_OBJ")
		fopt.DB = opt.DB
		fopt.Username = opt.Username
		fopt.Password = opt.Password
		fopt.TLSConfig = opt.TLSConfig
		fopt.MaxRetries = opt.MaxRetries
		fopt.MinRetryBackoff = opt.MinRetryBackoff
		fopt.MaxRetryBackoff = opt.MaxRetryBackoff
		fopt.ReadTimeout = opt.ReadTimeout
		fopt.WriteTimeout = opt.WriteTimeout
		rdb = redis.NewFailoverClient(&fopt)
	} else {
		if !strings.Contains(hosts, ",") {
			c := redis.NewClient(opt)
			info, err := c.ClusterInfo(context.Background()).Result()
			if err != nil && strings.Contains(err.Error(), "cluster mode") || err == nil && strings.Contains(info, "cluster_state:") {
				logger.Infof("redis %s is in cluster mode", hosts)
			} else {
				rdb = c
			}
		}
		if rdb == nil {
			var copt redis.ClusterOptions
			copt.Addrs = strings.Split(hosts, ",")
			copt.MaxRedirects = 1
			copt.Username = opt.Username
			copt.Password = opt.Password
			copt.TLSConfig = opt.TLSConfig
			copt.MaxRetries = opt.MaxRetries
			copt.MinRetryBackoff = opt.MinRetryBackoff
			copt.MaxRetryBackoff = opt.MaxRetryBackoff
			copt.ReadTimeout = opt.ReadTimeout
			copt.WriteTimeout = opt.WriteTimeout
			rdb = redis.NewClusterClient(&copt)
		}
	}
	u.User = new(url.Userinfo)
	return &redisStore{DefaultObjectStorage{}, rdb, u.String()}, nil
}

func init() {
	Register("redis", newRedis)
}


================================================
FILE: pkg/object/response_attrs.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

const DefaultStorageClass = "STANDARD"

type SupportStorageClass interface {
	SetStorageClass(sc string) error
}

// A generic way to get attributes from different object storage clients
type ResponseAttrs struct {
	storageClass *string
	requestID    *string
	requestSize  *int64
	// other interested attrs can be added here
}

func (r *ResponseAttrs) SetRequestID(id string) *ResponseAttrs {
	if r.requestID != nil { // Will be nil if caller is not interested in this attribute
		*r.requestID = id
	}
	return r
}

func (r *ResponseAttrs) SetStorageClass(sc string) *ResponseAttrs {
	if r.storageClass != nil && sc != "" { // Don't overwrite default storage class
		*r.storageClass = sc
	}
	return r
}

func (r *ResponseAttrs) GetRequestSize() int64 {
	if r.requestSize != nil {
		return *r.requestSize
	}
	return -1
}

type AttrGetter func(attrs *ResponseAttrs)

func WithRequestID(id *string) AttrGetter {
	return func(attrs *ResponseAttrs) {
		attrs.requestID = id
	}
}

func WithStorageClass(sc *string) AttrGetter {
	return func(attrs *ResponseAttrs) {
		attrs.storageClass = sc
	}
}

func WithRequestSize(size *int64) AttrGetter {
	return func(attrs *ResponseAttrs) {
		attrs.requestSize = size
	}
}

func ApplyGetters(getters ...AttrGetter) ResponseAttrs {
	var attrs ResponseAttrs
	for _, getter := range getters {
		getter(&attrs)
	}
	return attrs
}


================================================
FILE: pkg/object/response_attrs_test.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

const reqIDExample = "c30c0107cd3a073f6607cd3a-ac103aa8-1rqU4w-PuO-cs-tos-front-azc-2"

func apiCall(getters ...AttrGetter) {
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass("STANDARD")
	attrs.SetRequestID(reqIDExample)
	return
}

func Test_api_call(t *testing.T) {
	var reqID, sc string

	apiCall(WithRequestID(&reqID), WithStorageClass(&sc))
	assert.Equalf(t, reqIDExample, reqID, "expected %q, got %q", reqIDExample, reqID)
	assert.Equalf(t, "STANDARD", sc, "expected %q, got %q", "STANDARD", sc)

	attrs := ApplyGetters(WithStorageClass(&sc))
	attrs.SetStorageClass("") // Won't overwrite by empty string
	assert.Equalf(t, "STANDARD", sc, "expected %q, got %q", "STANDARD", sc)
}


================================================
FILE: pkg/object/restful.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/viki-org/dnscache"
)

var resolver = dnscache.New(time.Minute)
var httpClient *http.Client

func splitIPsByVersion(ips []net.IP) ([]net.IP, []net.IP) {
	ipv6 := make([]net.IP, 0, len(ips))
	ipv4 := make([]net.IP, 0, len(ips))
	for _, ip := range ips {
		if ip.To4() == nil {
			ipv6 = append(ipv6, ip)
		} else {
			ipv4 = append(ipv4, ip)
		}
	}
	return ipv6, ipv4
}

// dialParallel is adapted from the Go standard library.
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
func dialParallel(ctx context.Context, dialer *net.Dialer, network string, primaries, fallbacks []net.IP, port string) (net.Conn, error) {
	if len(fallbacks) == 0 {
		return dialRandom(ctx, dialer, network, primaries, port)
	}

	returned := make(chan struct{})
	defer close(returned)

	type dialResult struct {
		net.Conn
		error
		primary bool
		done    bool
	}
	results := make(chan dialResult) // unbuffered

	startRacer := func(ctx context.Context, primary bool) {
		ras := primaries
		if !primary {
			ras = fallbacks
		}
		c, err := dialRandom(ctx, dialer, network, ras, port)
		select {
		case results <- dialResult{Conn: c, error: err, primary: primary, done: true}:
		case <-returned:
			if c != nil {
				c.Close()
			}
		}
	}

	var primary, fallback dialResult

	// Start the main racer.
	primaryCtx, primaryCancel := context.WithCancel(ctx)
	defer primaryCancel()
	go startRacer(primaryCtx, true)

	// Start the timer for the fallback racer.
	fallbackTimer := time.NewTimer(300 * time.Millisecond)
	defer fallbackTimer.Stop()

	for {
		select {
		case <-fallbackTimer.C:
			fallbackCtx, fallbackCancel := context.WithCancel(ctx)
			defer fallbackCancel()
			go startRacer(fallbackCtx, false)

		case res := <-results:
			if res.error == nil {
				return res.Conn, nil
			}
			if res.primary {
				primary = res
			} else {
				fallback = res
			}
			if primary.done && fallback.done {
				return nil, errors.Join(primary.error, fallback.error)
			}
			if res.primary && fallbackTimer.Stop() {
				// If we were able to stop the timer, that means it
				// was running (hadn't yet started the fallback), but
				// we just got an error on the primary path, so start
				// the fallback immediately (in 0 nanoseconds).
				fallbackTimer.Reset(0)
			}
		}
	}
}

func dialRandom(ctx context.Context, dialer *net.Dialer, network string, ips []net.IP, port string) (net.Conn, error) {
	var lastErr error
	n := len(ips)
	if n == 0 {
		return nil, fmt.Errorf("no addresses to dial")
	}
	first := rand.Intn(n)
	for i := 0; i < n; i++ {
		ip := ips[(first+i)%n]
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}
		conn, err := dialer.DialContext(ctx, network, net.JoinHostPort(ip.String(), port))
		if err == nil {
			return conn, nil
		}
		lastErr = err
	}
	return nil, lastErr
}

func init() {
	dialer := &net.Dialer{Timeout: time.Second * 10}
	httpClient = &http.Client{
		Transport: &http.Transport{
			Proxy:                 http.ProxyFromEnvironment,
			TLSHandshakeTimeout:   time.Second * 20,
			ResponseHeaderTimeout: time.Second * 30,
			IdleConnTimeout:       time.Second * 300,
			MaxIdleConnsPerHost:   500,
			ReadBufferSize:        32 << 10,
			WriteBufferSize:       32 << 10,
			DialContext: func(ctx context.Context, network string, address string) (net.Conn, error) {
				host, port, err := net.SplitHostPort(address)
				if err != nil {
					return nil, err
				}
				if ip := net.ParseIP(host); ip != nil {
					return dialer.DialContext(ctx, network, net.JoinHostPort(ip.String(), port))
				}
				ips, err := resolver.Fetch(host)
				if err != nil {
					return nil, err
				}
				if len(ips) == 0 {
					return nil, &net.DNSError{Err: "no such host", Name: host, IsNotFound: true}
				}
				ipv6, ipv4 := splitIPsByVersion(ips)
				return dialParallel(ctx, dialer, network, ipv6, ipv4, port)
			},
			DisableCompression: true,
			TLSClientConfig:    &tls.Config{},
		},
		Timeout: time.Hour,
	}
}

func GetHttpClient() *http.Client {
	return httpClient
}

func cleanup(response *http.Response) {
	if response != nil && response.Body != nil {
		_, _ = io.Copy(io.Discard, response.Body)
		_ = response.Body.Close()
	}
}

type RestfulStorage struct {
	DefaultObjectStorage
	endpoint  string
	accessKey string
	secretKey string
	signName  string
	signer    func(*http.Request, string, string, string)
}

func (s *RestfulStorage) String() string {
	return s.endpoint
}

var HEADER_NAMES = []string{"Content-MD5", "Content-Type", "Date"}

func (s *RestfulStorage) request(ctx context.Context, method, key string, body io.Reader, headers map[string]string) (*http.Response, error) {
	uri := s.endpoint + "/" + key
	req, err := http.NewRequestWithContext(ctx, method, uri, body)
	if err != nil {
		return nil, err
	}
	if f, ok := body.(*os.File); ok {
		st, err := f.Stat()
		if err == nil {
			req.ContentLength = st.Size()
		}
	}
	now := time.Now().UTC().Format(http.TimeFormat)
	req.Header.Add("Date", now)
	for key := range headers {
		req.Header.Add(key, headers[key])
	}
	s.signer(req, s.accessKey, s.secretKey, s.signName)
	return httpClient.Do(req)
}

func parseError(resp *http.Response) error {
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("request failed: %s", err)
	}
	return fmt.Errorf("status: %v, message: %s", resp.StatusCode, string(data))
}

func (s *RestfulStorage) Head(ctx context.Context, key string) (Object, error) {
	resp, err := s.request(ctx, "HEAD", key, nil, nil)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode == http.StatusNotFound {
		return nil, os.ErrNotExist
	}
	defer cleanup(resp)
	if resp.StatusCode != 200 {
		return nil, parseError(resp)
	}

	lastModified := resp.Header.Get("Last-Modified")
	if lastModified == "" {
		return nil, fmt.Errorf("cannot get last modified time")
	}
	mtime, _ := time.Parse(time.RFC1123, lastModified)
	return &obj{
		key,
		resp.ContentLength,
		mtime,
		strings.HasSuffix(key, "/"),
		"",
	}, nil
}

func getRange(off, limit int64) string {
	if off > 0 || limit > 0 {
		if limit > 0 {
			return fmt.Sprintf("bytes=%d-%d", off, off+limit-1)
		} else {
			return fmt.Sprintf("bytes=%d-", off)
		}
	}
	return ""
}

func checkGetStatus(statusCode int, partial bool) error {
	var expected = http.StatusOK
	if partial {
		expected = http.StatusPartialContent
	}
	if statusCode != expected {
		return fmt.Errorf("expected status code %d, but got %d", expected, statusCode)
	}
	return nil
}

func (s *RestfulStorage) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	headers := make(map[string]string)
	if off > 0 || limit > 0 {
		headers["Range"] = getRange(off, limit)
	}
	resp, err := s.request(ctx, "GET", key, nil, headers)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != 200 && resp.StatusCode != 206 {
		return nil, parseError(resp)
	}
	if err = checkGetStatus(resp.StatusCode, len(headers) > 0); err != nil {
		_ = resp.Body.Close()
		return nil, err
	}
	return resp.Body, nil
}

func (u *RestfulStorage) Put(ctx context.Context, key string, body io.Reader, getters ...AttrGetter) error {
	resp, err := u.request(ctx, "PUT", key, body, nil)
	if err != nil {
		return err
	}
	defer cleanup(resp)
	if resp.StatusCode != 201 && resp.StatusCode != 200 {
		return parseError(resp)
	}
	return nil
}

func (s *RestfulStorage) Copy(ctx context.Context, dst, src string) error {
	in, err := s.Get(ctx, src, 0, -1)
	if err != nil {
		return err
	}
	defer in.Close()
	d, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	return s.Put(ctx, dst, bytes.NewReader(d))
}

func (s *RestfulStorage) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	resp, err := s.request(ctx, "DELETE", key, nil, nil)
	if err != nil {
		return err
	}
	defer cleanup(resp)
	if resp.StatusCode != 204 && resp.StatusCode != 404 {
		return parseError(resp)
	}
	return nil
}

func (s *RestfulStorage) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	return nil, false, "", notSupported
}

var _ ObjectStorage = (*RestfulStorage)(nil)


================================================
FILE: pkg/object/restful_test.go
================================================
/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"net"
	"testing"
	"time"
)

// startTCPListener starts a TCP listener on the given address and returns it.
// The listener accepts connections in background and immediately closes them.
func startTCPListener(t *testing.T, addr string) net.Listener {
	t.Helper()
	ln, err := net.Listen("tcp", addr)
	if err != nil {
		t.Fatalf("failed to listen on %s: %v", addr, err)
	}
	go func() {
		for {
			c, err := ln.Accept()
			if err != nil {
				return
			}
			c.Close()
		}
	}()
	return ln
}

func getPort(t *testing.T, ln net.Listener) string {
	t.Helper()
	_, port, err := net.SplitHostPort(ln.Addr().String())
	if err != nil {
		t.Fatal(err)
	}
	return port
}

func TestDialParallel_OnlyPrimaries(t *testing.T) {
	ln := startTCPListener(t, "127.0.0.1:0")
	defer ln.Close()
	port := getPort(t, ln)

	dialer := &net.Dialer{Timeout: 2 * time.Second}
	conn, err := dialParallel(context.Background(), dialer, "tcp",
		[]net.IP{net.ParseIP("127.0.0.1")}, nil, port)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	conn.Close()
}

func TestDialParallel_OnlyFallbacks(t *testing.T) {
	// Bug reproduced: empty primaries should not panic
	ln := startTCPListener(t, "127.0.0.1:0")
	defer ln.Close()
	port := getPort(t, ln)

	dialer := &net.Dialer{Timeout: 2 * time.Second}
	conn, err := dialParallel(context.Background(), dialer, "tcp",
		nil, []net.IP{net.ParseIP("127.0.0.1")}, port)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	conn.Close()
}

func TestDialParallel_PrimaryFailsFast_FallbackSucceeds(t *testing.T) {
	// Primary (IPv6 ::1) has no listener → fails fast (connection refused)
	// Fallback (127.0.0.1) has a listener → succeeds
	ln := startTCPListener(t, "127.0.0.1:0")
	defer ln.Close()
	port := getPort(t, ln)

	dialer := &net.Dialer{Timeout: 2 * time.Second}
	conn, err := dialParallel(context.Background(), dialer, "tcp",
		[]net.IP{net.ParseIP("::1")},       // primary - will fail (no listener on ::1:port)
		[]net.IP{net.ParseIP("127.0.0.1")}, // fallback - has listener
		port)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	conn.Close()
}

func TestDialParallel_BothFail(t *testing.T) {
	dialer := &net.Dialer{Timeout: 500 * time.Millisecond}
	_, err := dialParallel(context.Background(), dialer, "tcp",
		[]net.IP{net.ParseIP("::1")},
		[]net.IP{net.ParseIP("127.0.0.1")},
		"0")
	if err == nil {
		t.Fatal("expected error when both groups fail, got nil")
	}
}

func TestSplitIPsByVersion(t *testing.T) {
	ips := []net.IP{
		net.ParseIP("127.0.0.1"),
		net.ParseIP("::1"),
		net.ParseIP("10.0.0.1"),
		net.ParseIP("fe80::1"),
	}
	v6, v4 := splitIPsByVersion(ips)
	if len(v6) != 2 {
		t.Errorf("expected 2 IPv6, got %d", len(v6))
	}
	if len(v4) != 2 {
		t.Errorf("expected 2 IPv4, got %d", len(v4))
	}
}


================================================
FILE: pkg/object/s3.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/url"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/aws/middleware"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/s3/types"
	"github.com/aws/smithy-go"
	smithymiddleware "github.com/aws/smithy-go/middleware"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
)

const awsDefaultRegion = "us-east-1"
const s3RequestIDKey = "X-Amz-Request-Id"

type s3client struct {
	s3              *s3.Client
	bucket          string
	region          string
	sc              string
	disableChecksum bool
}

func (s *s3client) String() string {
	if s.s3.Options().BaseEndpoint != nil {
		endpoint := *s.s3.Options().BaseEndpoint
		if idx := strings.Index(endpoint, "://"); idx >= 0 {
			endpoint = endpoint[idx+3:]
		}
		return fmt.Sprintf("s3://%s/%s/", endpoint, s.bucket)
	}
	return fmt.Sprintf("s3://%s/", s.bucket)
}

func (s *s3client) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func isExists(err error) bool {
	msg := err.Error()
	return strings.Contains(msg, "BucketAlreadyExists") || strings.Contains(msg, "BucketAlreadyOwnedByYou")
}

func (s *s3client) Create(ctx context.Context) error {
	if _, _, _, err := s.List(ctx, "", "", "", "", 1, true); err == nil {
		return nil
	}
	_, err := s.s3.CreateBucket(ctx, &s3.CreateBucketInput{
		Bucket: &s.bucket,
		CreateBucketConfiguration: &types.CreateBucketConfiguration{
			LocationConstraint: types.BucketLocationConstraint(s.region),
		}})
	if err != nil && isExists(err) {
		err = nil
	}
	return err
}

func (s *s3client) Head(ctx context.Context, key string) (Object, error) {
	param := s3.HeadObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	r, err := s.s3.HeadObject(ctx, &param)
	if err != nil {
		var notFound *types.NotFound
		if errors.As(err, &notFound) {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		*r.ContentLength,
		*r.LastModified,
		strings.HasSuffix(key, "/"),
		string(r.StorageClass),
	}, nil
}

func (s *s3client) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	params := &s3.GetObjectInput{Bucket: &s.bucket, Key: &key}
	if off > 0 || limit > 0 {
		var r string
		if limit > 0 {
			r = fmt.Sprintf("bytes=%d-%d", off, off+limit-1)
		} else {
			r = fmt.Sprintf("bytes=%d-", off)
		}
		params.Range = &r
	}
	attrs := ApplyGetters(getters...)
	resp, err := s.s3.GetObject(ctx, params)
	if err != nil {
		var re s3.ResponseError
		if errors.As(err, &re) {
			attrs.SetRequestID(re.ServiceRequestID())
		}
		return nil, err
	}
	if reqID, ok := middleware.GetRequestIDMetadata(resp.ResultMetadata); ok {
		attrs.SetRequestID(reqID)
	}
	if off == 0 && limit == -1 && !s.disableChecksum {
		cs := resp.Metadata[strings.ToLower(checksumAlgr)]
		if cs != "" && resp.ContentLength != nil {
			resp.Body = verifyChecksum(resp.Body, cs, *resp.ContentLength)
		}
	}
	attrs.SetStorageClass(string(resp.StorageClass))
	return resp.Body, nil
}

func (s *s3client) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var body io.ReadSeeker
	if b, ok := in.(io.ReadSeeker); ok {
		body = b
	} else {
		data, err := io.ReadAll(in)
		if err != nil {
			return err
		}
		body = bytes.NewReader(data)
	}
	mimeType := utils.GuessMimeType(key)
	params := &s3.PutObjectInput{
		Bucket:            &s.bucket,
		Key:               &key,
		Body:              body,
		ContentType:       &mimeType,
		StorageClass:      types.StorageClass(s.sc),
		ChecksumAlgorithm: "", // X-Amz-Content-Sha256: UNSIGNED-PAYLOAD
	}
	if !s.disableChecksum {
		checksum := generateChecksum(body)
		params.Metadata = map[string]string{checksumAlgr: checksum}
	}
	attrs := ApplyGetters(getters...)
	attrs.SetStorageClass(s.sc)
	resp, err := s.s3.PutObject(ctx, params)
	if err != nil {
		var re s3.ResponseError
		if errors.As(err, &re) {
			attrs.SetRequestID(re.ServiceRequestID())
		}
		return err
	}
	if reqID, ok := middleware.GetRequestIDMetadata(resp.ResultMetadata); ok {
		attrs.SetRequestID(reqID)
	}
	return err
}

func (s *s3client) Copy(ctx context.Context, dst, src string) error {
	src = s.bucket + "/" + src
	params := &s3.CopyObjectInput{
		Bucket:       &s.bucket,
		Key:          &dst,
		CopySource:   &src,
		StorageClass: types.StorageClass(s.sc),
	}
	_, err := s.s3.CopyObject(ctx, params)
	return err
}

func (s *s3client) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	param := s3.DeleteObjectInput{
		Bucket: &s.bucket,
		Key:    &key,
	}
	resp, err := s.s3.DeleteObject(ctx, &param)
	attrs := ApplyGetters(getters...)
	if err != nil {
		var re s3.ResponseError
		if errors.As(err, &re) {
			attrs.SetRequestID(re.ServiceRequestID())
		}
		if strings.Contains(err.Error(), "NoSuchKey") {
			err = nil
		}
	} else {
		if reqID, ok := middleware.GetRequestIDMetadata(resp.ResultMetadata); ok {
			attrs.SetRequestID(reqID)
		}
	}
	return err
}

func (s *s3client) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 1000 {
		limit = 1000
	}
	param := s3.ListObjectsV2Input{
		Bucket:       &s.bucket,
		Prefix:       &prefix,
		MaxKeys:      aws.Int32(int32(limit)),
		EncodingType: types.EncodingTypeUrl,
		StartAfter:   aws.String(start),
		Delimiter:    aws.String(delimiter),
	}
	if token != "" {
		param.ContinuationToken = aws.String(token)
	}
	resp, err := s.s3.ListObjectsV2(ctx, &param)
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := resp.Contents[i]
		oKey, err := decodeKey(*o.Key, aws.String(string(resp.EncodingType)))
		if err != nil {
			return nil, false, "", errors.WithMessagef(err, "failed to decode key %s", *o.Key)
		}
		if !strings.HasPrefix(oKey, prefix) || oKey < start {
			return nil, false, "", fmt.Errorf("found invalid key %s from List, prefix: %s, marker: %s", oKey, prefix, start)
		}
		objs[i] = &obj{
			oKey,
			*o.Size,
			*o.LastModified,
			strings.HasSuffix(oKey, "/"),
			string(o.StorageClass),
		}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			prefix, err := decodeKey(*p.Prefix, aws.String(string(resp.EncodingType)))
			if err != nil {
				return nil, false, "", errors.WithMessagef(err, "failed to decode commonPrefixes %s", *p.Prefix)
			}
			objs = append(objs, &obj{prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	var isTruncated bool
	if resp.IsTruncated != nil {
		isTruncated = *resp.IsTruncated
	}
	var nextMarker string
	if resp.NextContinuationToken != nil {
		nextMarker = *resp.NextContinuationToken
	}
	return objs, isTruncated, nextMarker, nil
}

func (s *s3client) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (s *s3client) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	params := &s3.CreateMultipartUploadInput{
		Bucket:       &s.bucket,
		Key:          &key,
		StorageClass: types.StorageClass(s.sc),
	}
	resp, err := s.s3.CreateMultipartUpload(ctx, params)
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: *resp.UploadId, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (s *s3client) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	params := &s3.UploadPartInput{
		Bucket:            &s.bucket,
		Key:               &key,
		UploadId:          &uploadID,
		Body:              bytes.NewReader(body),
		PartNumber:        aws.Int32(int32(num)),
		ChecksumAlgorithm: "", // X-Amz-Content-Sha256: UNSIGNED-PAYLOAD
	}
	resp, err := s.s3.UploadPart(ctx, params)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: *resp.ETag}, nil
}

func (s *s3client) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	resp, err := s.s3.UploadPartCopy(ctx, &s3.UploadPartCopyInput{
		Bucket:          aws.String(s.bucket),
		CopySource:      aws.String(s.bucket + "/" + srcKey),
		CopySourceRange: aws.String(fmt.Sprintf("bytes=%d-%d", off, off+size-1)),
		Key:             aws.String(key),
		PartNumber:      aws.Int32(int32(num)),
		UploadId:        aws.String(uploadID),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: *resp.CopyPartResult.ETag}, nil
}

func (s *s3client) AbortUpload(ctx context.Context, key string, uploadID string) {
	params := &s3.AbortMultipartUploadInput{
		Bucket:   &s.bucket,
		Key:      &key,
		UploadId: &uploadID,
	}
	_, _ = s.s3.AbortMultipartUpload(ctx, params)
}

func (s *s3client) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	var s3Parts []types.CompletedPart
	for i := range parts {
		s3Parts = append(s3Parts, types.CompletedPart{ETag: &parts[i].ETag, PartNumber: aws.Int32(int32(parts[i].Num))})
	}
	params := &s3.CompleteMultipartUploadInput{
		Bucket:          &s.bucket,
		Key:             &key,
		UploadId:        &uploadID,
		MultipartUpload: &types.CompletedMultipartUpload{Parts: s3Parts},
	}
	_, err := s.s3.CompleteMultipartUpload(ctx, params)
	return err
}

func (s *s3client) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	input := &s3.ListMultipartUploadsInput{
		Bucket:    aws.String(s.bucket),
		KeyMarker: aws.String(marker),
	}

	result, err := s.s3.ListMultipartUploads(ctx, input)
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{*u.Key, *u.UploadId, *u.Initiated}
	}
	var nextMarker string
	if result.NextKeyMarker != nil {
		nextMarker = *result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (s *s3client) SetStorageClass(sc string) error {
	s.sc = sc
	return nil
}

func autoS3Region(bucketName, accessKey, secretKey string) (string, error) {
	var cfg aws.Config
	var err error
	if accessKey != "" {
		cfg, err = config.LoadDefaultConfig(ctx, config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, "")))
	} else {
		cfg, err = config.LoadDefaultConfig(ctx)
	}
	if err != nil {
		return "", err
	}
	cfg.HTTPClient = httpClient
	var regions []string
	if r := os.Getenv("AWS_DEFAULT_REGION"); r != "" {
		regions = []string{r}
	} else {
		regions = []string{awsDefaultRegion, "cn-north-1"}
	}
	var result *s3.GetBucketLocationOutput
	for _, r := range regions {
		// try to get bucket location
		cfg.Region = r
		client := s3.NewFromConfig(cfg)
		result, err = client.GetBucketLocation(ctx, &s3.GetBucketLocationInput{
			Bucket: aws.String(bucketName),
		})
		if err == nil {
			logger.Debugf("Get location of bucket %q from region %q endpoint success: %s",
				bucketName, r, result.LocationConstraint)
			return string(result.LocationConstraint), nil
		}
		// continue to try other regions if the credentials are invalid, otherwise stop trying.
		var err1 *smithy.GenericAPIError
		if errors.As(err, &err1) {
			if err1.Code != "SignatureDoesNotMatch" && err1.Code != "InvalidAccessKeyId" {
				return "", err
			}
		}
		logger.Debugf("Fail to get location of bucket %q from region %q endpoint: %s", bucketName, r, err)
	}
	return "", err
}

func parseRegion(endpoint string) string {
	if strings.HasPrefix(endpoint, "s3-") || strings.HasPrefix(endpoint, "s3.") {
		endpoint = endpoint[3:]
	}
	if strings.HasPrefix(endpoint, "dualstack") {
		endpoint = endpoint[len("dualstack."):]
	}
	if endpoint == "amazonaws.com" {
		endpoint = awsDefaultRegion + "." + endpoint
	}
	region := strings.Split(endpoint, ".")[0]
	if region == "external-1" {
		region = awsDefaultRegion
	}
	return region
}

func defaultPathStyle() bool {
	v := os.Getenv("JFS_S3_VHOST_STYLE")
	return v == "" || v == "0" || v == "false"
}

var oracleCompileRegexp = `.*\.compat.objectstorage\.(.*)\.oraclecloud\.com`
var OVHCompileRegexp = `^s3\.(\w*)(\.\w*)?\.cloud\.ovh\.net$`

func newS3(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		if len(strings.Split(endpoint, ".")) > 1 && !strings.HasSuffix(endpoint, ".amazonaws.com") {
			endpoint = fmt.Sprintf("http://%s", endpoint)
		} else {
			endpoint = fmt.Sprintf("https://%s", endpoint)
		}
	}
	endpoint = strings.Trim(endpoint, "/")
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err.Error())
	}

	var (
		bucketName string
		region     string
		ep         string
	)

	if uri.Path != "" {
		// [ENDPOINT]/[BUCKET]
		pathParts := strings.Split(uri.Path, "/")
		bucketName = pathParts[1]
		if strings.Contains(uri.Host, ".amazonaws.com") {
			// standard s3
			// s3-[REGION].[REST_OF_ENDPOINT]/[BUCKET]
			// s3.[REGION].amazonaws.com[.cn]/[BUCKET]
			endpoint = uri.Host
			region = parseRegion(endpoint)
		} else {
			// compatible s3
			ep = uri.Host
		}
	} else {
		// [BUCKET].[ENDPOINT]
		hostParts := strings.SplitN(uri.Host, ".", 2)
		if len(hostParts) == 1 {
			// take endpoint as bucketname
			bucketName = hostParts[0]
			if region, err = autoS3Region(bucketName, accessKey, secretKey); err != nil {
				return nil, fmt.Errorf("Can't guess your region for bucket %s: %s", bucketName, err)
			}
		} else {
			// get region or endpoint
			if strings.Contains(uri.Host, ".amazonaws.com") {
				vpcCompile := regexp.MustCompile(`^.*\.(.*)\.vpce\.amazonaws\.com`)
				// vpc link
				if vpcCompile.MatchString(uri.Host) {
					bucketName = hostParts[0]
					ep = hostParts[1]
					if submatch := vpcCompile.FindStringSubmatch(uri.Host); len(submatch) == 2 {
						region = submatch[1]
					}
				} else {
					// standard s3
					// [BUCKET].s3-[REGION].[REST_OF_ENDPOINT]
					// [BUCKET].s3.[REGION].amazonaws.com[.cn]
					hostParts = strings.SplitN(uri.Host, ".s3", 2)
					bucketName = hostParts[0]
					endpoint = "s3" + hostParts[1]
					region = parseRegion(endpoint)
				}
			} else {
				// compatible s3
				bucketName = hostParts[0]
				ep = hostParts[1]

				for _, compileRegexp := range []string{oracleCompileRegexp, OVHCompileRegexp} {
					compile := regexp.MustCompile(compileRegexp)
					if compile.MatchString(ep) {
						if submatch := compile.FindStringSubmatch(ep); len(submatch) >= 2 {
							region = submatch[1]
							break
						}
					}
				}
			}
		}
	}
	if region == "" {
		region = os.Getenv("AWS_REGION")
	}
	if region == "" {
		region = os.Getenv("AWS_DEFAULT_REGION")
	}
	if region == "" {
		region = awsDefaultRegion
	}
	var optFns []func(*s3.Options)
	ssl := strings.ToLower(uri.Scheme) == "https"
	optFns = append(optFns, func(options *s3.Options) {
		options.EndpointOptions.DisableHTTPS = !ssl
		options.Region = region
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})

	disable100Continue := strings.EqualFold(uri.Query().Get("disable-100-continue"), "true")
	if disable100Continue {
		logger.Infof("HTTP header 100-Continue is disabled")
		optFns = append(optFns, func(options *s3.Options) {
			options.ContinueHeaderThresholdBytes = -1
		})
	}
	disableChecksum := strings.EqualFold(uri.Query().Get("disable-checksum"), "true")
	if disableChecksum {
		logger.Infof("default CRC checksum is disabled")
	}

	if ep != "" {
		optFns = append(optFns, func(options *s3.Options) {
			options.BaseEndpoint = aws.String(uri.Scheme + "://" + ep)
			options.UsePathStyle = defaultPathStyle()
		})
	}
	var cfg aws.Config
	if accessKey == "anonymous" {
		cfg, err = config.LoadDefaultConfig(ctx,
			config.WithCredentialsProvider(aws.AnonymousCredentials{}))
	} else if accessKey != "" {
		cfg, err = config.LoadDefaultConfig(ctx,
			config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	} else {
		cfg, err = config.LoadDefaultConfig(ctx)
	}
	if err != nil {
		return nil, err
	}

	cfg.HTTPClient = httpClient
	client := s3.NewFromConfig(cfg, optFns...)
	return &s3client{bucket: bucketName, s3: client, disableChecksum: disableChecksum, region: region}, nil
}

func init() {
	Register("s3", newS3)
}


================================================
FILE: pkg/object/s3_test.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func Test_s3client_full_string(t *testing.T) {
	tests := []struct {
		endpoint string
		want     string
	}{
		{endpoint: "s3.compatible.site/bucket", want: "s3://s3.compatible.site/bucket/"},
		{endpoint: "http://s3.compatible.site/bucket", want: "s3://s3.compatible.site/bucket/"},
		{endpoint: "s3://s3.compatible.site/bucket", want: "s3://s3.compatible.site/bucket/"},
		{endpoint: "https://mybucket.s3.us-east-2.amazonaws.com", want: "s3://mybucket/"},
	}
	for _, tt := range tests {
		t.Run(tt.endpoint, func(t *testing.T) {
			stor, err := newS3(tt.endpoint, "", "", "")
			if err != nil {
				t.Fatalf("newS3() error = %v", err)
			}
			assert.Equalf(t, tt.want, stor.String(), "Display full address of s3 compatible object storage")
		})
	}
}


================================================
FILE: pkg/object/scw.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"fmt"
	"net/url"
	"os"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type scw struct {
	s3client
}

func (s *scw) String() string {
	return fmt.Sprintf("scw://%s/", s.s3client.bucket)
}

func (s *scw) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              5 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             1000,
	}
}

func newScw(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	region := hostParts[2]
	endpoint = uri.Scheme + "://" + uri.Host[len(bucket)+1:]
	if accessKey == "" {
		accessKey = os.Getenv("SCW_ACCESS_KEY")
	}
	if secretKey == "" {
		secretKey = os.Getenv("SCW_SECRET_KEY")
	}

	awsCfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(awsCfg, func(options *s3.Options) {
		options.Region = region
		options.BaseEndpoint = aws.String(endpoint)
		options.EndpointOptions.DisableHTTPS = !ssl
		options.UsePathStyle = false
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	return &scw{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("scw", newScw)
}


================================================
FILE: pkg/object/sftp.go
================================================
//go:build !nosftp
// +build !nosftp

// Part of this file is borrowed from Rclone under MIT license:
// https://github.com/ncw/rclone/blob/master/backend/sftp/sftp.go

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"net/url"
	"os"
	"os/user"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/pkg/errors"
	"github.com/pkg/sftp"
	"golang.org/x/crypto/ssh"
	"golang.org/x/crypto/ssh/agent"
	"golang.org/x/crypto/ssh/knownhosts"
	"golang.org/x/term"
)

// conn encapsulates an ssh client and corresponding sftp client
type conn struct {
	sshClient  *ssh.Client
	sftpClient *sftp.Client
	err        chan error
}

// Wait for connection to close
func (c *conn) wait() {
	c.err <- c.sshClient.Conn.Wait()
}

// Closes the connection
func (c *conn) close() error {
	sftpErr := c.sftpClient.Close()
	sshErr := c.sshClient.Close()
	if sftpErr != nil {
		return sftpErr
	}
	return sshErr
}

// Returns an error if closed
func (c *conn) closed() error {
	select {
	case err := <-c.err:
		return err
	default:
	}
	return nil
}

type sftpStore struct {
	DefaultObjectStorage
	host   string
	port   string
	root   string
	config *ssh.ClientConfig
	poolMu sync.Mutex
	pool   []*conn
}

// Open a new connection to the SFTP server.
func (f *sftpStore) sftpConnection() (c *conn, err error) {
	c = &conn{
		err: make(chan error, 1),
	}
	conn, err := net.Dial("tcp", net.JoinHostPort(f.host, f.port))
	if err != nil {
		return nil, err
	}
	sshc, chans, reqs, err := ssh.NewClientConn(conn, net.JoinHostPort(f.host, f.port), f.config)
	if err != nil {
		return nil, err
	}
	c.sshClient = ssh.NewClient(sshc, chans, reqs)
	c.sftpClient, err = sftp.NewClient(c.sshClient)
	if err != nil {
		_ = c.sshClient.Close()
		return nil, errors.Wrap(err, "couldn't initialise SFTP")
	}
	go c.wait()
	return c, nil
}

// Get an SFTP connection from the pool, or open a new one
func (f *sftpStore) getSftpConnection() (c *conn, err error) {
	f.poolMu.Lock()
	for len(f.pool) > 0 {
		c = f.pool[0]
		f.pool = f.pool[1:]
		err := c.closed()
		if err == nil {
			break
		}
		c = nil
	}
	f.poolMu.Unlock()
	if c != nil {
		return c, nil
	}
	return f.sftpConnection()
}

// Return an SFTP connection to the pool
//
// It nils the pointed to connection out so it can't be reused
//
// if err is not nil then it checks the connection is alive using a
// Getwd request
func (f *sftpStore) putSftpConnection(c *conn, err error) {
	if err != nil {
		// work out if this is an expected error
		underlyingErr := errors.Cause(err)
		isRegularError := false
		switch underlyingErr {
		case os.ErrNotExist:
			isRegularError = true
		default:
			switch underlyingErr.(type) {
			case *sftp.StatusError, *os.PathError:
				isRegularError = true
			}
		}
		// If not a regular SFTP error code then check the connection
		if !isRegularError {
			_, nopErr := c.sftpClient.Getwd()
			if nopErr != nil {
				_ = c.close()
				return
			}
		}
	}
	f.poolMu.Lock()
	f.pool = append(f.pool, c)
	f.poolMu.Unlock()
}

func (f *sftpStore) String() string {
	return fmt.Sprintf("sftp://%s@%s:%s", f.config.User, f.host, f.root)
}

// always preserve suffix `/` for directory key
func (f *sftpStore) path(key string) string {
	return f.root + key
}

func (f *sftpStore) Head(ctx context.Context, key string) (Object, error) {
	c, err := f.getSftpConnection()
	if err != nil {
		return nil, err
	}
	defer func() { f.putSftpConnection(c, err) }()

	info, err := c.sftpClient.Lstat(f.path(key))
	if err != nil {
		return nil, err
	}
	var isSymlink bool
	if info.Mode()&os.ModeSymlink != 0 {
		isSymlink = true
		info, err = c.sftpClient.Stat(f.path(key))
		if err != nil {
			return nil, err
		}
	}
	return f.fileInfo(key, info, isSymlink), nil
}

func (f *sftpStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	c, err := f.getSftpConnection()
	if err != nil {
		return nil, err
	}
	defer func() { f.putSftpConnection(c, err) }()

	p := f.path(key)
	ff, err := c.sftpClient.Open(p)
	if err != nil {
		return nil, err
	}
	finfo, err := ff.Stat()
	if err != nil {
		return nil, err
	}
	if finfo.IsDir() || off >= finfo.Size() {
		_ = ff.Close()
		return io.NopCloser(bytes.NewBuffer([]byte{})), nil
	}

	if limit > 0 {
		return &SectionReaderCloser{
			SectionReader: io.NewSectionReader(ff, off, limit),
			Closer:        ff,
		}, nil
	}
	return ff, err
}

func (f *sftpStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) (err error) {
	c, err := f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()

	p := f.path(key)
	if strings.HasSuffix(p, dirSuffix) {
		return c.sftpClient.MkdirAll(p)
	}
	if err := c.sftpClient.MkdirAll(filepath.Dir(p)); err != nil {
		return err
	}

	var tmp string
	if PutInplace {
		tmp = p
	} else {
		name := path.Base(p)
		if len(name) > 200 {
			name = name[:200]
		}
		tmp = TmpFilePath(p, name)
		defer func() {
			if err != nil {
				_ = c.sftpClient.Remove(tmp)
			}
		}()
	}
	ff, err := c.sftpClient.OpenFile(tmp, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
	if err != nil {
		return err
	}
	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	_, err = io.CopyBuffer(ff, in, *buf)
	if err != nil {
		_ = ff.Close()
		return err
	}
	err = ff.Close()
	if err != nil {
		return err
	}
	if !PutInplace {
		_ = c.sftpClient.Remove(p)
		return c.sftpClient.Rename(tmp, p)
	}
	return nil
}

func (f *sftpStore) Chtimes(key string, mtime time.Time) (err error) {
	var c *conn
	c, err = f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()
	// fixme: 1. The Chtimes of sftp always follows link 2. Only pass the mtime field to avoid updating atime
	// ref: https://datatracker.ietf.org/doc/html/draft-ietf-secsh-filexfer-13#section-8.6
	return c.sftpClient.Chtimes(f.path(key), mtime, mtime)
}

func (f *sftpStore) Chmod(key string, mode os.FileMode) (err error) {
	var c *conn
	c, err = f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()
	return c.sftpClient.Chmod(f.path(key), mode)
}

func (f *sftpStore) Chown(key string, owner, group string) (err error) {
	var c *conn
	c, err = f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()
	uid := utils.LookupUser(owner)
	gid := utils.LookupGroup(group)
	if uid == -1 || gid == -1 {
		return fmt.Errorf("user(%s):group(%s) not found", owner, group)
	}
	return c.sftpClient.Chown(f.path(key), uid, gid)
}

func (f *sftpStore) Symlink(oldName, newName string) error {
	c, err := f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()
	p := f.path(newName)
	err = c.sftpClient.Symlink(oldName, p)
	if err != nil && os.IsNotExist(err) {
		_ = c.sftpClient.MkdirAll(filepath.Dir(p))
		err = c.sftpClient.Symlink(oldName, p)
	}
	return err
}

func (f *sftpStore) Readlink(name string) (link string, err error) {
	c, err := f.getSftpConnection()
	if err != nil {
		return "", err
	}
	defer func() { f.putSftpConnection(c, err) }()
	return c.sftpClient.ReadLink(f.path(name))
}

func (f *sftpStore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	c, err := f.getSftpConnection()
	if err != nil {
		return err
	}
	defer func() { f.putSftpConnection(c, err) }()
	err = c.sftpClient.Remove(strings.TrimRight(f.path(key), dirSuffix))
	if err != nil && os.IsNotExist(err) {
		err = nil
	}
	return err
}

func (f *sftpStore) sortByName(c *sftp.Client, path string, fis []os.FileInfo, followLink bool) []*mEntry {
	mEntries := make([]*mEntry, len(fis))
	for i, e := range fis {
		isSymlink := e.Mode()&os.ModeSymlink != 0
		if e.IsDir() {
			mEntries[i] = &mEntry{e, e.Name() + dirSuffix, nil, false}
		} else if isSymlink && followLink {
			var fi os.FileInfo
			p := path + e.Name()
			fi, err := c.Stat(p)
			if err != nil {
				mEntries[i] = &mEntry{e, e.Name(), nil, true}
				continue
			}
			name := e.Name()
			if fi.IsDir() {
				name = e.Name() + dirSuffix
			}
			mEntries[i] = &mEntry{e, name, fi, false}
		} else {
			mEntries[i] = &mEntry{e, e.Name(), nil, isSymlink}
		}
	}
	sort.Slice(mEntries, func(i, j int) bool { return mEntries[i].Name() < mEntries[j].Name() })
	return mEntries
}

func (f *sftpStore) fileInfo(key string, fi os.FileInfo, isSymlink bool) Object {
	owner, group := getOwnerGroup(fi)
	ff := &file{
		obj{key, fi.Size(), fi.ModTime(), fi.IsDir(), ""},
		owner,
		group,
		fi.Mode(),
		isSymlink,
	}
	if fi.IsDir() {
		if key != "" && !strings.HasSuffix(key, "/") {
			ff.key += "/"
		}
		ff.size = 0
	}
	return ff
}

func (f *sftpStore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}

	c, err := f.getSftpConnection()
	if err != nil {
		return nil, false, "", err
	}
	defer func() { f.putSftpConnection(c, err) }()

	var objs []Object
	dir := f.path(prefix)
	if !strings.HasSuffix(dir, "/") {
		dir = filepath.Dir(dir)
		if !strings.HasSuffix(dir, dirSuffix) {
			dir += dirSuffix
		}
	} else if marker == "" {
		obj, err := f.Head(ctx, prefix)
		if err != nil {
			if os.IsNotExist(err) {
				return nil, false, "", nil
			}
			return nil, false, "", err
		}
		objs = append(objs, obj)
	}
	infos, err := c.sftpClient.ReadDir(dir)
	if err != nil {
		if os.IsPermission(err) {
			logger.Warnf("skip %s: %s", dir, err)
			return nil, false, "", nil
		}
		if os.IsNotExist(err) {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}

	entries := f.sortByName(c.sftpClient, dir, infos, followLink)
	for _, e := range entries {
		p := path.Join(dir, e.Name())
		if e.IsDir() {
			p = p + "/"
		}
		if !strings.HasPrefix(p, f.root) {
			continue
		}
		key := p[len(f.root):]
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		info := e.Info()
		f := toFile(key, info, e.isSymlink, getOwnerGroup)
		objs = append(objs, f)
		if len(objs) == int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func sshInteractive(user, instruction string, questions []string, echos []bool) (answers []string, err error) {
	if len(questions) == 0 {
		fmt.Print(user, instruction)
	} else {
		answers = make([]string, len(questions))
		for i, q := range questions {
			fmt.Print(q)
			var ans []byte
			if echos[i] {
				_, err = fmt.Scanln(&answers[i])
			} else {
				ans, err = term.ReadPassword(int(syscall.Stdin))
				answers[i] = string(ans)
			}
			if err != nil {
				return nil, fmt.Errorf("read password: %s", err)
			}
		}
	}
	return answers, nil
}

func unescape(original string) string {
	if escaped, err := url.QueryUnescape(original); err != nil {
		logger.Warnf("unescape(%s) error: %s", original, err)
		return original
	} else {
		return escaped
	}
}

func newSftp(endpoint, username, pass, token string) (ObjectStorage, error) {
	idx := strings.LastIndex(endpoint, ":")
	host, port, err := net.SplitHostPort(endpoint[:idx])
	if err != nil && strings.Contains(err.Error(), "missing port") {
		host, port, err = net.SplitHostPort(endpoint[:idx] + ":22")
	}
	if err != nil {
		return nil, fmt.Errorf("unable to parse host from endpoint (%s): %q", endpoint, err)
	}
	root := filepath.Clean(endpoint[idx+1:])
	if runtime.GOOS == "windows" {
		root = strings.Replace(root, "\\", "/", -1)
	}
	// append suffix `/` removed by filepath.Clean()
	if strings.HasSuffix(endpoint[idx+1:], dirSuffix) {
		root = root + dirSuffix
	}

	if username == "" {
		u, _ := user.Current()
		if u != nil {
			username = u.Username
		}
	}
	username = unescape(username)
	var auth []ssh.AuthMethod
	if pass != "" {
		auth = append(auth, ssh.Password(unescape(pass)))
	}

	var signers []ssh.Signer
	if privateKeyPath := os.Getenv("SSH_PRIVATE_KEY_PATH"); privateKeyPath != "" {
		key, err := os.ReadFile(privateKeyPath)
		if err != nil {
			return nil, fmt.Errorf("unable to read private key, error: %v", err)
		}
		signer, err := ssh.ParsePrivateKey(key)
		if err != nil {
			return nil, fmt.Errorf("unable to parse private key, error: %v", err)
		}
		signers = append(signers, signer)
	} else {
		home := filepath.Join(os.Getenv("HOME"), ".ssh")
		var algo = []string{"rsa", "dsa", "ecdsa", "ecdsa_sk", "ed25519", "xmss"}
		for _, a := range algo {
			key, err := os.ReadFile(filepath.Join(home, "id_"+a))
			if err != nil {
				key, err = os.ReadFile(filepath.Join(home, "id_"+a+"-cert"))
			}
			if err == nil {
				signer, err := ssh.ParsePrivateKey(key)
				if err == nil {
					signers = append(signers, signer)
				} else {
					logger.Debugf("load private key %s: %s", filepath.Join(home, "id_"+a), err)
				}
			}
		}
	}
	socket := os.Getenv("SSH_AUTH_SOCK")
	if socket != "" {
		conn, err := net.Dial("unix", socket)
		if err != nil {
			logger.Errorf("Failed to open SSH_AUTH_SOCK: %v", err)
		} else {
			agent := agent.NewClient(conn)
			signer, err := agent.Signers()
			if err != nil {
				logger.Warnf("load signer from agent: %s", err)
			} else {
				signers = append(signers, signer...)
			}
		}
	}
	if len(signers) > 0 {
		auth = append(auth, ssh.PublicKeys(signers...))
	}

	if pass == "" {
		auth = append(auth, ssh.KeyboardInteractive(sshInteractive))
	}
	var hostKeyCallback ssh.HostKeyCallback
	if kn := os.Getenv("SSH_KNOWN_HOSTS"); kn != "" {
		var err error
		hostKeyCallback, err = knownhosts.New(kn)
		if err != nil {
			return nil, err
		}
	} else {
		hostKeyCallback = ssh.InsecureIgnoreHostKey()
	}

	config := &ssh.ClientConfig{
		User:            username,
		HostKeyCallback: hostKeyCallback,
		Timeout:         time.Second * 3,
		Auth:            auth,
	}
	f := &sftpStore{
		host:   host,
		port:   port,
		root:   root,
		config: config,
	}

	c, err := f.getSftpConnection()
	if err != nil && strings.Contains(err.Error(), "unable to authenticate") &&
		pass == "" && os.Getenv("SSH_PRIVATE_KEY_PATH") == "" {
		fmt.Printf("%s@%s's password: ", username, host)
		var password []byte
		password, err = term.ReadPassword(int(syscall.Stdin))
		if err != nil {
			return nil, fmt.Errorf("Read password: %s", err.Error())
		}
		f.config.Auth = append(f.config.Auth, ssh.Password(string(password)))
		c, err = f.getSftpConnection()
	}
	if err != nil {
		logger.Errorf("connect to %s failed: %s", host, err)
		return nil, err
	}
	defer func() { f.putSftpConnection(c, err) }()

	return f, nil
}

func init() {
	Register("sftp", newSftp)
}


================================================
FILE: pkg/object/sharding.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"container/heap"
	"context"
	"errors"
	"fmt"
	"hash/fnv"
	"io"
	"strings"
	"time"
)

type sharded struct {
	DefaultObjectStorage
	stores []ObjectStorage
}

func (s *sharded) String() string {
	return fmt.Sprintf("shard%d://%s", len(s.stores), s.stores[0])
}

func (s *sharded) Limits() Limits {
	l := s.stores[0].Limits()
	l.IsSupportUploadPartCopy = false
	return l
}

func (s *sharded) Create(ctx context.Context) error {
	for _, o := range s.stores {
		if err := o.Create(ctx); err != nil {
			return err
		}
	}
	return nil
}

func (s *sharded) pick(key string) ObjectStorage {
	h := fnv.New32a()
	_, _ = h.Write([]byte(key))
	i := h.Sum32() % uint32(len(s.stores))
	return s.stores[i]
}

func (s *sharded) Head(ctx context.Context, key string) (Object, error) {
	return s.pick(key).Head(ctx, key)
}

func (s *sharded) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	return s.pick(key).Get(ctx, key, off, limit, getters...)
}

func (s *sharded) Put(ctx context.Context, key string, body io.Reader, getters ...AttrGetter) error {
	return s.pick(key).Put(ctx, key, body, getters...)
}

func (s *sharded) Copy(ctx context.Context, dst, src string) error {
	return notSupported
}

func (s *sharded) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	return s.pick(key).Delete(ctx, key, getters...)
}

func (s *sharded) SetStorageClass(sc string) error {
	var err = notSupported
	for _, o := range s.stores {
		if os, ok := o.(SupportStorageClass); ok {
			err = os.SetStorageClass(sc)
		}
	}
	return err
}

const maxResults = 10000

// ListAll lists all keys that starts at marker from object storage.
func ListAll(ctx context.Context, store ObjectStorage, prefix, marker string, followLink, sort bool) (<-chan Object, error) {
	if ch, err := store.ListAll(ctx, prefix, marker, followLink); err == nil {
		return ch, nil
	} else if !errors.Is(err, notSupported) {
		return nil, err
	}

	startTime := time.Now()
	out := make(chan Object, maxResults)
	logger.Debugf("Listing objects from %s marker %q", store, marker)
	objs, hasMore, nextToken, err := store.List(ctx, prefix, marker, "", "", maxResults, followLink)
	if errors.Is(err, notSupported) {
		return ListAllWithDelimiter(ctx, store, prefix, marker, "", followLink)
	}
	if err != nil {
		logger.Errorf("Can't list %s: %s", store, err.Error())
		return nil, err
	}
	logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime))
	go func() {
		defer close(out)
		lastkey := ""
		first := true
		for {
			for _, obj := range objs {
				key := obj.Key()
				if sort && !first && key <= lastkey {
					logger.Errorf("The keys are out of order: marker %q, last %q current %q", marker, lastkey, key)
					out <- nil
					return
				}
				lastkey = key
				out <- obj
				first = false
			}
			if !hasMore {
				break
			}

			marker = lastkey
			startTime = time.Now()
			logger.Debugf("Continue listing objects from %s marker %q", store, marker)
			var nextToken2 string
			objs, hasMore, nextToken2, err = store.List(ctx, prefix, marker, nextToken, "", maxResults, followLink)
			for err != nil {
				logger.Warnf("Fail to list: %s, retry again", err.Error())
				// slow down
				time.Sleep(time.Millisecond * 100)
				objs, hasMore, nextToken, err = store.List(ctx, prefix, marker, nextToken, "", maxResults, followLink)
			}
			nextToken = nextToken2
			logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime))
		}
	}()
	return out, nil
}

type nextKey struct {
	o  Object
	ch <-chan Object
}

type nextObjects struct {
	os []nextKey
}

func (s *nextObjects) Len() int           { return len(s.os) }
func (s *nextObjects) Less(i, j int) bool { return s.os[i].o.Key() < s.os[j].o.Key() }
func (s *nextObjects) Swap(i, j int)      { s.os[i], s.os[j] = s.os[j], s.os[i] }
func (s *nextObjects) Push(o interface{}) { s.os = append(s.os, o.(nextKey)) }
func (s *nextObjects) Pop() interface{} {
	o := s.os[len(s.os)-1]
	s.os = s.os[:len(s.os)-1]
	return o
}

func (s *sharded) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	heads := &nextObjects{make([]nextKey, 0)}
	for i := range s.stores {
		ch, err := ListAll(ctx, s.stores[i], prefix, marker, followLink, true)
		if err != nil {
			return nil, fmt.Errorf("list %s: %s", s.stores[i], err)
		}
		first := <-ch
		if first != nil {
			heads.Push(nextKey{first, ch})
		}
	}
	heap.Init(heads)

	out := make(chan Object, 1000)
	go func() {
		for heads.Len() > 0 {
			n := heap.Pop(heads).(nextKey)
			out <- n.o
			o := <-n.ch
			if o != nil {
				heap.Push(heads, nextKey{o, n.ch})
			}
		}
		close(out)
	}()
	return out, nil
}

func (s *sharded) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	return s.pick(key).CreateMultipartUpload(ctx, key)
}

func (s *sharded) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	return s.pick(key).UploadPart(ctx, key, uploadID, num, body)
}

func (s *sharded) AbortUpload(ctx context.Context, key string, uploadID string) {
	s.pick(key).AbortUpload(ctx, key, uploadID)
}

func (s *sharded) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	return s.pick(key).CompleteUpload(ctx, key, uploadID, parts)
}

func NewSharded(name, endpoint, ak, sk, token string, shards int) (ObjectStorage, error) {
	stores := make([]ObjectStorage, shards)
	var err error
	for i := range stores {
		ep := fmt.Sprintf(endpoint, i)
		if strings.HasSuffix(ep, "%!(EXTRA int=0)") {
			return nil, fmt.Errorf("can not generate different endpoint using %s", endpoint)
		}
		stores[i], err = CreateStorage(name, ep, ak, sk, token)
		if err != nil {
			return nil, err
		}
	}
	return &sharded{stores: stores}, nil
}


================================================
FILE: pkg/object/space.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"fmt"
	"net/url"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type space struct {
	s3client
}

func (s *space) String() string {
	return fmt.Sprintf("space://%s/", s.s3client.bucket)
}

func (s *space) Limits() Limits {
	return s.s3client.Limits()
}

func (s *space) SetStorageClass(sc string) error {
	return notSupported
}

func newSpace(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, _ := url.ParseRequestURI(endpoint)
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	region := hostParts[1]
	endpoint = uri.Scheme + "://" + uri.Host[len(bucket)+1:]

	awsCfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(awsCfg, func(options *s3.Options) {
		options.Region = region
		options.BaseEndpoint = aws.String(endpoint)
		options.EndpointOptions.DisableHTTPS = !ssl
		options.UsePathStyle = false
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	return &space{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("space", newSpace)
}


================================================
FILE: pkg/object/sql.go
================================================
//go:build !nosqlite || !nomysql || !nopg
// +build !nosqlite !nomysql !nopg

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"database/sql"
	"errors"
	"fmt"
	"io"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/sirupsen/logrus"
	"xorm.io/xorm"
	"xorm.io/xorm/log"
	"xorm.io/xorm/names"
)

type sqlStore struct {
	DefaultObjectStorage
	db   *xorm.Engine
	addr string
}

type blob struct {
	Id       int64     `xorm:"pk bigserial"`
	Key      []byte    `xorm:"notnull unique(blob) varbinary(255) "`
	Size     int64     `xorm:"notnull"`
	Modified time.Time `xorm:"notnull updated"`
	Data     []byte    `xorm:"mediumblob"`
}

func (s *sqlStore) String() string {
	driver := s.db.DriverName()
	if driver == "pgx" {
		driver = "postgres"
	}
	return fmt.Sprintf("%s://%s/", driver, s.addr)
}

func (s *sqlStore) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	var b = blob{Key: []byte(key)}
	// TODO: range
	ok, err := s.db.Get(&b)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, os.ErrNotExist
	}
	if off > int64(len(b.Data)) {
		off = int64(len(b.Data))
	}
	data := b.Data[off:]
	if limit > 0 && limit < int64(len(data)) {
		data = data[:limit]
	}
	return io.NopCloser(bytes.NewBuffer(data)), nil
}

func (s *sqlStore) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	d, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	var n int64
	now := time.Now()
	b := blob{Key: []byte(key), Data: d, Size: int64(len(d)), Modified: now}
	if name := s.db.DriverName(); name == "postgres" || name == "pgx" {
		var r sql.Result
		r, err = s.db.Exec("INSERT INTO jfs_blob(key, size,modified, data) VALUES(?, ?, ?,? ) "+
			"ON CONFLICT (key) DO UPDATE SET size=?,data=?", []byte(key), b.Size, now, d, b.Size, d)
		if err == nil {
			n, err = r.RowsAffected()
		}
	} else {
		n, err = s.db.Insert(&b)
		if err != nil || n == 0 {
			n, err = s.db.Update(&b, &blob{Key: []byte(key)})
		}
	}
	if err == nil && n == 0 {
		err = errors.New("not inserted or updated")
	}
	return err
}

func (s *sqlStore) Head(ctx context.Context, key string) (Object, error) {
	var b = blob{Key: []byte(key)}
	ok, err := s.db.Cols("key", "modified", "size").Get(&b)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, os.ErrNotExist
	}
	return &obj{
		key,
		b.Size,
		b.Modified,
		strings.HasSuffix(key, "/"),
		"",
	}, nil
}

func (s *sqlStore) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	_, err := s.db.Delete(&blob{Key: []byte(key)})
	return err
}

func (s *sqlStore) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if marker == "" {
		marker = prefix
	}
	// todo
	if delimiter != "" {
		return nil, false, "", notSupported
	}
	var bs []blob
	err := s.db.Where("`key` > ?", []byte(marker)).Limit(int(limit)).Cols("`key`", "size", "modified").OrderBy("`key`").Find(&bs)
	if err != nil {
		return nil, false, "", err
	}
	var objs []Object
	for _, b := range bs {
		if strings.HasPrefix(string(b.Key), prefix) {
			objs = append(objs, &obj{
				key:   string(b.Key),
				size:  b.Size,
				mtime: b.Modified,
				isDir: strings.HasSuffix(string(b.Key), "/"),
			})
		} else {
			break
		}
	}
	return generateListResult(objs, limit)
}

func newSQLStore(driver, addr, user, password string) (ObjectStorage, error) {
	var err error
	uri := addr
	if user != "" {
		uri = user + ":" + password + "@" + addr
	}
	var searchPath string
	if driver == "postgres" {
		uri = "postgres://" + uri
		driver = "pgx"

		parse, err := url.Parse(uri)
		if err != nil {
			return nil, fmt.Errorf("parse url %s failed: %s", uri, err)
		}
		searchPath = parse.Query().Get("search_path")
		if searchPath != "" {
			if len(strings.Split(searchPath, ",")) > 1 {
				return nil, fmt.Errorf("currently, only one schema is supported in search_path")
			}
		}
	}
	engine, err := xorm.NewEngine(driver, uri)
	if err != nil {
		return nil, fmt.Errorf("open %s: %s", uri, err)
	}
	switch logger.Level { // make xorm less verbose
	case logrus.TraceLevel:
		engine.SetLogLevel(log.LOG_DEBUG)
	case logrus.DebugLevel:
		engine.SetLogLevel(log.LOG_INFO)
	case logrus.InfoLevel, logrus.WarnLevel:
		engine.SetLogLevel(log.LOG_WARNING)
	case logrus.ErrorLevel:
		engine.SetLogLevel(log.LOG_ERR)
	default:
		engine.SetLogLevel(log.LOG_OFF)
	}
	if searchPath != "" {
		engine.SetSchema(searchPath)
	}
	engine.SetTableMapper(names.NewPrefixMapper(engine.GetTableMapper(), "jfs_"))
	if err := engine.Sync2(new(blob)); err != nil {
		return nil, fmt.Errorf("create table blob: %s", err)
	}
	return &sqlStore{DefaultObjectStorage{}, engine, addr}, nil
}

func removeScheme(addr string) string {
	p := strings.Index(addr, "://")
	if p > 0 {
		addr = addr[p+3:]
	}
	return addr
}


================================================
FILE: pkg/object/sql_mysql.go
================================================
//go:build !nomysql
// +build !nomysql

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	_ "github.com/go-sql-driver/mysql"
)

func init() {
	Register("mysql", func(addr, user, pass, token string) (ObjectStorage, error) {
		return newSQLStore("mysql", removeScheme(addr), user, pass)
	})
}


================================================
FILE: pkg/object/sql_pg.go
================================================
//go:build !nopg
// +build !nopg

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	_ "github.com/jackc/pgx/v5/stdlib"
)

func init() {
	Register("postgres", func(addr, user, pass, token string) (ObjectStorage, error) {
		return newSQLStore("postgres", removeScheme(addr), user, pass)
	})
}


================================================
FILE: pkg/object/sql_sqlite.go
================================================
//go:build !nosqlite
// +build !nosqlite

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	_ "github.com/mattn/go-sqlite3"
)

func init() {
	Register("sqlite3", func(addr, user, pass, token string) (ObjectStorage, error) {
		return newSQLStore("sqlite3", removeScheme(addr), user, pass)
	})
}


================================================
FILE: pkg/object/swift.go
================================================
//go:build !noswift
// +build !noswift

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/ncw/swift/v2"
)

type swiftOSS struct {
	DefaultObjectStorage
	conn       *swift.Connection
	region     string
	storageUrl string
	container  string
}

func (s *swiftOSS) String() string {
	return fmt.Sprintf("swift://%s/", s.container)
}

func (s *swiftOSS) Create(ctx context.Context) error {
	// No error is returned if it already exists but the metadata if any will be updated.
	return s.conn.ContainerCreate(ctx, s.container, nil)
}

func (s *swiftOSS) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	headers := make(map[string]string)
	if off > 0 || limit > 0 {
		if limit > 0 {
			headers["Range"] = fmt.Sprintf("bytes=%d-%d", off, off+limit-1)
		} else {
			headers["Range"] = fmt.Sprintf("bytes=%d-", off)
		}
	}
	f, _, err := s.conn.ObjectOpen(ctx, s.container, key, true, headers)
	return f, err
}

func (s *swiftOSS) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	mimeType := utils.GuessMimeType(key)
	_, err := s.conn.ObjectPut(ctx, s.container, key, in, true, "", mimeType, nil)
	return err
}

func (s *swiftOSS) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	err := s.conn.ObjectDelete(ctx, s.container, key)
	if err != nil && errors.Is(err, swift.ObjectNotFound) {
		err = nil
	}
	return err
}

func (s *swiftOSS) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if limit > 10000 {
		limit = 10000
	}
	var delimiter_ rune
	if delimiter != "" {
		if len([]rune(delimiter)) == 1 {
			delimiter_ = []rune(delimiter)[0]
		} else {
			return nil, false, "", fmt.Errorf("delimiter should be a rune but now is %s", delimiter)
		}
	}
	objects, err := s.conn.Objects(ctx, s.container, &swift.ObjectsOpts{Prefix: prefix, Marker: marker, Delimiter: delimiter_, Limit: int(limit)})
	if err != nil {
		return nil, false, "", err
	}
	var objs = make([]Object, len(objects))
	for i, o := range objects {
		// https://docs.openstack.org/swift/latest/api/pseudo-hierarchical-folders-directories.html
		if delimiter != "" && o.PseudoDirectory {
			objs[i] = &obj{o.SubDir, 0, time.Unix(0, 0), true, ""}
		} else {
			objs[i] = &obj{o.Name, o.Bytes, o.LastModified, strings.HasSuffix(o.Name, "/"), ""}
		}
	}
	return generateListResult(objs, limit)
}

func (s *swiftOSS) Head(ctx context.Context, key string) (Object, error) {
	object, _, err := s.conn.Object(ctx, s.container, key)
	if err != nil {
		if err == swift.ObjectNotFound {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		object.Bytes,
		object.LastModified,
		strings.HasSuffix(key, "/"),
		"",
	}, err
}

func newSwiftOSS(endpoint, username, apiKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("http://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	if uri.Scheme != "http" && uri.Scheme != "https" {
		return nil, fmt.Errorf("Invalid uri.Scheme: %s", uri.Scheme)
	}

	hostSlice := strings.SplitN(uri.Host, ".", 2)
	if len(hostSlice) != 2 {
		return nil, fmt.Errorf("Invalid host: %s", uri.Host)
	}
	container := hostSlice[0]
	host := hostSlice[1]

	// current only support V1 authentication
	authURL := uri.Scheme + "://" + host + "/auth/v1.0"

	conn := swift.Connection{
		UserName:  username,
		ApiKey:    apiKey,
		AuthToken: token,
		AuthUrl:   authURL,
		Transport: httpClient.Transport.(*http.Transport),
	}
	err = conn.Authenticate(context.Background())
	if err != nil {
		return nil, fmt.Errorf("Auth: %s", err)
	}
	return &swiftOSS{DefaultObjectStorage{}, &conn, conn.Region, conn.StorageUrl, container}, nil
}

func init() {
	Register("swift", newSwiftOSS)
}


================================================
FILE: pkg/object/tikv.go
================================================
//go:build !notikv
// +build !notikv

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/url"
	"os"
	"strings"
	"time"

	plog "github.com/pingcap/log"
	"github.com/sirupsen/logrus"
	"github.com/tikv/client-go/v2/config"
	"github.com/tikv/client-go/v2/rawkv"
)

type tikv struct {
	DefaultObjectStorage
	c    *rawkv.Client
	addr string
}

func (t *tikv) String() string {
	return fmt.Sprintf("tikv://%s/", t.addr)
}

func (t *tikv) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	d, err := t.c.Get(ctx, []byte(key))
	if len(d) == 0 {
		err = os.ErrNotExist
	}
	if err != nil {
		return nil, err
	}
	if off > int64(len(d)) {
		off = int64(len(d))
	}
	data := d[off:]
	if limit > 0 && limit < int64(len(data)) {
		data = data[:limit]
	}
	return io.NopCloser(bytes.NewBuffer(data)), nil
}

func (t *tikv) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	d, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	return t.c.Put(ctx, []byte(key), d)
}

func (t *tikv) Head(ctx context.Context, key string) (Object, error) {
	data, err := t.c.Get(ctx, []byte(key))
	if err == nil && data == nil {
		return nil, os.ErrNotExist
	}
	return &obj{
		key,
		int64(len(data)),
		time.Now(),
		strings.HasSuffix(key, "/"),
		"",
	}, err
}

func (t *tikv) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	return t.c.Delete(ctx, []byte(key))
}

func (t *tikv) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "" {
		return nil, false, "", notSupported
	}
	if marker == "" {
		marker = prefix
	}
	if limit > int64(rawkv.MaxRawKVScanLimit) {
		limit = int64(rawkv.MaxRawKVScanLimit)
	}
	// TODO: key only
	keys, vs, err := t.c.Scan(ctx, []byte(marker), nil, int(limit))
	if err != nil {
		return nil, false, "", err
	}
	var objs = make([]Object, len(keys))
	mtime := time.Now()
	for i, k := range keys {
		// FIXME: mtime
		objs[i] = &obj{string(k), int64(len(vs[i])), mtime, strings.HasSuffix(string(k), "/"), ""}
	}
	return generateListResult(objs, limit)
}

func newTiKV(endpoint, accesskey, secretkey, token string) (ObjectStorage, error) {
	var plvl string // TiKV (PingCap) uses uber-zap logging, make it less verbose
	switch logger.Level {
	case logrus.TraceLevel:
		plvl = "debug"
	case logrus.DebugLevel:
		plvl = "info"
	case logrus.InfoLevel, logrus.WarnLevel:
		plvl = "warn"
	case logrus.ErrorLevel:
		plvl = "error"
	default:
		plvl = "dpanic"
	}
	l, prop, _ := plog.InitLogger(&plog.Config{Level: plvl})
	plog.ReplaceGlobals(l, prop)

	if !strings.HasPrefix(endpoint, "tikv://") {
		endpoint = "tikv://" + endpoint
	}
	tUrl, err := url.Parse(endpoint)
	if err != nil {
		return nil, err
	}
	pds := strings.Split(tUrl.Host, ",")
	for i, pd := range pds {
		pd = strings.TrimSpace(pd)
		if !strings.Contains(pd, ":") {
			pd += ":2379"
		}
		pds[i] = pd
	}

	q := tUrl.Query()
	c, err := rawkv.NewClient(context.TODO(), pds, config.NewSecurity(
		q.Get("ca"),
		q.Get("cert"),
		q.Get("key"),
		strings.Split(q.Get("verify-cn"), ",")))

	if err != nil {
		return nil, err
	}
	return &tikv{c: c, addr: tUrl.Host}, nil
}

func init() {
	Register("tikv", newTiKV)
}


================================================
FILE: pkg/object/tos.go
================================================
//go:build !tos

/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"

	"github.com/volcengine/ve-tos-golang-sdk/v2/tos"
	"github.com/volcengine/ve-tos-golang-sdk/v2/tos/codes"
	"github.com/volcengine/ve-tos-golang-sdk/v2/tos/enum"
)

type tosClient struct {
	bucket string
	sc     string
	client *tos.ClientV2
}

func (t *tosClient) String() string {
	return fmt.Sprintf("tos://%s/", t.bucket)
}

func (t *tosClient) Limits() Limits {
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  true,
		MinPartSize:              4 << 20,
		MaxPartSize:              5 << 30,
		MaxPartCount:             10000,
	}
}

func (t *tosClient) Create(ctx context.Context) error {
	_, err := t.client.CreateBucketV2(ctx, &tos.CreateBucketV2Input{Bucket: t.bucket, StorageClass: enum.StorageClassType(t.sc)})
	if e, ok := err.(*tos.TosServerError); ok {
		if e.Code == codes.BucketAlreadyOwnedByYou || e.Code == codes.BucketAlreadyExists {
			return nil
		}
	}
	return err
}

func (t *tosClient) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	rangeStr := getRange(off, limit)
	resp, err := t.client.GetObjectV2(ctx, &tos.GetObjectV2Input{
		Bucket: t.bucket,
		Key:    key,
		Range:  rangeStr, // When Range and RangeStart & RangeEnd appear together, range is preferred
	})
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestID).SetStorageClass(string(resp.StorageClass))
	}
	if err != nil {
		return nil, err
	}
	if err = checkGetStatus(resp.StatusCode, rangeStr != ""); err != nil {
		_ = resp.Content.Close()
		return nil, err
	}
	if off == 0 && limit == -1 {
		v, _ := resp.Meta.Get(checksumAlgr)
		resp.Content = verifyChecksum(resp.Content, v, resp.ContentLength)
	}

	return resp.Content, nil
}

func (t *tosClient) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	var meta map[string]string
	if ins, ok := in.(io.ReadSeeker); ok {
		meta = map[string]string{
			checksumAlgr: generateChecksum(ins),
		}
	}
	resp, err := t.client.PutObjectV2(ctx, &tos.PutObjectV2Input{
		PutObjectBasicInput: tos.PutObjectBasicInput{
			Bucket:       t.bucket,
			Key:          key,
			StorageClass: enum.StorageClassType(t.sc),
			Meta:         meta,
		},
		Content: in,
	})
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestID).SetStorageClass(t.sc)
	}
	return err
}

func (t *tosClient) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	resp, err := t.client.DeleteObjectV2(ctx, &tos.DeleteObjectV2Input{
		Bucket: t.bucket,
		Key:    key,
	})
	if resp != nil {
		attrs := ApplyGetters(getters...)
		attrs.SetRequestID(resp.RequestID)
	}
	return err
}

func (t *tosClient) Head(ctx context.Context, key string) (Object, error) {
	head, err := t.client.HeadObjectV2(ctx, &tos.HeadObjectV2Input{Bucket: t.bucket, Key: key})
	if err != nil {
		if e, ok := err.(*tos.TosServerError); ok {
			if e.StatusCode == http.StatusNotFound {
				err = os.ErrNotExist
			}
		}
		return nil, err
	}
	return &obj{
		key,
		head.ContentLength,
		head.LastModified,
		strings.HasSuffix(key, "/"),
		string(head.StorageClass),
	}, err
}

func (t *tosClient) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	resp, err := t.client.ListObjectsType2(ctx, &tos.ListObjectsType2Input{
		Bucket:            t.bucket,
		Delimiter:         delimiter,
		Prefix:            prefix,
		StartAfter:        start,
		MaxKeys:           int(limit),
		ContinuationToken: token,
	})
	if err != nil {
		return nil, false, "", err
	}
	n := len(resp.Contents)
	objs := make([]Object, n)
	for i := 0; i < n; i++ {
		o := resp.Contents[i]
		if !strings.HasPrefix(o.Key, prefix) || o.Key <= start {
			return nil, false, "", fmt.Errorf("found invalid key %s from List, prefix: %s, marker: %s", o.Key, prefix, start)
		}
		objs[i] = &obj{
			o.Key,
			o.Size,
			o.LastModified,
			strings.HasSuffix(o.Key, "/"),
			string(o.StorageClass),
		}
	}
	if delimiter != "" {
		for _, p := range resp.CommonPrefixes {
			objs = append(objs, &obj{p.Prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	return objs, resp.IsTruncated, resp.NextContinuationToken, nil
}

func (t *tosClient) ListAll(ctx context.Context, prefix, marker string, followLink bool) (<-chan Object, error) {
	return nil, notSupported
}

func (t *tosClient) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	resp, err := t.client.CreateMultipartUploadV2(ctx, &tos.CreateMultipartUploadV2Input{
		Bucket:       t.bucket,
		Key:          key,
		StorageClass: enum.StorageClassType(t.sc),
	})
	if err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: resp.UploadID, MinPartSize: 5 << 20, MaxCount: 10000}, nil
}

func (t *tosClient) UploadPart(ctx context.Context, key string, uploadID string, num int, body []byte) (*Part, error) {
	resp, err := t.client.UploadPartV2(ctx, &tos.UploadPartV2Input{
		UploadPartBasicInput: tos.UploadPartBasicInput{
			Bucket:     t.bucket,
			Key:        key,
			UploadID:   uploadID,
			PartNumber: num,
		},
		Content: bytes.NewReader(body),
	})
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: resp.ETag}, nil
}

func (t *tosClient) UploadPartCopy(ctx context.Context, key string, uploadID string, num int, srcKey string, off, size int64) (*Part, error) {
	resp, err := t.client.UploadPartCopyV2(ctx, &tos.UploadPartCopyV2Input{
		Bucket:          t.bucket,
		Key:             key,
		UploadID:        uploadID,
		PartNumber:      num,
		SrcBucket:       t.bucket,
		SrcKey:          srcKey,
		CopySourceRange: fmt.Sprintf("bytes=%d-%d", off, off+size-1),
	},
	)
	if err != nil {
		return nil, err
	}
	return &Part{Num: num, ETag: resp.ETag}, nil
}

func (t *tosClient) AbortUpload(ctx context.Context, key string, uploadID string) {
	_, _ = t.client.AbortMultipartUpload(ctx, &tos.AbortMultipartUploadInput{
		Bucket:   t.bucket,
		Key:      key,
		UploadID: uploadID,
	})
}

func (t *tosClient) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	var tosParts []tos.UploadedPartV2
	for i := range parts {
		tosParts = append(tosParts, tos.UploadedPartV2{ETag: parts[i].ETag, PartNumber: parts[i].Num})
	}
	_, err := t.client.CompleteMultipartUploadV2(ctx, &tos.CompleteMultipartUploadV2Input{
		Bucket:   t.bucket,
		Key:      key,
		UploadID: uploadID,
		Parts:    tosParts,
	})
	return err
}

func (t *tosClient) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	result, err := t.client.ListMultipartUploadsV2(ctx, &tos.ListMultipartUploadsV2Input{Bucket: t.bucket})
	if err != nil {
		return nil, "", err
	}
	parts := make([]*PendingPart, len(result.Uploads))
	for i, u := range result.Uploads {
		parts[i] = &PendingPart{u.Key, u.UploadID, u.Initiated}
	}
	var nextMarker string
	if result.NextKeyMarker != "" {
		nextMarker = result.NextKeyMarker
	}
	return parts, nextMarker, nil
}

func (t *tosClient) Copy(ctx context.Context, dst, src string) error {
	_, err := t.client.CopyObject(ctx, &tos.CopyObjectInput{
		SrcBucket:    t.bucket,
		Bucket:       t.bucket,
		SrcKey:       src,
		Key:          dst,
		StorageClass: enum.StorageClassType(t.sc),
	})
	return err
}

func (t *tosClient) SetStorageClass(sc string) error {
	t.sc = sc
	return nil
}

func newTOS(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("invalid endpoint: %v, error: %v", endpoint, err)
	}
	disableChecksum := strings.EqualFold(uri.Query().Get("disable-checksum"), "true")
	if disableChecksum {
		logger.Infof("default CRC checksum is disabled")
	}
	hostParts := strings.SplitN(uri.Host, ".", 3)
	credentials := tos.NewStaticCredentials(accessKey, secretKey)
	credentials.WithSecurityToken(token)
	cli, err := tos.NewClientV2(
		hostParts[1]+"."+hostParts[2],
		tos.WithRegion(strings.TrimPrefix(hostParts[1], "tos-")),
		tos.WithCredentials(credentials),
		tos.WithEnableVerifySSL(httpClient.Transport.(*http.Transport).TLSClientConfig.InsecureSkipVerify),
		tos.WithEnableCRC(!disableChecksum))
	if err != nil {
		return nil, err
	}
	return &tosClient{bucket: hostParts[0], client: cli}, nil
}

func init() {
	Register("tos", newTOS)
}


================================================
FILE: pkg/object/ufile.go
================================================
//go:build !noufile
// +build !noufile

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"bytes"
	"context"
	"crypto/hmac"
	"crypto/sha1"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"sort"
	"strconv"
	"strings"
	"time"
)

type ufile struct {
	RestfulStorage
}

func (u *ufile) String() string {
	uri, _ := url.ParseRequestURI(u.endpoint)
	return fmt.Sprintf("ufile://%s/", uri.Host)
}

func (u *ufile) Limits() Limits {
	// only support 4MB part size and max object size: 5TB
	return Limits{
		IsSupportMultipartUpload: true,
		IsSupportUploadPartCopy:  false,
		MinPartSize:              4 << 20,
		MaxPartSize:              4 << 20,
		MaxPartCount:             1310720,
	}
}

func ufileSigner(req *http.Request, accessKey, secretKey, signName string) {
	if accessKey == "" {
		return
	}
	toSign := req.Method + "\n"
	for _, n := range HEADER_NAMES {
		toSign += req.Header.Get(n) + "\n"
	}
	bucket := strings.Split(req.URL.Host, ".")[0]
	key := req.URL.Path
	// Hack for UploadHit
	if len(req.URL.RawQuery) > 0 {
		vs, _ := url.ParseQuery(req.URL.RawQuery)
		if _, ok := vs["FileName"]; ok {
			key = "/" + vs.Get("FileName")
		}
	}
	toSign += "/" + bucket + key
	h := hmac.New(sha1.New, []byte(secretKey))
	_, _ = h.Write([]byte(toSign))
	sig := base64.StdEncoding.EncodeToString(h.Sum(nil))
	token := signName + " " + accessKey + ":" + sig
	req.Header.Add("Authorization", token)
}

func (u *ufile) Create(ctx context.Context) error {
	uri, _ := url.ParseRequestURI(u.endpoint)
	parts := strings.Split(uri.Host, ".")
	name := parts[0]
	region := parts[1] // www.cn-bj.ufileos.com
	if region == "ufile" {
		region = parts[2] // www.ufile.cn-north-02.ucloud.cn
	}
	if strings.HasPrefix(region, "internal") {
		// www.internal-hk-01.ufileos.cn
		// www.internal-cn-gd-02.ufileos.cn
		ps := strings.Split(region, "-")
		region = strings.Join(ps[1:len(ps)-1], "-")
	}

	query := url.Values{}
	query.Add("Action", "CreateBucket")
	query.Add("BucketName", name)
	query.Add("PublicKey", u.accessKey)
	query.Add("Region", region)

	// generate signature
	toSign := fmt.Sprintf("ActionCreateBucketBucketName%sPublicKey%sRegion%s",
		name, u.accessKey, region)

	sum := sha1.Sum([]byte(toSign + u.secretKey))
	sig := hex.EncodeToString(sum[:])
	query.Add("Signature", sig)

	req, err := http.NewRequest("GET", "https://api.ucloud.cn/?"+query.Encode(), nil)
	if err != nil {
		return err
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	err = parseError(resp)
	if strings.Contains(err.Error(), "duplicate bucket name") ||
		strings.Contains(err.Error(), "CreateBucketResponse") {
		err = nil
	}
	return err
}

func (u *ufile) parseResp(resp *http.Response, out interface{}) error {
	defer resp.Body.Close()
	var data []byte
	if resp.ContentLength <= 0 || resp.ContentLength > (1<<31) {
		d, err := io.ReadAll(resp.Body)
		if err != nil {
			return err
		}
		data = d
	} else {
		data = make([]byte, resp.ContentLength)
		if _, err := io.ReadFull(resp.Body, data); err != nil {
			return err
		}
	}

	if resp.StatusCode != 200 {
		return fmt.Errorf("status: %v, message: %s", resp.StatusCode, string(data))
	}
	err := json.Unmarshal(data, out)
	if err != nil {
		return err
	}
	return nil
}

func copyObj(ctx context.Context, store ObjectStorage, dst, src string) error {
	in, err := store.Get(ctx, src, 0, -1)
	if err != nil {
		return err
	}
	defer in.Close()
	d, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	return store.Put(ctx, dst, bytes.NewReader(d))
}

func (u *ufile) Copy(ctx context.Context, dst, src string) error {
	resp, err := u.request(ctx, "HEAD", src, nil, nil)
	if err != nil {
		return copyObj(ctx, u, dst, src)
	}
	if resp.StatusCode != 200 {
		return copyObj(ctx, u, dst, src)
	}

	etag := resp.Header["Etag"]
	if len(etag) < 1 {
		return copyObj(ctx, u, dst, src)
	}
	hash := etag[0][1 : len(etag[0])-1]
	lens := resp.Header["Content-Length"]
	if len(lens) < 1 {
		return copyObj(ctx, u, dst, src)
	}
	uri := fmt.Sprintf("uploadhit?Hash=%s&FileName=%s&FileSize=%s", hash, dst, lens[0])
	resp, err = u.request(ctx, "POST", uri, nil, nil)
	if err != nil {
		return copyObj(ctx, u, dst, src)
	}
	defer cleanup(resp)
	if resp.StatusCode != 200 {
		return copyObj(ctx, u, dst, src)
	}
	return nil
}

type ContentsItem struct {
	Key          string
	Size         string
	LastModified int
	CreateTime   int
	StorageClass string
	ETag         string
}

type CommonPrefixesItem struct {
	Prefix string
}

// uFileListObjectsOutput presents output for ListObjects.
type uFileListObjectsOutput struct {
	Maxkeys     string `json:"MaxKeys,omitempty"`
	Delimiter   string `json:"Delimiter,omitempty"`
	NextMarker  string `json:"NextMarker,omitempty"`
	IsTruncated bool   `json:"IsTruncated,omitempty"`

	// Object keys
	Contents       []*ContentsItem       `json:"Contents,omitempty"`
	CommonPrefixes []*CommonPrefixesItem `json:"CommonPrefixes,omitempty"`
}

func (u *ufile) List(ctx context.Context, prefix, start, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "" {
		return nil, false, "", notSupported
	}
	query := url.Values{}
	query.Add("prefix", prefix)
	query.Add("marker", start)
	query.Add("delimiter", delimiter)
	if limit > 1000 {
		limit = 1000
	}
	query.Add("max-keys", strconv.Itoa(int(limit)))
	resp, err := u.request(ctx, "GET", "?listobjects&"+query.Encode(), nil, nil)
	if err != nil {
		return nil, false, "", err
	}

	var out uFileListObjectsOutput
	if err := u.parseResp(resp, &out); err != nil {
		return nil, false, "", err
	}
	objs := make([]Object, len(out.Contents))
	for i, item := range out.Contents {
		size_, _ := strconv.ParseInt(item.Size, 10, 64)
		objs[i] = &obj{item.Key, size_, time.Unix(int64(item.LastModified), 0), strings.HasSuffix(item.Key, "/"), ""}
	}
	if delimiter != "" {
		for _, item := range out.CommonPrefixes {
			objs = append(objs, &obj{item.Prefix, 0, time.Unix(0, 0), true, ""})
		}
		sort.Slice(objs, func(i, j int) bool { return objs[i].Key() < objs[j].Key() })
	}
	// This is a bug in ufile, NextMarker is not the last one after sorting.
	var lastKey string
	if len(objs) > 0 {
		lastKey = objs[len(objs)-1].Key()
	}
	return objs, out.IsTruncated, lastKey, nil
}

type ufileCreateMultipartUploadResult struct {
	UploadId string
	BlkSize  int
	Bucket   string
	Key      string
}

func (u *ufile) CreateMultipartUpload(ctx context.Context, key string) (*MultipartUpload, error) {
	resp, err := u.request(ctx, "POST", key+"?uploads", nil, nil)
	if err != nil {
		return nil, err
	}
	var out ufileCreateMultipartUploadResult
	if err := u.parseResp(resp, &out); err != nil {
		return nil, err
	}
	return &MultipartUpload{UploadID: out.UploadId, MinPartSize: out.BlkSize, MaxCount: 1000000}, nil
}

func (u *ufile) UploadPart(ctx context.Context, key string, uploadID string, num int, data []byte) (*Part, error) {
	// UFile require the PartNumber to start from 0 (continuous)
	num--
	path := fmt.Sprintf("%s?uploadId=%s&partNumber=%d", key, uploadID, num)
	resp, err := u.request(ctx, "PUT", path, bytes.NewReader(data), nil)
	if err != nil {
		return nil, err
	}
	defer cleanup(resp)
	if resp.StatusCode != 200 {
		return nil, fmt.Errorf("UploadPart: %s", parseError(resp).Error())
	}
	etags := resp.Header["Etag"]
	if len(etags) < 1 {
		return nil, errors.New("No ETag")
	}
	return &Part{Num: num, Size: len(data), ETag: strings.Trim(etags[0], "\"")}, nil
}

func (u *ufile) AbortUpload(ctx context.Context, key string, uploadID string) {
	_, _ = u.request(ctx, "DELETE", key+"?uploads="+uploadID, nil, nil)
}

func (u *ufile) CompleteUpload(ctx context.Context, key string, uploadID string, parts []*Part) error {
	etags := make([]string, len(parts))
	for i, p := range parts {
		etags[i] = p.ETag
	}
	resp, err := u.request(ctx, "POST", key+"?uploadId="+uploadID, bytes.NewReader([]byte(strings.Join(etags, ","))), nil)
	if err != nil {
		return err
	}
	defer cleanup(resp)
	if resp.StatusCode != 200 {
		return fmt.Errorf("CompleteMultipart: %s", parseError(resp).Error())
	}
	return nil
}

type ufileUpload struct {
	FileName  string
	UploadId  string
	StartTime int
}

type ufileListMultipartUploadsResult struct {
	RetCode    int
	ErrMsg     string
	NextMarker string
	DataSet    []*ufileUpload
}

func (u *ufile) ListUploads(ctx context.Context, marker string) ([]*PendingPart, string, error) {
	query := url.Values{}
	query.Add("muploadid", "")
	query.Add("prefix", "")
	query.Add("marker", marker)
	query.Add("limit", strconv.Itoa(1000))
	resp, err := u.request(ctx, "GET", "?"+query.Encode(), nil, nil)
	if err != nil {
		return nil, "", err
	}
	var out ufileListMultipartUploadsResult
	// FIXME: invalid auth
	if err := u.parseResp(resp, &out); err != nil {
		return nil, "", err
	}
	if out.RetCode != 0 {
		return nil, "", errors.New(out.ErrMsg)
	}
	parts := make([]*PendingPart, len(out.DataSet))
	for i, u := range out.DataSet {
		parts[i] = &PendingPart{u.FileName, u.UploadId, time.Unix(int64(u.StartTime), 0)}
	}
	return parts, out.NextMarker, nil
}

func newUFile(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	return &ufile{RestfulStorage{DefaultObjectStorage{}, endpoint, accessKey, secretKey, "UCloud", ufileSigner}}, nil
}

func init() {
	Register("ufile", newUFile)
}


================================================
FILE: pkg/object/wasabi.go
================================================
//go:build !nos3
// +build !nos3

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"fmt"
	"net/url"
	"strings"

	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	smithymiddleware "github.com/aws/smithy-go/middleware"
)

type wasabi struct {
	s3client
}

func (s *wasabi) String() string {
	return fmt.Sprintf("wasabi://%s/", s.s3client.bucket)
}

func (s *wasabi) SetStorageClass(_ string) error {
	return notSupported
}

func newWasabi(endpoint, accessKey, secretKey, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("https://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	ssl := strings.ToLower(uri.Scheme) == "https"
	hostParts := strings.Split(uri.Host, ".")
	bucket := hostParts[0]
	region := hostParts[2]
	endpoint = uri.Scheme + "://" + uri.Host[len(bucket)+1:]

	awsCfg, err := config.LoadDefaultConfig(ctx,
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, token)))
	if err != nil {
		return nil, fmt.Errorf("failed to load config: %s", err)
	}
	client := s3.NewFromConfig(awsCfg, func(options *s3.Options) {
		options.Region = region
		options.BaseEndpoint = aws.String(endpoint)
		options.EndpointOptions.DisableHTTPS = !ssl
		options.UsePathStyle = false
		options.HTTPClient = httpClient
		options.APIOptions = append(options.APIOptions, func(stack *smithymiddleware.Stack) error {
			return v4.SwapComputePayloadSHA256ForUnsignedPayloadMiddleware(stack)
		})
		options.RetryMaxAttempts = 1
	})
	return &wasabi{s3client{bucket: bucket, s3: client, region: region}}, nil
}

func init() {
	Register("wasabi", newWasabi)
}


================================================
FILE: pkg/object/webdav.go
================================================
//go:build !nowebdav
// +build !nowebdav

/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package object

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path"
	"sort"
	"strings"

	"github.com/studio-b12/gowebdav"
)

type webdav struct {
	DefaultObjectStorage
	endpoint *url.URL
	c        *gowebdav.Client
}

func (w *webdav) String() string {
	return fmt.Sprintf("webdav://%s/", w.endpoint.Host)
}

func (w *webdav) Create(ctx context.Context) error {
	return nil
}

func (w *webdav) Head(ctx context.Context, key string) (Object, error) {
	info, err := w.c.Stat(key)
	if err != nil {
		if gowebdav.IsErrNotFound(err) {
			err = os.ErrNotExist
		}
		return nil, err
	}
	return &obj{
		key,
		info.Size(),
		info.ModTime(),
		info.IsDir(),
		"",
	}, nil
}

func (w *webdav) Get(ctx context.Context, key string, off, limit int64, getters ...AttrGetter) (io.ReadCloser, error) {
	if off == 0 && limit <= 0 {
		return w.c.ReadStream(key)
	}
	return w.c.ReadStreamRange(key, off, limit)
}

func (w *webdav) Put(ctx context.Context, key string, in io.Reader, getters ...AttrGetter) error {
	if key == "" {
		return nil
	}
	if strings.HasSuffix(key, dirSuffix) {
		return w.c.MkdirAll(key, 0)
	}
	return w.c.WriteStream(key, in, 0)
}

func (w *webdav) Delete(ctx context.Context, key string, getters ...AttrGetter) error {
	info, err := w.c.Stat(key)
	if gowebdav.IsErrNotFound(err) {
		return nil
	}
	if err != nil {
		return err
	}
	if info.IsDir() {
		infos, err := w.c.ReadDir(key)
		if err != nil {
			if gowebdav.IsErrNotFound(err) {
				return nil
			}
			return err
		}
		if len(infos) != 0 {
			return fmt.Errorf("%s is non-empty directory", key)
		}
	}
	return w.c.Remove(key)
}

func (w *webdav) Copy(ctx context.Context, dst, src string) error {
	return w.c.Copy(src, dst, true)
}

type webDAVFile struct {
	os.FileInfo
	name string
}

func (w webDAVFile) Name() string {
	return w.name
}

func (w *webdav) List(ctx context.Context, prefix, marker, token, delimiter string, limit int64, followLink bool) ([]Object, bool, string, error) {
	if delimiter != "/" {
		return nil, false, "", notSupported
	}

	root := "/" + prefix
	var objs []Object
	if !strings.HasSuffix(root, dirSuffix) {
		// If the root is not ends with `/`, we'll list the directory root resides.
		root = path.Dir(root)
		if !strings.HasSuffix(root, dirSuffix) {
			root += dirSuffix
		}
	}

	infos, err := w.c.ReadDir(root)
	if err != nil {
		if gowebdav.IsErrCode(err, http.StatusForbidden) {
			logger.Warnf("skip %s: %s", root, err)
			return nil, false, "", nil
		}
		if gowebdav.IsErrNotFound(err) {
			return nil, false, "", nil
		}
		return nil, false, "", err
	}
	sortedInfos := make([]os.FileInfo, len(infos))
	for idx, o := range infos {
		if o.IsDir() {
			sortedInfos[idx] = &webDAVFile{name: o.Name() + dirSuffix, FileInfo: o}
		} else {
			sortedInfos[idx] = o
		}
	}
	sort.Slice(sortedInfos, func(i, j int) bool {
		return sortedInfos[i].Name() < sortedInfos[j].Name()
	})
	for _, info := range sortedInfos {
		key := root[1:] + info.Name()
		if !strings.HasPrefix(key, prefix) || (marker != "" && key <= marker) {
			continue
		}
		objs = append(objs, &obj{
			key,
			info.Size(),
			info.ModTime(),
			info.IsDir(),
			"",
		})
		if len(objs) == int(limit) {
			break
		}
	}
	return generateListResult(objs, limit)
}

func newWebDAV(endpoint, user, passwd, token string) (ObjectStorage, error) {
	if !strings.Contains(endpoint, "://") {
		endpoint = fmt.Sprintf("http://%s", endpoint)
	}
	uri, err := url.ParseRequestURI(endpoint)
	if err != nil {
		return nil, fmt.Errorf("Invalid endpoint %s: %s", endpoint, err)
	}
	if uri.Path == "" {
		uri.Path = "/"
	}
	c := gowebdav.NewClient(uri.String(), user, passwd)
	c.SetTransport(httpClient.Transport)
	return &webdav{endpoint: uri, c: c}, nil
}

func init() {
	Register("webdav", newWebDAV)
}


================================================
FILE: pkg/sync/cluster.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sync

import (
	"bufio"
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/oliverisaac/shellescape"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
)

// Stat has the counters to represent the progress.
type Stat struct {
	Copied       int64    // the number of copied files
	CopiedBytes  int64    // total amount of copied data in bytes
	Checked      int64    // the number of checked files
	CheckedBytes int64    // total amount of checked data in bytes
	Deleted      int64    // the number of deleted files
	Skipped      int64    // the number of files skipped
	SkippedBytes int64    // total amount of skipped data in bytes
	Failed       int64    // the number of files that fail to copy
	DelayDelDir  []string // the directories that need to be deleted
}

func updateStats(r *Stat) {
	copied.IncrInt64(r.Copied)
	copiedBytes.IncrInt64(r.CopiedBytes)
	if checked != nil {
		checked.IncrInt64(r.Checked)
		checkedBytes.IncrInt64(r.CheckedBytes)
	}
	if deleted != nil {
		deleted.IncrInt64(r.Deleted)
	}
	skipped.IncrInt64(r.Skipped)
	skippedBytes.IncrInt64(r.SkippedBytes)
	if failed != nil {
		failed.IncrInt64(r.Failed)
	}
	handled.IncrInt64(r.Copied + r.Deleted + r.Skipped + r.Failed)
}

func httpRequest(url string, body []byte) (ans []byte, err error) {
	method := "GET"
	if body != nil {
		method = "POST"
	}
	req, err := http.NewRequest(method, url, bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	var resp *http.Response
	resp, err = http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return io.ReadAll(resp.Body)
}

var sendStatMu sync.Mutex

func sendStats(addr string) {
	sendStatMu.Lock()
	defer sendStatMu.Unlock()
	var r Stat
	r.Skipped = skipped.Current()
	r.SkippedBytes = skippedBytes.Current()
	r.Copied = copied.Current()
	r.CopiedBytes = copiedBytes.Current()
	srcDelayDelMu.Lock()
	r.DelayDelDir = srcDelayDel
	srcDelayDel = make([]string, 0)
	srcDelayDelMu.Unlock()
	if checked != nil {
		r.Checked = checked.Current()
		r.CheckedBytes = checkedBytes.Current()
	}
	if deleted != nil {
		r.Deleted = deleted.Current()
	}
	if failed != nil {
		r.Failed = failed.Current()
	}
	d, _ := json.Marshal(r)
	ans, err := httpRequest(fmt.Sprintf("http://%s/stats", addr), d)
	if err != nil || string(ans) != "OK" {
		srcDelayDelMu.Lock()
		srcDelayDel = append(srcDelayDel, r.DelayDelDir...)
		srcDelayDelMu.Unlock()
		if errors.Is(err, syscall.ECONNREFUSED) {
			logger.Errorf("the management process has been stopped, so the worker process now exits")
			os.Exit(1)
		}
		logger.Errorf("update stats: %s %s", string(ans), err)
	} else {
		skipped.IncrInt64(-r.Skipped)
		skippedBytes.IncrInt64(-r.SkippedBytes)
		copied.IncrInt64(-r.Copied)
		copiedBytes.IncrInt64(-r.CopiedBytes)
		if checked != nil {
			checked.IncrInt64(-r.Checked)
			checkedBytes.IncrInt64(-r.CheckedBytes)
		}
		if deleted != nil {
			deleted.IncrInt64(-r.Deleted)
		}
		if failed != nil {
			failed.IncrInt64(-r.Failed)
		}
	}
}

func startManager(config *Config, tasks <-chan object.Object) (string, error) {
	http.HandleFunc("/fetch", func(w http.ResponseWriter, req *http.Request) {
		var objs []object.Object
		var total int64
		obj, ok := <-tasks
		if !ok {
			_, _ = w.Write([]byte("[]"))
			return
		}
		objs = append(objs, obj)
		total += obj.Size()
	LOOP:
		for len(objs) < 100 && total < 400<<20 {
			select {
			case obj = <-tasks:
				if obj == nil {
					break LOOP
				}
				objs = append(objs, obj)
				total += obj.Size()
			default:
				break LOOP
			}
		}
		d, err := marshalObjects(objs)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		logger.Debugf("send %d objects(%s) to %s", len(objs), humanize.IBytes(uint64(total)), req.RemoteAddr)
		_, _ = w.Write(d)
	})
	http.HandleFunc("/stats", func(w http.ResponseWriter, req *http.Request) {
		if req.Method != "POST" {
			http.Error(w, "POST required", http.StatusBadRequest)
			return
		}
		d, err := io.ReadAll(req.Body)
		if err != nil {
			logger.Errorf("read: %s", err)
			return
		}
		var r Stat
		err = json.Unmarshal(d, &r)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		updateStats(&r)
		srcDelayDelMu.Lock()
		srcDelayDel = append(srcDelayDel, r.DelayDelDir...)
		srcDelayDelMu.Unlock()
		logger.Debugf("receive stats %+v from %s", r, req.RemoteAddr)
		_, _ = w.Write([]byte("OK"))
	})
	var addr string
	u, err := url.Parse("ssh://" + config.Workers[0])
	if err != nil {
		return "", fmt.Errorf("invalid worker address %s: %s", config.Workers[0], err)
	}
	if config.ManagerAddr != "" {
		addr = config.ManagerAddr
		if strings.HasPrefix(addr, ":") || strings.Contains(addr, "0.0.0.0") {
			ip, err := utils.GetLocalIp(net.JoinHostPort(u.Host, "22"))
			if err != nil {
				return "", fmt.Errorf("get local ip: %s", err)
			}
			addr = ip + addr
		}
	} else {
		ip, err := utils.GetLocalIp(net.JoinHostPort(u.Host, "22"))
		if err != nil {
			return "", fmt.Errorf("not found local ip: %s", err)
		}
		logger.Debugf("Use local ip %s", ip)
		addr = ip
	}

	if !strings.Contains(addr, ":") {
		addr += ":"
	}

	l, err := net.Listen("tcp", addr)
	if err != nil {
		return "", fmt.Errorf("listen: %s", err)
	}
	logger.Infof("Listen at %s", l.Addr())
	go func() { _ = http.Serve(l, nil) }()
	return l.Addr().String(), nil
}

func findSelfPath() (string, error) {
	program := os.Args[0]
	if strings.Contains(program, "/") {
		path, err := filepath.Abs(program)
		if err != nil {
			return "", fmt.Errorf("resolve path %s: %s", program, err)
		}
		return path, nil
	}
	for _, searchPath := range strings.Split(os.Getenv("PATH"), ":") {
		if searchPath != "" {
			p := filepath.Join(searchPath, program)
			if _, err := os.Stat(p); err == nil {
				return p, nil
			}
		}
	}
	return "", fmt.Errorf("can't find path for %s", program)
}

func launchWorker(address string, config *Config, wg *sync.WaitGroup) {
	workers := strings.Split(strings.Join(config.Workers, ","), ",")
	for _, host := range workers {
		wg.Add(1)
		go func(host string) {
			defer wg.Done()
			// copy
			path, err := findSelfPath()
			if err != nil {
				logger.Errorf("find self path: %s", err)
				return
			}
			rpath := filepath.Join("/tmp", filepath.Base(path))
			cmd := exec.Command("rsync", "-a", "-e", "ssh -o StrictHostKeyChecking=no -o PasswordAuthentication=no", path, host+":"+rpath)
			output, err := cmd.CombinedOutput()
			logger.Debugf("exec: %s,err: %s", cmd.String(), string(output))
			if err != nil {
				// fallback to scp
				cmd = exec.Command("scp", "-o", "StrictHostKeyChecking=no", "-o", "PasswordAuthentication=no", path, host+":"+rpath)
				output, err = cmd.CombinedOutput()
				logger.Debugf("exec: %s,err: %s", cmd.String(), string(output))
			}
			if err != nil {
				logger.Errorf("copy itself to %s: %s", host, err)
				return
			}
			// launch itself
			var args = []string{host}
			// set env
			var printEnv []string
			for k, v := range config.Env {
				args = append(args, fmt.Sprintf("%s=%s", k, v))
				if strings.Contains(k, "SECRET") ||
					strings.Contains(k, "TOKEN") ||
					strings.Contains(k, "PASSWORD") ||
					strings.Contains(k, "AZURE_STORAGE_CONNECTION_STRING") ||
					strings.Contains(k, "JFS_RSA_PASSPHRASE") {
					v = "******"
				}
				printEnv = append(printEnv, fmt.Sprintf("%s=%s", k, v))
			}

			args = append(args, rpath)
			args = append(args, os.Args[1:]...)
			args = append(args, "--manager", address)
			if !config.Verbose && !config.Quiet {
				args = append(args, "-q")
			}
			var argsBk = make([]string, len(args))
			copy(argsBk, args)
			for i, s := range printEnv {
				argsBk[i+1] = s
			}
			logger.Debugf("launch worker command args: [ssh, %s]", strings.Join(shellescape.EscapeArgs(argsBk), ", "))
			cmd = exec.Command("ssh", shellescape.EscapeArgs(args)...)
			cmd.Stdin = os.Stdin
			stderr, err := cmd.StderrPipe()
			if err != nil {
				logger.Errorf("redirect stderr: %s", err)
			}
			err = cmd.Start()
			if err != nil {
				logger.Errorf("start itself at %s: %s", host, err)
				return
			}
			logger.Infof("launch a worker on %s", host)
			var finished = make(chan struct{})
			var logRe = regexp.MustCompile(`^.*<([A-Z]+)>: (.*)`)
			go func() {
				r := bufio.NewReader(stderr)
				for {
					line, err := r.ReadString('\n')
					if err != nil || len(line) == 0 {
						finished <- struct{}{}
						return
					}
					line = strings.TrimSuffix(line, "\n")

					var level, content string
					if matches := logRe.FindStringSubmatch(line); len(matches) >= 3 {
						level = matches[1]
						content = matches[2]
					} else {
						level = "INFO"
						content = line
					}

					switch level {
					case "ERROR":
						logger.Errorf("[%s] %s", host, content)
					case "WARNING":
						logger.Warnf("[%s] %s", host, content)
					case "DEBUG":
						logger.Debugf("[%s] %s", host, content)
					default:
						logger.Infof("[%s] %s", host, content)
					}
				}
			}()
			err = cmd.Wait()
			<-finished
			if err != nil {
				logger.Errorf("%s: %s", host, err)
			}
		}(host)
	}
}

func marshalObjects(objs []object.Object) ([]byte, error) {
	var arr []map[string]interface{}
	for _, o := range objs {
		nsize := o.Size()
		o = withoutSize(o)
		obj := object.MarshalObject(o)
		if nsize != o.Size() {
			obj["nsize"] = nsize
		}
		arr = append(arr, obj)
	}
	return json.MarshalIndent(arr, "", " ")
}

func unmarshalObjects(d []byte) ([]object.Object, error) {
	var arr []map[string]interface{}
	err := json.Unmarshal(d, &arr)
	if err != nil {
		return nil, err
	}
	var objs []object.Object
	for _, m := range arr {
		obj := object.UnmarshalObject(m)
		if nsize, ok := m["nsize"]; ok {
			obj = withSize(obj, int64(nsize.(float64)))
		}
		objs = append(objs, obj)
	}
	return objs, nil
}

func fetchJobs(tasks chan<- object.Object, config *Config) {
	for {
		url := fmt.Sprintf("http://%s/fetch", config.Manager)
		ans, err := httpRequest(url, nil)
		if err != nil {
			logger.Errorf("fetch jobs: %s", err)
			time.Sleep(time.Second)
			continue
		}
		var jobs []object.Object
		jobs, err = unmarshalObjects(ans)
		if err != nil {
			logger.Errorf("Unmarshal %s: %s", string(ans), err)
			time.Sleep(time.Second)
			continue
		}
		logger.Debugf("got %d jobs", len(jobs))
		if len(jobs) == 0 {
			logger.Infof("no more jobs")
			break
		}
		for _, obj := range jobs {
			tasks <- obj
		}
	}
	close(tasks)
}


================================================
FILE: pkg/sync/cluster_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package sync

import (
	"os"
	"os/user"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/object"
)

type obj struct {
	key       string
	size      int64
	mtime     time.Time
	isDir     bool
	isSymlink bool
}

func (o *obj) Key() string          { return o.key }
func (o *obj) Size() int64          { return o.size }
func (o *obj) Mtime() time.Time     { return o.mtime }
func (o *obj) IsDir() bool          { return o.isDir }
func (o *obj) IsSymlink() bool      { return o.isSymlink }
func (o *obj) StorageClass() string { return "" }

type file struct {
	obj
}

func (o *file) Owner() string     { return "" }
func (o *file) Group() string     { return "" }
func (o *file) Mode() os.FileMode { return 0 }

func TestCluster(t *testing.T) {
	// manager
	workerAddr := "127.0.0.1"
	if u, err := user.Current(); err != nil {
		logger.Warnf("Failed to get current user: %v", err)
	} else if u.Username != "" {
		workerAddr = u.Username + "@" + workerAddr
	}
	todo := make(chan object.Object, 100)
	var conf Config
	conf.Workers = []string{workerAddr}
	addr, err := startManager(&conf, todo)
	if err != nil {
		t.Fatal(err)
	}
	// sendStats(addr)
	// worker
	conf.Manager = addr
	mytodo := make(chan object.Object, 100)
	go fetchJobs(mytodo, &conf)

	todo <- &obj{key: "test"}
	close(todo)

	obj := <-mytodo
	if obj.Key() != "test" {
		t.Fatalf("expect test but got %s", obj.Key())
	}
	if _, ok := <-mytodo; ok {
		t.Fatalf("should end")
	}
}

func TestMarshal(t *testing.T) {
	mtime := time.Now()
	var objs = []object.Object{
		&obj{key: "test", mtime: mtime},
		withSize(&obj{key: "test1", size: 100}, -4),
		withSize(&file{obj{key: "test2", size: 200}}, -1),
		withSize(&file{obj{key: "test3", size: 200, isSymlink: true}}, -1),
	}
	d, err := marshalObjects(objs)
	if err != nil {
		t.Fatal(err)
	}
	objs2, e := unmarshalObjects(d)
	if e != nil {
		t.Fatal(e)
	}
	if objs2[0].Key() != "test" {
		t.Fatalf("expect test but got %s", objs2[0].Key())
	}
	if !objs2[0].Mtime().Equal(objs[0].Mtime()) {
		t.Fatalf("expect %s but got %s", mtime, objs2[0].Mtime())
	}
	if objs2[1].Key() != "test1" || objs2[1].Size() != -4 || withoutSize(objs2[1]).Size() != 100 {
		t.Fatalf("expect withSize but got %s", objs2[1].Key())
	}
	if objs2[2].Key() != "test2" || objs2[2].Size() != -1 || withoutSize(objs2[2]).Size() != 200 {
		t.Fatalf("expect withFSize but got %s", objs2[2].Key())
	}
	if objs2[3].Key() != "test3" || objs2[3].Size() != -1 || withoutSize(objs2[3]).Size() != 200 && objs2[3].IsSymlink() != true {
		t.Fatalf("expect withFSize but got %s", objs2[3].Key())
	}
}


================================================
FILE: pkg/sync/config.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sync

import (
	"math"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/spf13/cast"
	"github.com/urfave/cli/v2"
)

type Config struct {
	StorageClass      string
	Start             string
	End               string
	Threads           int
	Update            bool
	ForceUpdate       bool
	Perms             bool
	MaxFailure        int64
	Dry               bool
	DeleteSrc         bool
	DeleteDst         bool
	MatchFullPath     bool
	Dirs              bool
	Exclude           []string
	Include           []string
	Existing          bool
	IgnoreExisting    bool
	Links             bool
	Inplace           bool
	Limit             int64
	Manager           string
	Workers           []string
	ManagerAddr       string
	ListThreads       int
	ListDepth         int
	BWLimit           int64
	TrafficControlURL string
	NoHTTPS           bool
	Verbose           bool
	Quiet             bool
	CheckAll          bool
	CheckNew          bool
	CheckChange       bool
	MaxSize           int64
	MinSize           int64
	MaxAge            time.Duration
	MinAge            time.Duration
	StartTime         time.Time
	EndTime           time.Time
	Env               map[string]string

	FilesFrom string

	rules          []rule
	concurrentList chan int
	Registerer     prometheus.Registerer
}

const JFS_UMASK = "JFS_UMASK"

func envList() []string {
	return []string{
		"ACCESS_KEY",
		"SECRET_KEY",
		"SESSION_TOKEN",

		"MINIO_ACCESS_KEY",
		"MINIO_SECRET_KEY",
		"MINIO_REGION",

		"META_PASSWORD",
		"REDIS_PASSWORD",
		"SENTINEL_PASSWORD",
		"SENTINEL_PASSWORD_FOR_OBJ",

		"AZURE_STORAGE_CONNECTION_STRING",

		"BDCLOUD_DEFAULT_REGION",
		"BDCLOUD_ACCESS_KEY",
		"BDCLOUD_SECRET_KEY",

		"COS_SECRETID",
		"COS_SECRETKEY",

		"EOS_ACCESS_KEY",
		"EOS_SECRET_KEY",
		"EOS_TOKEN",

		"GOOGLE_CLOUD_PROJECT",

		"HADOOP_USER_NAME",
		"HADOOP_SUPER_USER",
		"HADOOP_SUPER_GROUP",
		"HADOOP_CONF_DIR",
		"HADOOP_HOME",
		"KRB5_CONFIG",
		"KRB5CCNAME",
		"KRB5KEYTAB",
		"KRB5KEYTAB_BASE64",
		"KRB5PRINCIPAL",

		"AWS_REGION",
		"AWS_DEFAULT_REGION",

		"HWCLOUD_DEFAULT_REGION",
		"HWCLOUD_ACCESS_KEY",
		"HWCLOUD_SECRET_KEY",

		"ALICLOUD_REGION_ID",
		"ALICLOUD_ACCESS_KEY_ID",
		"ALICLOUD_ACCESS_KEY_SECRET",
		"SECURITY_TOKEN",

		"CEPH_ADMIN_SOCKET",
		"CEPH_LOG_FILE",

		"QINIU_DOMAIN",

		"SCW_ACCESS_KEY",
		"SCW_SECRET_KEY",

		"SSH_PRIVATE_KEY_PATH",
		"SSH_AUTH_SOCK",

		"JFS_RSA_PASSPHRASE",
		"PYROSCOPE_AUTH_TOKEN",
		"DISPLAY_PROGRESSBAR",
		"CGOFUSE_TRACE",
		"JUICEFS_DEBUG",
		"JUICEFS_LOGLEVEL",
	}
}

func NewConfigFromCli(c *cli.Context) *Config {
	if c.Int64("limit") < -1 {
		logger.Fatal("limit should not be less than -1")
	}
	var startTime, endTime time.Time
	var err error
	if c.IsSet("start-time") {
		startTime, err = cast.ToTimeInDefaultLocationE(c.String("start-time"), time.Local)
		if err != nil {
			logger.Fatalf("failed to parse start time: %v", err)
		}
	}
	if c.IsSet("end-time") {
		endTime, err = cast.ToTimeInDefaultLocationE(c.String("end-time"), time.Local)
		if err != nil {
			logger.Fatalf("failed to parse end time: %v", err)
		}
	}
	cfg := &Config{
		StorageClass:      c.String("storage-class"),
		Start:             c.String("start"),
		End:               c.String("end"),
		Threads:           c.Int("threads"),
		ListThreads:       c.Int("list-threads"),
		ListDepth:         c.Int("list-depth"),
		Update:            c.Bool("update"),
		ForceUpdate:       c.Bool("force-update"),
		Perms:             c.Bool("perms"),
		Dirs:              c.Bool("dirs"),
		Dry:               c.Bool("dry"),
		MaxFailure:        c.Int64("max-failure"),
		DeleteSrc:         c.Bool("delete-src"),
		DeleteDst:         c.Bool("delete-dst"),
		Exclude:           c.StringSlice("exclude"),
		Include:           c.StringSlice("include"),
		MatchFullPath:     c.Bool("match-full-path"),
		Existing:          c.Bool("existing"),
		IgnoreExisting:    c.Bool("ignore-existing"),
		Links:             c.Bool("links"),
		Inplace:           c.Bool("inplace"),
		Limit:             c.Int64("limit"),
		Workers:           c.StringSlice("worker"),
		ManagerAddr:       c.String("manager-addr"),
		Manager:           c.String("manager"),
		BWLimit:           utils.ParseMbps(c, "bwlimit"),
		TrafficControlURL: c.String("traffic-control-url"),
		NoHTTPS:           c.Bool("no-https"),
		Verbose:           c.Bool("verbose"),
		Quiet:             c.Bool("quiet"),
		CheckAll:          c.Bool("check-all"),
		CheckNew:          c.Bool("check-new"),
		CheckChange:       c.Bool("check-change"),
		MaxSize:           int64(utils.ParseBytes(c, "max-size", 'B')),
		MinSize:           int64(utils.ParseBytes(c, "min-size", 'B')),
		MaxAge:            utils.Duration(c.String("max-age")),
		MinAge:            utils.Duration(c.String("min-age")),
		StartTime:         startTime,
		EndTime:           endTime,
		FilesFrom:         c.String("files-from"),
		Env:               make(map[string]string),
	}
	if !c.IsSet("max-size") {
		cfg.MaxSize = math.MaxInt64
	}
	if cfg.MinSize > cfg.MaxSize {
		logger.Fatal("min-size should not be larger than max-size")
	}
	if cfg.MaxAge > 0 && cfg.MinAge > cfg.MaxAge {
		logger.Fatal("min-age should not be larger than max-age")
	}
	if cfg.Threads <= 0 {
		logger.Warnf("threads should be larger than 0, reset it to 1")
		cfg.Threads = 1
	}
	for _, key := range envList() {
		if os.Getenv(key) != "" {
			cfg.Env[key] = os.Getenv(key)
		}
	}
	// pass all the variable that contains "JFS"
	for _, ekv := range os.Environ() {
		key := strings.Split(ekv, "=")[0]
		if strings.Contains(key, "JFS") {
			cfg.Env[key] = os.Getenv(key)
		}
	}
	// pass umask to workers
	cfg.Env[JFS_UMASK] = strconv.Itoa(utils.GetUmask())

	// workers: set umask for the current process
	if umask := os.Getenv(JFS_UMASK); umask != "" {
		utils.SetUmask(cast.ToInt(umask))
	}

	return cfg
}


================================================
FILE: pkg/sync/download.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sync

import (
	"context"
	"errors"
	"io"
	"sync"

	"github.com/juicedata/juicefs/pkg/object"
)

type parallelDownloader struct {
	sync.Mutex
	notify     *sync.Cond
	src        object.ObjectStorage
	key        string
	fsize      int64
	blockSize  int64
	concurrent chan int
	buffers    map[int64]*[]byte
	off        int64
	err        error
}

func (r *parallelDownloader) hasErr() bool {
	r.Lock()
	defer r.Unlock()
	return r.err != nil
}

func (r *parallelDownloader) setErr(err error) {
	r.Lock()
	defer r.Unlock()
	r.err = err
}

const downloadBufSize = 10 << 20

var downloadBufPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, 0, downloadBufSize)
		return &buf
	},
}

func (r *parallelDownloader) download() {
	for off := int64(0); off < r.fsize; off += r.blockSize {
		r.concurrent <- 1
		go func(off int64) {
			var size = r.blockSize
			if off+r.blockSize > r.fsize {
				size = r.fsize - off
			}
			var saved bool
			if !r.hasErr() {
				if limiter != nil {
					limiter.Wait(size)
				}
				var in io.ReadCloser
				e := try(3, func() error {
					var err error
					in, err = r.src.Get(context.Background(), r.key, off, size)
					return err
				})
				if e != nil {
					r.setErr(e)
				} else { //nolint:typecheck
					defer in.Close()
					p := downloadBufPool.Get().(*[]byte)
					*p = (*p)[:size]
					_, e = io.ReadFull(in, *p)
					if e != nil {
						r.setErr(e)
						downloadBufPool.Put(p)
					} else {
						r.Lock()
						if r.buffers != nil {
							r.buffers[off] = p
							saved = true
						} else {
							downloadBufPool.Put(p)
						}
						r.Unlock()
					}
				}
			}
			if !saved {
				<-r.concurrent
			}
			r.notify.Signal()
		}(off)
	}
}

func (r *parallelDownloader) Read(b []byte) (int, error) {
	if len(b) == 0 {
		return 0, nil
	}
	if r.off >= r.fsize {
		return 0, io.EOF
	}
	off := r.off / r.blockSize * r.blockSize
	r.Lock()
	for r.err == nil && r.buffers[off] == nil {
		r.notify.Wait()
	}
	p := r.buffers[off]
	r.Unlock()
	if p == nil {
		return 0, r.err
	}
	n := copy(b, (*p)[r.off-off:])
	r.off += int64(n)
	if r.off == off+int64(len(*p)) {
		downloadBufPool.Put(p)
		r.Lock()
		delete(r.buffers, off)
		r.Unlock()
		<-r.concurrent
	}
	if copiedBytes != nil {
		copiedBytes.IncrInt64(int64(n))
	}
	return n, nil
}

func (r *parallelDownloader) Close() {
	r.Lock()
	defer r.Unlock()
	for _, p := range r.buffers {
		downloadBufPool.Put(p)
	}
	r.buffers = nil
	if r.err == nil {
		r.err = errors.New("closed")
	}
}

func newParallelDownloader(store object.ObjectStorage, key string, size int64, bSize int64, concurrent chan int) *parallelDownloader {
	if bSize < 1 {
		panic("concurrent and blockSize must be positive integer")
	}
	down := &parallelDownloader{
		src:        store,
		key:        key,
		fsize:      size,
		blockSize:  bSize,
		concurrent: concurrent,
		buffers:    make(map[int64]*[]byte),
	}
	down.notify = sync.NewCond(down)
	go down.download()
	return down
}


================================================
FILE: pkg/sync/download_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sync

import (
	"bytes"
	"io"
	"os"
	"testing"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
)

func TestDownload(t *testing.T) {
	key := "testDownload"
	a, _ := object.CreateStorage("file", "/tmp/download/", "", "", "")
	t.Cleanup(func() {
		os.RemoveAll("/tmp/download/")
	})
	type config struct {
		concurrent int
		fsize      int64
	}
	type tcase struct {
		config
		tfunc func(t *testing.T, pr *parallelDownloader, content []byte)
	}

	tcases := []tcase{
		{config: config{fsize: downloadBufSize*3 + 100, concurrent: 4}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res, err := io.ReadAll(pr)
			if err != nil {
				t.Fatal(err)
			}
			if !bytes.Equal(res, content) {
				t.Fatalf("get wrong content by io.ReadAll")
			}
		}},

		{config: config{fsize: 97340326, concurrent: 4}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res, err := io.ReadAll(pr)
			if err != nil {
				t.Fatal(err)
			}
			if !bytes.Equal(res, content) {
				t.Fatalf("get wrong content by io.ReadAll")
			}
		}},

		{config: config{fsize: downloadBufSize*3 + 100, concurrent: 5}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res, err := io.ReadAll(pr)
			if err != nil {
				t.Fatal(err)
			}
			if !bytes.Equal(res, content) {
				t.Fatalf("get wrong content by io.ReadAll")
			}
		}},

		{config: config{fsize: 1, concurrent: 5}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res := make([]byte, 1)
			n, err := pr.Read(res)
			if err != nil || n != 1 || res[0] != content[0] {
				t.Fatalf("read 1 byte should succeed")
			}
			n, err = pr.Read(res)
			if err != io.EOF || n != 0 {
				t.Fatalf("err should be io.EOF or n should equal 0, but got %s %d", err, n)
			}
		}},

		{config: config{fsize: 2, concurrent: 5}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res := make([]byte, 1)
			n, err := pr.Read(res)
			if err != nil || n != 1 || res[0] != content[0] {
				t.Fatalf("read 1 byte should succeed")
			}
			n, err = pr.Read(res)
			if err != nil || n != 1 || res[0] != content[1] {
				t.Fatalf("read 1 byte should succeed")
			}
			n, err = pr.Read(res)
			if err != io.EOF || n != 0 {
				t.Fatalf("err should be io.EOF or n should equal 0, but got %s %d", err, n)
			}
		}},

		{config: config{fsize: 2, concurrent: 1}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res := make([]byte, 1)
			n, err := pr.Read(res)

			if err != nil || n != 1 || res[0] != content[0] {
				t.Fatalf("read 1 byte should succeed")
			}
			n, err = pr.Read(res)
			if err != nil || n != 1 || res[0] != content[1] {
				t.Fatalf("read 1 byte should succeed")
			}
			n, err = pr.Read(res)
			if err != io.EOF || n != 0 {
				t.Fatalf("err should be io.EOF or n should equal 0, but got %s %d", err, n)
			}
		}},

		{config: config{fsize: downloadBufSize * 20, concurrent: 3}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			resSize := 4 * downloadBufSize
			res := make([]byte, 4*downloadBufSize)
			n, err := io.ReadFull(pr, res)

			if err != nil || n != resSize || res[0] != content[0] {
				t.Fatalf("read %v byte should succeed, but got %d, %s", resSize, n, err)
			}
			n, err = io.ReadFull(pr, res)
			if err != nil || n != resSize || res[0] != content[resSize] {
				t.Fatalf("read %v byte should succeed, but got %d, %s", resSize, n, err)
			}
			_ = a.Delete(ctx, key)
			n, err = io.ReadFull(pr, res)
			n, err = io.ReadFull(pr, res)
			if !os.IsNotExist(err) {
				t.Fatalf("err should be ErrNotExist, but got %d, %s", n, err)
			}
		}},

		{config: config{fsize: 0, concurrent: 5}, tfunc: func(t *testing.T, pr *parallelDownloader, content []byte) {
			defer pr.Close()
			res := make([]byte, 1)
			n, err := pr.Read(res)
			if err != io.EOF || n != 0 {
				t.Fatalf("err should be io.EOF or n should equal 0, but got %s %d", err, n)
			}
		}},
	}

	for _, c := range tcases {
		content := make([]byte, c.config.fsize)
		utils.RandRead(content)
		_ = a.Put(ctx, key, bytes.NewReader(content))
		c.tfunc(t, newParallelDownloader(a, key, c.config.fsize, downloadBufSize, make(chan int, c.concurrent)), content)
	}

	downloader := newParallelDownloader(a, "notExist", 10*downloadBufSize, downloadBufSize, make(chan int, 5))
	res := make([]byte, 1)
	n, err := downloader.Read(res)
	if !os.IsNotExist(err) || n != 0 {
		t.Fatalf("err should be ErrNotExist or n should equal 0, but got %s %d", err, n)
	}
}


================================================
FILE: pkg/sync/sync.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package sync

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"hash/crc32"
	"io"
	"math"
	"net/http"
	"os"
	"path"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
	"unicode"

	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juju/ratelimit"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/vimeo/go-util/crc32combine"
)

// The max number of key per listing request
const (
	maxResults      = 1000
	defaultPartSize = 5 << 20
	bufferSize      = 32 << 10
	maxBlock        = defaultPartSize * 2
	markDeleteSrc   = -1
	markDeleteDst   = -2
	markCopyPerms   = -3
	markChecksum    = -4
)

var (
	handled                 *utils.Bar
	pending                 *utils.Bar
	copied, copiedBytes     *utils.Bar
	checked, checkedBytes   *utils.Bar
	skipped, skippedBytes   *utils.Bar
	excluded, excludedBytes *utils.Bar
	extra, extraBytes       *utils.Bar
	deleted, failed         *utils.Bar
	listedPrefix            *utils.Bar
	concurrent              chan int
	limiter                 *mixedLimiter
	totalHandled            atomic.Int64
)

type mixedLimiter struct {
	global *globalLimit
	local  *ratelimit.Bucket
}

func (l *mixedLimiter) Wait(count int64) {
	if l.local != nil {
		l.local.Wait(count)
	}
	if l.global != nil {
		l.global.wait(count)
	}
}

type globalLimit struct {
	sync.Mutex
	balance int64
	due     time.Time
	need    int64
	waiters []*sync.Cond

	address string
}
type req struct {
	// Positive numbers indicate a request, negative numbers indicate a payback.
	Bytes int64 `json:"bytes"`
}

type resp struct {
	Granted int64 `json:"granted"` // bytes
	Expired int64 `json:"expired"` // Millisecond
}

func (l *globalLimit) request(ask int64) (int64, int64, error) {
	r := req{Bytes: ask}
	data, err := json.Marshal(r)
	if err != nil {
		return 0, 0, err
	}
	result, err := http.Post(l.address, "application/json", bytes.NewReader(data))
	if err != nil || result.StatusCode != http.StatusOK {
		var status string
		if result != nil {
			status = http.StatusText(result.StatusCode)
		}
		logger.Errorf("request traffic control %s failed: %s, http status: %s", l.address, err, status)
		return 0, 0, err
	}
	defer result.Body.Close()
	content, err := io.ReadAll(result.Body)
	if err != nil {
		return 0, 0, err
	}
	res := resp{}
	if err := json.Unmarshal(content, &res); err != nil {
		return 0, 0, err
	}
	return res.Granted, res.Expired, nil
}

func (l *globalLimit) wait(bytes int64) {
	l.Lock()
	defer l.Unlock()
	if bytes <= 0 || l.balance >= bytes && len(l.waiters) == 0 {
		l.balance -= bytes
		return
	}
	l.need += bytes

	var me = sync.NewCond(l)
	l.waiters = append(l.waiters, me)
	for l.waiters[0] != me {
		me.Wait()
	}

	if l.balance < bytes {
		// request credit for other waiters together
		ask := l.need - l.balance
		if ask >= bytes*10 {
			// don't wait for too long
			ask = bytes * 10
		}
		l.Unlock()
		granted, expire, err := l.request(ask)
		l.Lock()
		if err == nil {
			l.balance += granted
			l.due = time.Now().Add(time.Millisecond * time.Duration(expire))
			logger.Debugf("grant %d from %s until %s", granted, l.address, l.due)
		}
	}

	l.balance -= bytes
	l.need -= bytes
	l.waiters = l.waiters[1:]
	if len(l.waiters) > 0 {
		l.waiters[0].Signal()
	}
}

func (l *globalLimit) checkBalance() {
	now := time.Now()
	l.Lock()
	if l.balance > 0 && l.need == 0 && l.due.Before(now) {
		payback := l.balance
		if payback > 1<<30 {
			payback = 1 << 30
		}
		l.balance -= payback
		l.Unlock()
		_, _, _ = l.request(-payback)
	} else {
		l.Unlock()
	}
}

var crcTable = crc32.MakeTable(crc32.Castagnoli)
var logger = utils.GetLogger("juicefs")
var ctx = context.Background()

func incrTotal(n int64) {
	totalHandled.Add(n)
}

func incrHandled(n int) {
	old := totalHandled.Swap(0)
	handled.IncrTotal(old)
	handled.IncrBy(n)
}

type chksumReader struct {
	io.Reader
	chksum uint32
	cal    bool
}

func (r *chksumReader) Read(p []byte) (n int, err error) {
	n, err = r.Reader.Read(p)
	if r.cal {
		r.chksum = crc32.Update(r.chksum, crcTable, p[:n])
	}
	return
}

type chksumWithSz struct {
	chksum uint32
	size   int64
}

// human readable bytes size
func formatSize(bytes int64) string {
	units := [7]string{" ", "K", "M", "G", "T", "P", "E"}
	if bytes < 1024 {
		return fmt.Sprintf("%v B", bytes)
	}
	z := 0
	v := float64(bytes)
	for v > 1024.0 {
		z++
		v /= 1024.0
	}
	return fmt.Sprintf("%.2f %siB", v, units[z])
}

// ListAll on all the keys that starts at marker from object storage.
func ListAll(store object.ObjectStorage, prefix, start, end string, followLink bool) (<-chan object.Object, error) {
	startTime := time.Now()
	logger.Debugf("Iterating objects from %s with prefix %s start %q", store, prefix, start)

	out := make(chan object.Object, maxResults*10)

	// As the result of object storage's List method doesn't include the marker key,
	// we try List the marker key separately.
	if start != "" && strings.HasPrefix(start, prefix) {
		if obj, err := store.Head(ctx, start); err == nil {
			logger.Debugf("Found start key: %s from %s in %s", start, store, time.Since(startTime))
			out <- obj
		}
	}

	if ch, err := store.ListAll(ctx, prefix, start, followLink); err == nil {
		go func() {
			for obj := range ch {
				if obj != nil && end != "" && obj.Key() > end {
					break
				}
				out <- obj
			}
			close(out)
		}()
		return out, nil
	} else if !errors.Is(err, utils.ENOTSUP) {
		return nil, err
	}

	marker := start
	logger.Debugf("Listing objects from %s marker %q", store, marker)

	objs, hasMore, nextToken, err := store.List(ctx, prefix, marker, "", "", maxResults, followLink)
	if errors.Is(err, utils.ENOTSUP) {
		return object.ListAllWithDelimiter(ctx, store, prefix, start, end, followLink)
	}
	if err != nil {
		logger.Errorf("Can't list %s: %s", store, err.Error())
		return nil, err
	}
	logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime))
	go func() {
		lastkey := ""
		first := true
	END:
		for {
			for _, obj := range objs {
				key := obj.Key()
				if !first && key <= lastkey {
					logger.Errorf("The keys are out of order: marker %q, last %q current %q", marker, lastkey, key)
					out <- nil
					break END
				}
				if end != "" && key > end {
					break END
				}
				lastkey = key
				// logger.Debugf("key: %s", key)
				out <- obj
				first = false
			}
			if !hasMore {
				break
			}

			marker = lastkey
			startTime = time.Now()
			logger.Debugf("Continue listing objects from %s marker %q", store, marker)
			var nextToken2 string
			objs, hasMore, nextToken2, err = store.List(ctx, prefix, marker, nextToken, "", maxResults, followLink)
			count := 0
			for err != nil && count < 3 {
				logger.Warnf("Fail to list: %s, retry again", err.Error())
				// slow down
				time.Sleep(time.Millisecond * 100)

				objs, hasMore, nextToken2, err = store.List(ctx, prefix, marker, nextToken, "", maxResults, followLink)
				count++
			}
			logger.Debugf("Found %d object from %s in %s", len(objs), store, time.Since(startTime))
			if err != nil {
				// Telling that the listing has failed
				out <- nil
				logger.Errorf("Fail to list after %s: %s", marker, err.Error())
				break
			}
			nextToken = nextToken2
			if len(objs) > 0 && objs[0].Key() == marker {
				// workaround from a object store that is not compatible to S3.
				objs = objs[1:]
			}
		}
		close(out)
	}()
	return out, nil
}

var bufPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, bufferSize)
		return &buf
	},
}

func shouldRetry(err error) bool {
	if err == nil || errors.Is(err, utils.ErrSkipped) || errors.Is(err, utils.ErrExtlink) {
		return false
	}

	var eno syscall.Errno
	if errors.As(err, &eno) {
		switch eno {
		case syscall.EAGAIN, syscall.EINTR, syscall.EBUSY, syscall.ETIMEDOUT, syscall.EIO:
			return true
		default:
			return false
		}
	}
	return true
}

func try(n int, f func() error) (err error) {
	for i := 0; i < n; i++ {
		err = f()
		if !shouldRetry(err) {
			return
		}
		logger.Debugf("Try %d failed: %s", i+1, err)
		time.Sleep(time.Second * time.Duration(i*i))
	}
	return
}

func deleteObj(storage object.ObjectStorage, key string, dry bool) {
	if dry {
		logger.Debugf("Will delete %s from %s", key, storage)
		deleted.Increment()
		return
	}
	start := time.Now()
	if err := try(3, func() error { return storage.Delete(ctx, key) }); err == nil {
		deleted.Increment()
		logger.Debugf("Deleted %s from %s in %s", key, storage, time.Since(start))
	} else {
		failed.Increment()
		logger.Errorf("Failed to delete %s from %s in %s: %s", key, storage, time.Since(start), err)
	}
}

func needCopyPerms(o1, o2 object.Object) bool {
	f1 := o1.(object.File)
	f2 := o2.(object.File)
	return f2.Mode() != f1.Mode() || f2.Owner() != f1.Owner() || f2.Group() != f1.Group()
}

func copyPerms(dst object.ObjectStorage, obj object.Object, config *Config) {
	start := time.Now()
	key := obj.Key()
	fi := obj.(object.File)
	if !fi.IsSymlink() || !config.Links {
		// chmod needs to be executed after chown, because chown will change setuid setgid to be invalid.
		if err := dst.(object.FileSystem).Chown(key, fi.Owner(), fi.Group()); err != nil {
			logger.Warnf("Chown %s to (%s,%s): %s", key, fi.Owner(), fi.Group(), err)
		}
		if err := dst.(object.FileSystem).Chmod(key, fi.Mode()); err != nil {
			logger.Warnf("Chmod %s to %o: %s", key, fi.Mode(), err)
		}
	}
	logger.Debugf("Copied permissions (%s:%s:%s) for %s in %s", fi.Owner(), fi.Group(), fi.Mode(), key, time.Since(start))
}

func calPartChksum(objStor object.ObjectStorage, key string, abort chan struct{}, offset, length int64) (uint32, error) {
	if limiter != nil {
		limiter.Wait(length)
	}
	select {
	case <-abort:
		return 0, fmt.Errorf("aborted")
	case concurrent <- 1:
		defer func() {
			<-concurrent
		}()
	}
	in, err := objStor.Get(ctx, key, offset, length)
	if err != nil {
		return 0, fmt.Errorf("dest get: %s", err)
	}
	defer in.Close()

	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	var chksum uint32
	for left := int(length); left > 0; left -= bufferSize {
		bs := bufferSize
		if left < bufferSize {
			bs = left
		}
		*buf = (*buf)[:bs]
		if _, err = io.ReadFull(in, *buf); err != nil {
			return 0, fmt.Errorf("dest read: %s", err)
		}
		chksum = crc32.Update(chksum, crcTable, *buf)
	}
	return chksum, nil
}

func calObjChksum(objStor object.ObjectStorage, key string, abort chan struct{}, obj object.Object) (uint32, error) {
	var err error
	var chksum uint32
	if obj.Size() < maxBlock {
		return calPartChksum(objStor, key, abort, 0, obj.Size())
	}
	n := int((obj.Size()-1)/defaultPartSize) + 1
	errs := make(chan error, n)
	chksums := make([]chksumWithSz, n)
	for i := 0; i < n; i++ {
		go func(num int) {
			sz := int64(defaultPartSize)
			if num == n-1 {
				sz = obj.Size() - int64(num)*defaultPartSize
			}
			chksum, err := calPartChksum(objStor, key, abort, int64(num)*defaultPartSize, sz)
			chksums[num] = chksumWithSz{chksum, sz}
			errs <- err
		}(i)
	}
	for i := 0; i < n; i++ {
		if err = <-errs; err != nil {
			close(abort)
			break
		}
	}
	if err != nil {
		return 0, err
	}
	chksum = chksums[0].chksum
	for i := 1; i < n; i++ {
		chksum = crc32combine.CRC32Combine(crc32.Castagnoli, chksum, chksums[i].chksum, chksums[i].size)
	}
	return chksum, nil
}

func compObjPartBinary(src, dst object.ObjectStorage, key string, abort chan struct{}, offset, length int64) error {
	if limiter != nil {
		limiter.Wait(length)
	}
	select {
	case <-abort:
		return fmt.Errorf("aborted")
	case concurrent <- 1:
		defer func() {
			<-concurrent
		}()
	}
	in, err := src.Get(ctx, key, offset, length)
	if err != nil {
		return fmt.Errorf("src get: %s", err)
	}
	defer in.Close()
	in2, err := dst.Get(ctx, key, offset, length)
	if err != nil {
		return fmt.Errorf("dest get: %s", err)
	}
	defer in2.Close()

	buf := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf)
	buf2 := bufPool.Get().(*[]byte)
	defer bufPool.Put(buf2)
	for left := int(length); left > 0; left -= bufferSize {
		bs := bufferSize
		if left < bufferSize {
			bs = left
		}
		*buf = (*buf)[:bs]
		*buf2 = (*buf2)[:bs]
		if _, err = io.ReadFull(in, *buf); err != nil {
			return fmt.Errorf("src read: %s", err)
		}
		if _, err = io.ReadFull(in2, *buf2); err != nil {
			return fmt.Errorf("dest read: %s", err)
		}
		if !bytes.Equal(*buf, *buf2) {
			return fmt.Errorf("bytes not equal")
		}
	}
	return nil
}

func compObjBinary(src, dst object.ObjectStorage, key string, abort chan struct{}, obj object.Object) (bool, error) {
	var err error
	if obj.Size() < maxBlock {
		err = compObjPartBinary(src, dst, key, abort, 0, obj.Size())
	} else {
		n := int((obj.Size()-1)/defaultPartSize) + 1
		errs := make(chan error, n)
		for i := 0; i < n; i++ {
			go func(num int) {
				sz := int64(defaultPartSize)
				if num == n-1 {
					sz = obj.Size() - int64(num)*defaultPartSize
				}
				errs <- compObjPartBinary(src, dst, key, abort, int64(num)*defaultPartSize, sz)
			}(i)
		}
		for i := 0; i < n; i++ {
			if err = <-errs; err != nil {
				close(abort)
				break
			}
		}
	}
	equal := false
	if err != nil && err.Error() == "bytes not equal" {
		err = nil
	} else {
		equal = err == nil
	}
	return equal, err
}

func doCheckSum(src, dst object.ObjectStorage, key string, srcChksumPtr *uint32, obj object.Object, config *Config, equal *bool) error {
	if obj.IsSymlink() && config.Links && (config.CheckAll || config.CheckNew) {
		var srcLink, dstLink string
		var err error
		if s, ok := src.(object.SupportSymlink); ok {
			if srcLink, err = s.Readlink(key); err != nil {
				return err
			}
		}
		if s, ok := dst.(object.SupportSymlink); ok {
			if dstLink, err = s.Readlink(key); err != nil {
				return err
			}
		}
		*equal = srcLink == dstLink && srcLink != "" && dstLink != ""
		return nil
	}
	abort := make(chan struct{})
	var err error
	if srcChksumPtr != nil {
		var srcChksum uint32
		var dstChksum uint32
		srcChksum = *srcChksumPtr
		dstChksum, err = calObjChksum(dst, key, abort, obj)
		if err == nil {
			*equal = srcChksum == dstChksum
		} else {
			*equal = false
		}
	} else {
		*equal, err = compObjBinary(src, dst, key, abort, obj)
	}
	return err
}

func checkSum(src, dst object.ObjectStorage, key string, srcChksum *uint32, obj object.Object, config *Config) (bool, error) {
	start := time.Now()
	var equal bool
	err := try(3, func() error { return doCheckSum(src, dst, key, srcChksum, obj, config, &equal) })
	if err == nil {
		checked.Increment()
		checkedBytes.IncrInt64(obj.Size())
		if equal {
			logger.Debugf("Checked %s OK (and equal) in %s,", key, time.Since(start))
		} else {
			logger.Warnf("Checked %s OK (but NOT equal) in %s,", key, time.Since(start))
		}
	} else {
		logger.Errorf("Failed to check %s in %s: %s", key, time.Since(start), err)
	}
	return equal, err
}

var fastStreamRead = map[string]struct{}{"file": {}, "hdfs": {}, "jfs": {}, "gluster": {}}
var streamWrite = map[string]struct{}{"file": {}, "hdfs": {}, "sftp": {}, "gs": {}, "wasb": {}, "ceph": {}, "swift": {}, "webdav": {}, "jfs": {}, "gluster": {}}
var readInMem = map[string]struct{}{"mem": {}, "etcd": {}, "redis": {}, "tikv": {}, "mysql": {}, "postgres": {}, "sqlite3": {}}

func inMap(obj object.ObjectStorage, m map[string]struct{}) bool {
	_, ok := m[strings.Split(obj.String(), "://")[0]]
	return ok
}

func doCopySingle(src, dst object.ObjectStorage, key string, size int64, calChksum bool) (uint32, error) {
	if size > maxBlock && !inMap(dst, readInMem) && !inMap(src, fastStreamRead) {
		var err error
		var in io.Reader
		downer := newParallelDownloader(src, key, size, downloadBufSize, concurrent)
		defer downer.Close()
		if inMap(dst, streamWrite) {
			in = downer
		} else {
			var f *os.File
			// download the object into disk
			if f, err = os.CreateTemp("", "rep"); err != nil {
				logger.Warnf("create temp file: %s", err)
				return doCopySingle0(src, dst, key, size, calChksum)
			}
			_ = os.Remove(f.Name()) // will be deleted after Close()
			defer f.Close()
			buf := bufPool.Get().(*[]byte)
			defer bufPool.Put(buf)
			// hide f.ReadFrom to avoid discarding buf
			if _, err = io.CopyBuffer(struct{ io.Writer }{f}, downer, *buf); err == nil {
				_, err = f.Seek(0, 0)
				in = f
			}
		}
		r := &chksumReader{in, 0, calChksum}
		if err == nil {
			err = dst.Put(ctx, key, r)
		}
		if err != nil {
			if _, e := src.Head(ctx, key); os.IsNotExist(e) {
				logger.Debugf("Head src %s: %s", key, err)
				err = utils.ErrSkipped
			}
		}
		return r.chksum, err
	}
	return doCopySingle0(src, dst, key, size, calChksum)
}

func doCopySingle0(src, dst object.ObjectStorage, key string, size int64, calChksum bool) (uint32, error) {
	concurrent <- 1
	defer func() {
		<-concurrent
	}()
	var in io.ReadCloser
	var err error
	if size == 0 {
		if key == "" && !object.IsFileSystem(dst) {
			ps := strings.SplitN(dst.String(), "/", 4)
			if len(ps) == 4 && ps[3] == "" {
				logger.Warnf("empty key is not support by %s, ignore it", dst)
				return 0, nil
			}
		}
		if object.IsFileSystem(src) {
			// for check permissions
			r, err := src.Get(ctx, key, 0, -1)
			if err != nil {
				return 0, err
			}
			_ = r.Close()
		}
		in = io.NopCloser(bytes.NewReader(nil))
	} else {
		in, err = src.Get(ctx, key, 0, size)
		if err != nil {
			if _, e := src.Head(ctx, key); os.IsNotExist(e) {
				logger.Debugf("Head src %s: %s", key, err)
				err = utils.ErrSkipped
			}
			return 0, err
		}
	}
	r := &chksumReader{in, 0, calChksum}
	defer in.Close()
	err = dst.Put(ctx, key, &withProgress{r})
	return r.chksum, err
}

type withProgress struct {
	r io.Reader
}

func (w *withProgress) Read(b []byte) (int, error) {
	if limiter != nil {
		limiter.Wait(int64(len(b)))
	}
	n, err := w.r.Read(b)
	copiedBytes.IncrInt64(int64(n))
	return n, err
}

func dynAlloc(size int) []byte {
	zeros := utils.PowerOf2(size)
	b := *dynPools[zeros].Get().(*[]byte)
	if cap(b) < size {
		panic(fmt.Sprintf("%d < %d", cap(b), size))
	}
	return b[:size]
}

func dynFree(b []byte) {
	dynPools[utils.PowerOf2(cap(b))].Put(&b)
}

var dynPools []*sync.Pool

func init() {
	dynPools = make([]*sync.Pool, 33) // 1 - 8G
	for i := 0; i < 33; i++ {
		func(bits int) {
			dynPools[i] = &sync.Pool{
				New: func() interface{} {
					b := make([]byte, 1<<bits)
					return &b
				},
			}
		}(i)
	}
}

func doUploadPart(src, dst object.ObjectStorage, srckey string, off, size int64, key, uploadID string, num int, calChksum bool) (*object.Part, uint32, error) {
	if limiter != nil {
		limiter.Wait(size)
	}
	start := time.Now()
	sz := size
	data := dynAlloc(int(size))
	defer dynFree(data)
	var part *object.Part
	var chksum uint32
	err := try(3, func() error {
		in, err := src.Get(ctx, srckey, off, sz)
		if err != nil {
			return err
		}
		defer in.Close()
		r := &chksumReader{in, 0, calChksum}
		if _, err = io.ReadFull(r, data); err != nil {
			return err
		}
		chksum = r.chksum
		// PartNumber starts from 1
		part, err = dst.UploadPart(ctx, key, uploadID, num+1, data)
		return err
	})
	if err != nil {
		logger.Warnf("Failed to copy data of %s part %d: %s", key, num, err)
		return nil, 0, fmt.Errorf("part %d: %s", num, err)
	}
	logger.Debugf("Copied data of %s part %d in %s", key, num, time.Since(start))
	copiedBytes.IncrInt64(sz)
	return part, chksum, nil
}

func choosePartSize(upload *object.MultipartUpload, size int64) int64 {
	partSize := int64(upload.MinPartSize)
	if partSize == 0 {
		partSize = defaultPartSize
	}
	if size > partSize*int64(upload.MaxCount) {
		partSize = size / int64(upload.MaxCount)
		partSize = ((partSize-1)>>20 + 1) << 20 // align to MB
	}
	return partSize
}

func doCopyRange(src, dst object.ObjectStorage, key string, off, size int64, upload *object.MultipartUpload, num int, abort chan struct{}, calChksum bool) (*object.Part, uint32, error) {
	select {
	case <-abort:
		return nil, 0, fmt.Errorf("aborted")
	case concurrent <- 1:
		defer func() {
			<-concurrent
		}()
	}

	limits := dst.Limits()
	if size <= 32<<20 || !limits.IsSupportUploadPartCopy {
		return doUploadPart(src, dst, key, off, size, key, upload.UploadID, num, calChksum)
	}

	tmpkey := fmt.Sprintf("%s.part%d", key, num)
	var up *object.MultipartUpload
	var err error
	err = try(3, func() error {
		up, err = dst.CreateMultipartUpload(ctx, tmpkey)
		return err
	})
	if err != nil {
		return nil, 0, fmt.Errorf("range(%d,%d): %s", off, size, err)
	}

	partSize := choosePartSize(up, size)
	n := int((size-1)/partSize) + 1
	logger.Debugf("Copying data of %s (range: %d,%d) as %d parts (size: %d): %s", key, off, size, n, partSize, up.UploadID)
	parts := make([]*object.Part, n)
	var tmpChksum uint32
	first := true

	for i := 0; i < n; i++ {
		sz := partSize
		if i == n-1 {
			sz = size - int64(i)*partSize
		}
		select {
		case <-abort:
			dst.AbortUpload(ctx, tmpkey, up.UploadID)
			return nil, 0, fmt.Errorf("aborted")
		default:
		}
		var chksum uint32
		parts[i], chksum, err = doUploadPart(src, dst, key, off+int64(i)*partSize, sz, tmpkey, up.UploadID, i, calChksum)
		if err != nil {
			dst.AbortUpload(ctx, tmpkey, up.UploadID)
			return nil, 0, fmt.Errorf("range(%d,%d): %s", off, size, err)
		}
		if calChksum {
			if first {
				tmpChksum = chksum
				first = false
			} else {
				tmpChksum = crc32combine.CRC32Combine(crc32.Castagnoli, tmpChksum, chksum, sz)
			}
		}
	}

	err = try(3, func() error { return dst.CompleteUpload(ctx, tmpkey, up.UploadID, parts) })
	if err != nil {
		dst.AbortUpload(ctx, tmpkey, up.UploadID)
		return nil, 0, fmt.Errorf("multipart: %s", err)
	}
	var part *object.Part
	err = try(3, func() error {
		part, err = dst.UploadPartCopy(ctx, key, upload.UploadID, num+1, tmpkey, 0, size)
		return err
	})
	_ = dst.Delete(ctx, tmpkey)
	return part, tmpChksum, err
}

func doCopyMultiple(src, dst object.ObjectStorage, key string, size int64, upload *object.MultipartUpload, calChksum bool) (uint32, error) {
	limits := dst.Limits()
	if size > limits.MaxPartSize*int64(upload.MaxCount) {
		return 0, fmt.Errorf("object size %d is too large to copy", size)
	}

	partSize := choosePartSize(upload, size)
	n := int((size-1)/partSize) + 1
	logger.Debugf("Copying data of %s as %d parts (size: %d): %s", key, n, partSize, upload.UploadID)
	abort := make(chan struct{})
	parts := make([]*object.Part, n)
	errs := make(chan error, n)
	chksums := make([]chksumWithSz, n)
	var err error

	for i := 0; i < n; i++ {
		go func(num int) {
			sz := partSize
			if num == n-1 {
				sz = size - int64(num)*partSize
			}
			var copyErr error
			var chksum uint32
			parts[num], chksum, copyErr = doCopyRange(src, dst, key, int64(num)*partSize, sz, upload, num, abort, calChksum)
			chksums[num] = chksumWithSz{chksum, sz}
			errs <- copyErr
		}(i)
	}

	for i := 0; i < n; i++ {
		if err = <-errs; err != nil {
			close(abort)
			break
		}
	}
	if err == nil {
		err = try(3, func() error { return dst.CompleteUpload(ctx, key, upload.UploadID, parts) })
	}
	if err != nil {
		dst.AbortUpload(ctx, key, upload.UploadID)
		return 0, fmt.Errorf("multipart: %s", err)
	}
	var chksum uint32
	if calChksum {
		chksum = chksums[0].chksum
		for i := 1; i < n; i++ {
			chksum = crc32combine.CRC32Combine(crc32.Castagnoli, chksum, chksums[i].chksum, chksums[i].size)
		}
	}

	return chksum, nil
}

func InitForCopyData() {
	concurrent = make(chan int, 10)
	progress := utils.NewProgress(true)
	copied = progress.AddCountSpinner("Copied objects")
	copiedBytes = progress.AddByteSpinner("Copied bytes")
}

func CopyData(src, dst object.ObjectStorage, key string, size int64, calChksum bool) (uint32, error) {
	start := time.Now()
	var err error
	var srcChksum uint32
	if size < maxBlock {
		err = try(3, func() (err error) {
			srcChksum, err = doCopySingle(src, dst, key, size, calChksum)
			return
		})
	} else {
		var upload *object.MultipartUpload
		if upload, err = dst.CreateMultipartUpload(ctx, key); err == nil {
			srcChksum, err = doCopyMultiple(src, dst, key, size, upload, calChksum)
		} else if err == utils.ENOTSUP {
			err = try(3, func() (err error) {
				srcChksum, err = doCopySingle(src, dst, key, size, calChksum)
				return
			})
		} else { // other error retry
			if err = try(2, func() error {
				upload, err = dst.CreateMultipartUpload(ctx, key)
				return err
			}); err == nil {
				srcChksum, err = doCopyMultiple(src, dst, key, size, upload, calChksum)
			}
		}
	}
	if err == nil {
		logger.Debugf("Copied data of %s (%d bytes) in %s", key, size, time.Since(start))
	} else {
		logger.Errorf("Failed to copy data of %s in %s: %s", key, time.Since(start), err)
	}
	return srcChksum, err
}

type holder struct {
	done chan struct{}
}

var muHolder sync.Mutex
var holders []*holder

func fetchTask(tasks chan object.Object) (t object.Object, done func()) {
	muHolder.Lock()
	defer muHolder.Unlock()
	if len(holders) > 0 {
		h := holders[len(holders)-1]
		holders = holders[:len(holders)-1]
		muHolder.Unlock()
		<-h.done
		muHolder.Lock()
	}
	if t = <-tasks; t == nil {
		return nil, func() {}
	}
	size := t.Size()
	if size == markChecksum {
		size = withoutSize(t).Size()
	}
	if size >= maxBlock*2 {
		done := make(chan struct{})
		h := &holder{done: done}
		n := min(int(size)/maxBlock, 20)
		for i := 1; i < n; i++ {
			holders = append(holders, h)
		}
		return t, func() { close(done) }
	} else {
		return t, func() {}
	}
}

func worker(tasks chan object.Object, src, dst object.ObjectStorage, config *Config) {
	for {
		obj, done := fetchTask(tasks)
		if obj == nil {
			break
		}
		key := obj.Key()
		switch obj.Size() {
		case markDeleteSrc:
			deleteObj(src, key, config.Dry)
		case markDeleteDst:
			deleteObj(dst, key, config.Dry)
		case markCopyPerms:
			if config.Dry {
				logger.Debugf("Will copy permissions for %s", key)
			} else {
				copyPerms(dst, withoutSize(obj), config)
			}
			copied.Increment()
		case markChecksum:
			if config.Dry {
				logger.Debugf("Will compare checksum for %s", key)
				checked.Increment()
				break
			}
			obj = withoutSize(obj)
			if equal, err := checkSum(src, dst, key, nil, obj, config); err != nil {
				failed.Increment()
				break
			} else if equal {
				if config.DeleteSrc {
					if obj.IsDir() {
						srcDelayDelMu.Lock()
						srcDelayDel = append(srcDelayDel, key)
						srcDelayDelMu.Unlock()
					} else {
						deleteObj(src, key, false)
					}
				} else if config.Perms && (!obj.IsSymlink() || !config.Links) {
					if o, e := dst.Head(ctx, key); e == nil {
						if needCopyPerms(obj, o) {
							copyPerms(dst, obj, config)
							copied.Increment()
						} else {
							skipped.Increment()
							skippedBytes.IncrInt64(obj.Size())
						}
					} else {
						logger.Warnf("Failed to head object %s: %s", key, e)
						failed.Increment()
					}
				} else {
					skipped.Increment()
					skippedBytes.IncrInt64(obj.Size())
				}
				break
			}
			// checkSum not equal, copy the object
			fallthrough
		default:
			if config.Dry {
				logger.Debugf("Will copy %s (%d bytes)", obj.Key(), obj.Size())
				copied.Increment()
				copiedBytes.IncrInt64(obj.Size())
				break
			}
			var err error
			var srcChksum uint32

			if config.Links && obj.IsSymlink() {
				if err = copyLink(src, dst, key); err != nil {
					logger.Errorf("copy link failed: %s", err)
				}
			} else {
				srcChksum, err = CopyData(src, dst, key, obj.Size(), config.CheckAll || config.CheckNew)
			}
			if errors.Is(err, utils.ErrExtlink) {
				logger.Warnf("Skip external link %s: %s", key, err)
				err = utils.ErrSkipped
			}

			if err == nil && config.CheckChange {
				err = checkChange(src, dst, obj, key, config)
			}

			if err == nil && (config.CheckAll || config.CheckNew) {
				var equal bool
				if equal, err = checkSum(src, dst, key, &srcChksum, obj, config); err == nil && !equal {
					err = fmt.Errorf("checksums of copied object %s don't match", key)
				}
			}
			if err == nil {
				if mc, ok := dst.(object.MtimeChanger); ok {
					if err = mc.Chtimes(obj.Key(), obj.Mtime()); err != nil && !errors.Is(err, utils.ENOTSUP) {
						logger.Warnf("Update mtime of %s: %s", key, err)
					}
				}
				if config.Perms {
					copyPerms(dst, obj, config)
				}
				copied.Increment()
			} else if errors.Is(err, utils.ErrSkipped) {
				skipped.Increment()
			} else {
				failed.Increment()
				logger.Errorf("Failed to copy object %s: %s", key, err)
			}
		}
		incrHandled(1)
		done()
	}
}

func checkChange(src, dst object.ObjectStorage, obj object.Object, key string, config *Config) error {
	if obj == nil || config.Links && obj.IsSymlink() {
		return nil // ignore symlink
	}
	if cur, err := src.Head(ctx, key); err == nil {
		if !config.CheckAll && !config.CheckNew {
			checked.Increment()
			checkedBytes.IncrInt64(obj.Size())
		}
		equal := cur.Size() == obj.Size()
		if equal && !cur.Mtime().Equal(obj.Mtime()) {
			// Head of an object may not return the millisecond part of mtime as List
			equal = cur.Mtime().Unix() == obj.Mtime().Unix() && cur.Mtime().UnixMilli()%1000 == 0
		}
		if !equal {
			return fmt.Errorf("%s changed during sync. Original: size=%d, mtime=%s; Current: size=%d, mtime=%s",
				cur.Key(), obj.Size(), obj.Mtime(), cur.Size(), cur.Mtime())
		}
		if dstObj, err := dst.Head(ctx, key); err == nil {
			if cur.Size() != dstObj.Size() {
				return fmt.Errorf("copied %s size mismatch: original=%d, current=%d", key, obj.Size(), dstObj.Size())
			}
			return nil
		} else {
			return fmt.Errorf("check %s in %s: %s", key, dst, err)
		}
	} else if errors.Is(err, os.ErrNotExist) {
		return fmt.Errorf("object %s was removed during sync", key)
	} else {
		return fmt.Errorf("check %s in %s: %s", key, src, err)
	}
}

func copyLink(src object.ObjectStorage, dst object.ObjectStorage, key string) error {
	if p, err := src.(object.SupportSymlink).Readlink(key); err != nil {
		return err
	} else {
		if err := dst.Delete(ctx, key); err != nil {
			logger.Debugf("Deleted %s from %s ", key, dst)
			return err
		}
		// TODO: use relative path based on option
		return dst.(object.SupportSymlink).Symlink(p, key)
	}
}

type objWithSize struct {
	object.Object
	nsize int64
}

func (o *objWithSize) Size() int64 {
	return o.nsize
}

type fileWithSize struct {
	object.File
	nsize int64
}

func (o *fileWithSize) Size() int64 {
	return o.nsize
}

func withSize(o object.Object, nsize int64) object.Object {
	if f, ok := o.(object.File); ok {
		return &fileWithSize{f, nsize}
	}
	return &objWithSize{o, nsize}
}

func withoutSize(o object.Object) object.Object {
	switch w := o.(type) {
	case *objWithSize:
		return w.Object
	case *fileWithSize:
		return w.File
	}
	return o
}

var dstDelayDelMu sync.Mutex
var dstDelayDel []string
var srcDelayDelMu sync.Mutex
var srcDelayDel []string

func handleExtraObject(tasks chan<- object.Object, dstobj object.Object, config *Config) bool {
	incrTotal(1)
	if !config.DeleteDst || !config.Dirs && dstobj.IsDir() || config.Limit == 0 {
		logger.Debug("Ignore extra object", dstobj.Key())
		extra.Increment()
		extraBytes.IncrInt64(dstobj.Size())
		return false
	}
	config.Limit--
	if dstobj.IsDir() {
		dstDelayDelMu.Lock()
		dstDelayDel = append(dstDelayDel, dstobj.Key())
		dstDelayDelMu.Unlock()
	} else {
		tasks <- withSize(dstobj, markDeleteDst)
	}
	return config.Limit == 0
}

func startSingleProducer(tasks chan<- object.Object, src, dst object.ObjectStorage, prefix string, config *Config) error {
	start, end := config.Start, config.End
	logger.Debugf("maxResults: %d, defaultPartSize: %d, maxBlock: %d", maxResults, defaultPartSize, maxBlock)

	srckeys, err := ListAll(src, prefix, start, end, !config.Links)
	if err != nil {
		return fmt.Errorf("list %s: %s", src, err)
	}

	var dstkeys <-chan object.Object
	if config.ForceUpdate {
		t := make(chan object.Object)
		close(t)
		dstkeys = t
	} else {
		dstkeys, err = ListAll(dst, prefix, start, end, !config.Links)
		if err != nil {
			return fmt.Errorf("list %s: %s", dst, err)
		}
	}
	return produce(tasks, srckeys, dstkeys, config)
}

func produce(tasks chan<- object.Object, srckeys, dstkeys <-chan object.Object, config *Config) error {
	srckeys = filter(srckeys, config.rules, config)
	dstkeys = filter(dstkeys, config.rules, config)
	var dstobj object.Object
	var (
		skip, skipBytes int64
		lastUpdate      time.Time
	)
	flushProgress := func() {
		skipped.IncrInt64(skip)
		skippedBytes.IncrInt64(skipBytes)
		incrHandled(int(skip))
		skip, skipBytes = 0, 0
	}
	defer flushProgress()
	skipIt := func(obj object.Object) {
		skip++
		skipBytes += obj.Size()
		if skip > 100 || time.Since(lastUpdate) > time.Millisecond*100 {
			lastUpdate = time.Now()
			flushProgress()
		}
	}
	for obj := range srckeys {
		if obj == nil {
			return fmt.Errorf("listing failed, stop syncing, waiting for pending ones")
		}
		if !config.Dirs && obj.IsDir() {
			logger.Debug("Ignore directory ", obj.Key())
			continue
		}
		if config.Limit >= 0 {
			if config.Limit == 0 {
				return nil
			}
			config.Limit--
		}
		incrTotal(1)

		if dstobj != nil && obj.Key() > dstobj.Key() {
			if handleExtraObject(tasks, dstobj, config) {
				return nil
			}
			dstobj = nil
		}
		if dstobj == nil {
			for dstobj = range dstkeys {
				if dstobj == nil {
					return fmt.Errorf("listing failed, stop syncing, waiting for pending ones")
				}
				if obj.Key() <= dstobj.Key() {
					break
				}
				if handleExtraObject(tasks, dstobj, config) {
					return nil
				}
				dstobj = nil
			}
		}

		// FIXME: there is a race when source is modified during coping
		if dstobj == nil || obj.Key() < dstobj.Key() {
			if config.Existing {
				skipIt(obj)
				continue
			}
			tasks <- obj
		} else { // obj.key == dstobj.key
			if config.IgnoreExisting {
				skipIt(obj)
				dstobj = nil
				continue
			}
			if config.ForceUpdate ||
				(config.Update && obj.Mtime().Unix() > dstobj.Mtime().Unix()) ||
				(!config.Update && obj.Size() != dstobj.Size()) {
				tasks <- obj
			} else if config.Update && obj.Mtime().Unix() < dstobj.Mtime().Unix() {
				skipIt(obj)
			} else if config.CheckAll { // two objects are likely the same
				tasks <- withSize(obj, markChecksum)
			} else if config.DeleteSrc {
				if obj.IsDir() {
					srcDelayDelMu.Lock()
					srcDelayDel = append(srcDelayDel, obj.Key())
					srcDelayDelMu.Unlock()
				} else {
					tasks <- withSize(obj, markDeleteSrc)
				}
			} else if config.Perms && needCopyPerms(obj, dstobj) {
				tasks <- withSize(obj, markCopyPerms)
			} else {
				skipIt(obj)
			}
			dstobj = nil
		}
	}
	if config.DeleteDst {
		if dstobj != nil {
			if handleExtraObject(tasks, dstobj, config) {
				return nil
			}
		}
		for dstobj = range dstkeys {
			if dstobj == nil {
				return fmt.Errorf("listing failed, stop syncing, waiting for pending ones")
			}
			if handleExtraObject(tasks, dstobj, config) {
				return nil
			}
		}
	}
	return nil
}

type rule struct {
	pattern string
	include bool
}

func parseRule(name, p string) rule {
	if runtime.GOOS == "windows" {
		p = strings.Replace(p, "\\", "/", -1)
	}
	return rule{pattern: p, include: name == "-include"}
}

func parseIncludeRules(args []string) (rules []rule) {
	l := len(args)
	for i, a := range args {
		if strings.HasPrefix(a, "--") {
			a = a[1:]
		}
		if l-1 > i && (a == "-include" || a == "-exclude") {
			if _, err := path.Match(args[i+1], "xxxx"); err != nil {
				logger.Warnf("ignore invalid pattern: %s %s", a, args[i+1])
				continue
			}
			rules = append(rules, parseRule(a, args[i+1]))
		} else if strings.HasPrefix(a, "-include=") || strings.HasPrefix(a, "-exclude=") {
			if s := strings.Split(a, "="); len(s) == 2 && s[1] != "" {
				if _, err := path.Match(s[1], "xxxx"); err != nil {
					logger.Warnf("ignore invalid pattern: %s", a)
					continue
				}
				rules = append(rules, parseRule(s[0], s[1]))
			}
		}
	}
	return
}

func filterKey(o object.Object, now time.Time, rules []rule, config *Config) bool {
	var ok bool = true
	if !o.IsDir() && !o.IsSymlink() {
		ok = o.Size() >= int64(config.MinSize) && o.Size() <= int64(config.MaxSize)
		if ok && config.MaxAge > 0 {
			ok = o.Mtime().After(now.Add(-config.MaxAge))
		}
		if ok && config.MinAge > 0 {
			ok = o.Mtime().Before(now.Add(-config.MinAge))
		}
		if ok && !config.StartTime.IsZero() {
			ok = o.Mtime().After(config.StartTime)
		}
		if ok && !config.EndTime.IsZero() {
			ok = o.Mtime().Before(config.EndTime)
		}
	}
	if ok {
		if config.MatchFullPath {
			ok = matchFullPath(rules, o.Key())
		} else {
			ok = matchLeveledPath(rules, o.Key())
		}
	}
	return ok
}

func filter(keys <-chan object.Object, rules []rule, config *Config) <-chan object.Object {
	r := make(chan object.Object)
	now := time.Now()
	go func() {
		for o := range keys {
			if o == nil {
				// Telling that the listing has failed
				r <- nil
				break
			}
			if filterKey(o, now, rules, config) {
				r <- o
			} else {
				logger.Debugf("exclude %s size: %d, mtime: %s", o.Key(), o.Size(), o.Mtime())
				excluded.Increment()
				excludedBytes.IncrInt64(o.Size())
			}
		}
		close(r)
	}()
	return r
}

func matchTwoStar(p string, s []string) bool {
	if len(s) == 0 {
		return p == "*"
	}
	idx := strings.Index(p, "**")
	if idx == -1 {
		ok, _ := path.Match(p, strings.Join(s, "/"))
		return ok
	}
	ok, _ := path.Match(p[:idx+1], s[0])
	if !ok {
		return false
	}
	for i := 0; i <= len(s); i++ {
		tp := p[idx+1:]
		if i == 0 {
			tp = p[:idx] + p[idx+1:]
		}
		if matchTwoStar(tp, s[i:]) {
			return true
		}
	}
	return false
}

func matchPrefix(p, s []string) bool {
	if len(p) == 0 || len(s) == 0 {
		return len(p) == len(s)
	}
	first := p[0]
	n := len(s)
	switch {
	case first == "***":
		return true
	case strings.Contains(first, "**"):
		for i := 1; i <= n; i++ {
			if matchTwoStar(first, s[:i]) && matchPrefix(p[1:], s[i:]) {
				return true
			}
		}
		return false
	default:
		ok, _ := path.Match(first, s[0])
		return ok && matchPrefix(p[1:], s[1:])
	}
}

func matchSuffix(p, s []string) bool {
	if len(p) == 0 {
		return true
	}
	last := p[len(p)-1]
	if len(s) == 0 {
		return len(p) == 1 && (last == "***" || last == "**")
	}
	prefix := p[:len(p)-1]
	n := len(s)
	switch {
	case last == "***":
		for i := 0; i < n; i++ {
			if matchSuffix(prefix, s[:i]) {
				return true
			}
		}
		return false
	case strings.Contains(last, "**"):
		for i := 0; i < n; i++ {
			if matchTwoStar(last, s[i:]) && matchSuffix(prefix, s[:i]) {
				return true
			}
		}
		return false
	default:
		ok, _ := path.Match(last, s[n-1])
		return ok && matchSuffix(prefix, s[:n-1])
	}
}

func matchFullPath(rules []rule, key string) bool {
	ps := strings.Split(key, "/")
	for _, rule := range rules {
		p := strings.Split(rule.pattern, "/")
		var ok bool
		if p[0] == "" {
			if ps[0] != "" {
				p = p[1:]
			}
			ok = matchPrefix(p, ps)
		} else {
			ok = matchSuffix(p, ps)
		}
		if ok {
			if rule.include {
				break // try next level
			} else {
				return false
			}
		}
	}
	return true
}

// Consistent with rsync behavior, the matching order is adjusted according to the order of the "include" and "exclude" options
func matchLeveledPath(rules []rule, key string) bool {
	parts := strings.Split(key, "/")
	for i := range parts {
		if parts[i] == "" {
			continue
		}
		for _, rule := range rules {
			ps := parts[:i+1]
			p := strings.Split(rule.pattern, "/")
			if i < len(parts)-1 && (p[len(p)-1] == "" || p[len(p)-1] == "***") {
				ps = append(append([]string{}, ps...), "") // don't overwrite parts
			}
			var ok bool
			if p[0] == "" {
				if ps[0] != "" {
					p = p[1:]
				}
				ok = matchPrefix(p, ps)
			} else {
				ok = matchSuffix(p, ps)
			}
			if ok {
				if rule.include {
					break // try next level
				} else {
					return false
				}
			}
		}
	}
	return true
}

func listCommonPrefix(store object.ObjectStorage, prefix string, cp chan object.Object, followLink bool) (chan object.Object, error) {
	var total []object.Object
	var objs []object.Object
	var err error
	var nextToken string
	var marker string
	var hasMore bool
	var thisListMaxResults int64 = maxResults
	if strings.HasPrefix(store.String(), "file://") || strings.HasPrefix(store.String(), "nfs://") ||
		strings.HasPrefix(store.String(), "gluster://") || strings.HasPrefix(store.String(), "jfs://") ||
		strings.HasPrefix(store.String(), "hdfs://") || strings.HasPrefix(store.String(), "webdav://") {
		thisListMaxResults = math.MaxInt64
	}
	for {
		objs, hasMore, nextToken, err = store.List(ctx, prefix, marker, nextToken, "/", thisListMaxResults, followLink)
		if err != nil {
			return nil, err
		}
		if len(objs) > 0 {
			total = append(total, objs...)
			marker = objs[len(objs)-1].Key()
		}
		if !hasMore {
			break
		}
	}
	srckeys := make(chan object.Object, 1000)
	go func() {
		defer close(srckeys)
		for _, o := range total {
			if o.IsDir() && o.Key() > prefix {
				if cp != nil {
					cp <- o
				}
			} else {
				srckeys <- o
			}
		}
	}()
	return srckeys, nil
}

func produceFromList(tasks chan<- object.Object, src, dst object.ObjectStorage, config *Config) error {
	f, err := os.Open(config.FilesFrom)
	if err != nil {
		return fmt.Errorf("open %s: %s", config.FilesFrom, err)
	}
	defer f.Close()

	prefixs := make(chan string, config.Threads)
	var wg sync.WaitGroup
	wg.Add(config.Threads)
	for i := 0; i < config.Threads; i++ {
		go func() {
			defer wg.Done()
			for key := range prefixs {
				if !strings.HasSuffix(key, "/") {
					if err := produceSingleObject(tasks, src, dst, key, config); err == nil {
						listedPrefix.Increment()
						continue
					} else if errors.Is(err, errDirSuffix) {
						key += "/"
					} else if os.IsNotExist(err) {
						atomic.AddInt64(&ignoreFiles, 1)
						listedPrefix.Increment()
						continue
					}
				}
				logger.Debugf("start listing prefix %s", key)
				err = startProducer(tasks, src, dst, key, config.ListDepth, config)
				if err != nil {
					logger.Errorf("list prefix %s: %s", key, err)
					failed.Increment()
				}
				listedPrefix.Increment()
			}
		}()
	}

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		key := scanner.Text()
		if key == "" {
			continue
		}
		trimKey := strings.TrimRightFunc(key, unicode.IsSpace)
		if trimKey != key {
			logger.Infof("found a prefix with a space character:%q", key)
		}
		prefixs <- trimKey
	}
	close(prefixs)

	wg.Wait()
	listedPrefix.Done()
	return nil
}

var errDirSuffix = errors.New("dir miss suffix '/'")
var ignoreFiles int64

func produceSingleObject(tasks chan<- object.Object, src, dst object.ObjectStorage, key string, config *Config) error {
	obj, err := src.Head(ctx, key)
	if err != nil {
		logger.Warnf("head %s from %s: %s", key, src, err)
		return err
	}
	if obj.IsDir() {
		// only `files-from` will hit this case
		if !strings.HasSuffix(key, "/") {
			return errDirSuffix
		}
		if !config.Dirs {
			return nil
		}
	}
	var srckeys = make(chan object.Object, 1)
	srckeys <- obj
	close(srckeys)
	if dobj, e := dst.Head(ctx, key); e == nil || os.IsNotExist(e) {
		var dstkeys = make(chan object.Object, 1)
		if dobj != nil {
			dstkeys <- dobj
		}
		close(dstkeys)
		logger.Debugf("produce single key %s", key)
		_ = produce(tasks, srckeys, dstkeys, config)
		return nil
	} else {
		logger.Warnf("head %s from %s: %s", key, dst, e)
		err = e
	}
	return err
}

func startProducer(tasks chan<- object.Object, src, dst object.ObjectStorage, prefix string, listDepth int, config *Config) error {
	config.concurrentList <- 1
	defer func() {
		<-config.concurrentList
	}()
	if config.Limit == 1 && len(config.rules) == 0 {
		if produceSingleObject(tasks, src, dst, prefix, config) == nil {
			return nil
		}
	}
	if config.ListThreads <= 1 || listDepth <= 0 {
		return startSingleProducer(tasks, src, dst, prefix, config)
	}

	commonPrefix := make(chan object.Object, 1000)
	done := make(chan bool)
	go func() {
		defer close(done)
		var mu sync.Mutex
		processing := make(map[string]bool)
		var wg sync.WaitGroup
		defer wg.Wait()
		for c := range commonPrefix {
			mu.Lock()
			if processing[c.Key()] {
				mu.Unlock()
				continue
			}
			processing[c.Key()] = true
			mu.Unlock()

			if len(config.rules) > 0 && !matchLeveledPath(config.rules, c.Key()) {
				logger.Infof("exclude prefix %s", c.Key())
				continue
			}
			if c.Key() < config.Start {
				logger.Infof("ignore prefix %s", c.Key())
				continue
			}
			if config.End != "" && c.Key() > config.End {
				logger.Infof("ignore prefix %s", c.Key())
				continue
			}
			wg.Add(1)
			go func(prefix string) {
				defer wg.Done()
				err := startProducer(tasks, src, dst, prefix, listDepth-1, config)
				if err != nil {
					logger.Errorf("list prefix %s: %s", prefix, err)
					failed.Increment()
				}
			}(c.Key())
		}
	}()

	srckeys, err := listCommonPrefix(src, prefix, commonPrefix, !config.Links)
	if err == utils.ENOTSUP {
		return startSingleProducer(tasks, src, dst, prefix, config)
	} else if err != nil {
		return fmt.Errorf("list %s with delimiter: %s", src, err)
	}
	var dcp chan object.Object
	if config.DeleteDst {
		dcp = commonPrefix // search common prefix in dst
	}
	var dstkeys <-chan object.Object
	if config.ForceUpdate {
		t := make(chan object.Object)
		close(t)
		dstkeys = t
	} else {
		dstkeys, err = listCommonPrefix(dst, prefix, dcp, !config.Links)
		if err == utils.ENOTSUP {
			return startSingleProducer(tasks, src, dst, prefix, config)
		} else if err != nil {
			return fmt.Errorf("list %s with delimiter: %s", dst, err)
		}
	}
	// sync returned objects
	if err := produce(tasks, srckeys, dstkeys, config); err != nil {
		return err
	}
	// consume all the keys from dst
	for range dstkeys {
	}
	close(commonPrefix)

	<-config.concurrentList
	<-done
	config.concurrentList <- 1
	return nil
}

// Sync syncs all the keys between to object storage
func Sync(src, dst object.ObjectStorage, config *Config) error {
	if strings.HasPrefix(src.String(), "file://") && strings.HasPrefix(dst.String(), "file://") {
		major, minor := utils.GetKernelVersion()
		// copy_file_range() system call first appeared in Linux 4.5, and reworked in 5.3
		// Go requires kernel >= 5.3 to use copy_file_range(), see:
		// https://github.com/golang/go/blob/go1.17.11/src/internal/poll/copy_file_range_linux.go#L58-L66
		if major > 5 || (major == 5 && minor >= 3) {
			d1 := utils.GetDev(src.String()[7:]) // remove prefix "file://"
			d2 := utils.GetDev(dst.String()[7:])
			if d1 != -1 && d1 == d2 {
				object.TryCFR = true
			}
		}
	}

	if config.Inplace {
		object.PutInplace = true
	}

	var bufferSize = 10240
	if config.Manager != "" {
		// No support for work-stealing, so workers shouldnot buffer tasks to prevent piling up in their own queues, which could cause imbalance among workers.
		bufferSize = 1
	}
	tasks := make(chan object.Object, bufferSize)
	wg := sync.WaitGroup{}
	concurrent = make(chan int, config.Threads)
	var localLimit *ratelimit.Bucket
	if config.BWLimit > 0 {
		bps := float64(config.BWLimit*1e6/8) * 0.85 // 15% overhead
		localLimit = ratelimit.NewBucketWithRate(bps, int64(bps)/10)
	}
	var gLimit *globalLimit
	if config.TrafficControlURL != "" {
		gLimit = &globalLimit{address: config.TrafficControlURL}
		go func() {
			for {
				time.Sleep(time.Millisecond * 10)
				gLimit.checkBalance()
			}
		}()
	}
	if localLimit != nil || gLimit != nil {
		limiter = &mixedLimiter{
			global: gLimit,
			local:  localLimit,
		}
	}

	progress := utils.NewProgress(config.Verbose || config.Quiet || config.Manager != "")
	handled = progress.AddCountBar("Scanned objects", 0)
	excluded = progress.AddCountSpinner("Excluded objects")
	excludedBytes = progress.AddByteSpinner("Excluded bytes")
	skipped = progress.AddCountSpinner("Skipped objects")
	skippedBytes = progress.AddByteSpinner("Skipped bytes")
	extra = progress.AddCountSpinner("Extra objects")
	extraBytes = progress.AddByteSpinner("Extra bytes")
	pending = progress.AddCountSpinner("Pending objects")
	copied = progress.AddCountSpinner("Copied objects")
	copiedBytes = progress.AddByteSpinner("Copied bytes")
	if config.CheckAll || config.CheckNew || config.CheckChange {
		checked = progress.AddCountSpinner("Checked objects")
		checkedBytes = progress.AddByteSpinner("Checked bytes")
	}
	if config.DeleteSrc || config.DeleteDst {
		deleted = progress.AddCountSpinner("Deleted objects")
	}

	syncExitFunc := func() error {
		if config.Manager == "" {
			val := atomic.LoadInt64(&ignoreFiles)
			if val > 0 {
				logger.Infof("Ignored %d non-existent paths from the file list", val)
			}
			pending.SetCurrent(0)
			incrHandled(0)
			total := handled.GetTotal()
			progress.Done()

			msg := fmt.Sprintf("Found: %d, excluded: %d (%s), skipped: %d (%s), copied: %d (%s), extra: %d (%s)", total,
				excluded.Current(), formatSize(excludedBytes.Current()),
				skipped.Current(), formatSize(skippedBytes.Current()),
				copied.Current(), formatSize(copiedBytes.Current()),
				extra.Current(), formatSize(extraBytes.Current()))
			if checked != nil {
				msg += fmt.Sprintf(", checked: %d (%s)", checked.Current(), formatSize(checkedBytes.Current()))
			}
			if deleted != nil {
				msg += fmt.Sprintf(", deleted: %d", deleted.Current())
			}
			if failed != nil {
				msg += fmt.Sprintf(", failed: %d", failed.Current())
			}
			if total-handled.Current()-extra.Current() > 0 {
				msg += fmt.Sprintf(", lost: %d", total-handled.Current())
			}
			logger.Info(msg)

			if failed != nil {
				if n := failed.Current(); n > 0 || total > handled.Current()+extra.Current() {
					return fmt.Errorf("failed to handle %d objects", n+total-handled.Current()-extra.Current())
				}
			}
		} else {
			sendStats(config.Manager)
			for len(srcDelayDel) > 0 {
				sendStats(config.Manager)
			}
			logger.Infof("This worker process has already completed its tasks")
		}
		return nil
	}

	if !config.Dry {
		failed = progress.AddCountSpinner("Failed objects")
		if config.MaxFailure > 0 {
			go func() {
				for {
					if failed.Current() >= config.MaxFailure {
						logger.Infof("the maximum error limit of %d was reached, stop now", config.MaxFailure)
						_ = syncExitFunc()
						os.Exit(1)
					}
					time.Sleep(time.Millisecond * 100)
				}
			}()
		}
	}

	if config.Manager == "" && config.FilesFrom != "" {
		listedPrefix = progress.AddCountSpinner("Prefix")
	}

	go func() {
		for {
			pending.SetCurrent(int64(len(tasks)))
			time.Sleep(time.Millisecond * 100)
		}
	}()

	initSyncMetrics(config)
	for i := 0; i < config.Threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			worker(tasks, src, dst, config)
		}()
	}

	if len(config.Exclude) > 0 {
		config.rules = parseIncludeRules(os.Args)
	}

	if config.Manager == "" {
		if len(config.Workers) > 0 {
			addr, err := startManager(config, tasks)
			if err != nil {
				return err
			}
			launchWorker(addr, config, &wg)
		}
		logger.Infof("Syncing from %s to %s", src, dst)
		if config.Start != "" {
			logger.Infof("first key: %q", config.Start)
		}
		if config.End != "" {
			logger.Infof("last key: %q", config.End)
		}
		config.concurrentList = make(chan int, config.ListThreads)
		var err error
		if config.FilesFrom != "" {
			err = produceFromList(tasks, src, dst, config)
		} else {
			err = startProducer(tasks, src, dst, "", config.ListDepth, config)
		}
		if err != nil {
			return err
		}
		close(tasks)
	} else {
		go fetchJobs(tasks, config)
		go func() {
			for {
				sendStats(config.Manager)
				time.Sleep(time.Second)
			}
		}()
	}
	wg.Wait()

	if config.Manager == "" {
		delayDelFunc := func(storage object.ObjectStorage, keys []string) {
			if len(keys) > 0 {
				logger.Infof("delete %d dirs from %s", len(keys), storage)
				sort.Strings(keys)
			}
			for i := len(keys) - 1; i >= 0; i-- {
				incrHandled(1)
				deleteObj(storage, keys[i], config.Dry)
			}
		}
		delWg := sync.WaitGroup{}

		delWg.Add(1)
		go func() {
			delayDelFunc(src, srcDelayDel)
			delWg.Done()
		}()
		delWg.Add(1)
		go func() {
			delayDelFunc(dst, dstDelayDel)
			delWg.Done()
		}()
		delWg.Wait()
	}
	return syncExitFunc()
}

func initSyncMetrics(config *Config) {
	if config.Registerer != nil {
		config.Registerer.MustRegister(
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "scanned",
				Help: "Scanned objects",
			}, func() float64 {
				return float64(handled.Total())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "excluded",
				Help: "Excluded objects",
			}, func() float64 {
				return float64(excluded.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "excluded_bytes",
				Help: "Excluded bytes",
			}, func() float64 {
				return float64(copied.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "extra",
				Help: "Extra objects",
			}, func() float64 {
				return float64(excluded.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "extra_bytes",
				Help: "Extra bytes",
			}, func() float64 {
				return float64(copied.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "handled",
				Help: "Handled objects",
			}, func() float64 {
				return float64(handled.Current())
			}),
			prometheus.NewGaugeFunc(prometheus.GaugeOpts{
				Name: "pending",
				Help: "Pending objects",
			}, func() float64 {
				return float64(pending.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "copied",
				Help: "Copied objects",
			}, func() float64 {
				return float64(copied.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "copied_bytes",
				Help: "Copied bytes",
			}, func() float64 {
				return float64(copiedBytes.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "skipped",
				Help: "Skipped objects",
			}, func() float64 {
				return float64(skipped.Current())
			}),
			prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "skipped_bytes",
				Help: "Skipped bytes",
			}, func() float64 {
				return float64(skippedBytes.Current())
			}),
		)
		if failed != nil {
			config.Registerer.MustRegister(prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "failed",
				Help: "Failed objects",
			}, func() float64 {
				return float64(failed.Current())
			}))
		}
		if deleted != nil {
			config.Registerer.MustRegister(prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "deleted",
				Help: "Deleted objects",
			}, func() float64 {
				return float64(deleted.Current())
			}))
		}
		if checked != nil && checkedBytes != nil {
			config.Registerer.MustRegister(
				prometheus.NewCounterFunc(prometheus.CounterOpts{
					Name: "checked",
					Help: "Checked objects",
				}, func() float64 {
					return float64(checked.Current())
				}),
				prometheus.NewCounterFunc(prometheus.CounterOpts{
					Name: "checked_bytes",
					Help: "Checked bytes",
				}, func() float64 {
					return float64(checkedBytes.Current())
				}))
		}
		if listedPrefix != nil {
			config.Registerer.MustRegister(prometheus.NewCounterFunc(prometheus.CounterOpts{
				Name: "Prefix",
				Help: "listed prefix",
			}, func() float64 {
				return float64(listedPrefix.Current())
			}))
		}
	}
}


================================================
FILE: pkg/sync/sync_test.go
================================================
/*
 * JuiceFS, Copyright 2018 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package sync

import (
	"bytes"
	"fmt"
	"io"
	"math"
	"os"
	"reflect"
	"strings"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/object"
)

func collectAll(c <-chan object.Object) []string {
	r := make([]string, 0)
	for s := range c {
		r = append(r, s.Key())
	}
	return r
}

// nolint:errcheck
func TestIterator(t *testing.T) {
	m, _ := object.CreateStorage("mem", "", "", "", "")
	m.Put(ctx, "a", bytes.NewReader([]byte("a")))
	m.Put(ctx, "b", bytes.NewReader([]byte("a")))
	m.Put(ctx, "aa", bytes.NewReader([]byte("a")))
	m.Put(ctx, "c", bytes.NewReader([]byte("a")))

	ch, _ := ListAll(m, "", "a", "b", true)
	keys := collectAll(ch)
	if len(keys) != 3 {
		t.Fatalf("length should be 3, but got %d", len(keys))
	}
	if !reflect.DeepEqual(keys, []string{"a", "aa", "b"}) {
		t.Fatalf("result wrong: %s", keys)
	}

	// Single object
	s, _ := object.CreateStorage("mem", "", "", "", "")
	s.Put(ctx, "a", bytes.NewReader([]byte("a")))
	ch, _ = ListAll(s, "", "", "", true)
	keys = collectAll(ch)
	if !reflect.DeepEqual(keys, []string{"a"}) {
		t.Fatalf("result wrong: %s", keys)
	}
}

func TestIeratorSingleEmptyKey(t *testing.T) {
	// utils.SetLogLevel(logrus.DebugLevel)

	// Construct mem storage
	s, _ := object.CreateStorage("mem", "", "", "", "")
	err := s.Put(ctx, "abc", bytes.NewReader([]byte("abc")))
	if err != nil {
		t.Fatalf("Put error: %q", err)
	}

	// Simulate command line prefix in SRC or DST
	s = object.WithPrefix(s, "abc")
	ch, _ := ListAll(s, "", "", "", true)
	keys := collectAll(ch)
	if !reflect.DeepEqual(keys, []string{""}) {
		t.Fatalf("result wrong: %s", keys)
	}
}

func deepEqualWithOutMtime(a, b object.Object) bool {
	return a.IsDir() == b.IsDir() && a.Key() == b.Key() && a.Size() == b.Size() &&
		math.Abs(a.Mtime().Sub(b.Mtime()).Seconds()) < 1
}

// nolint:errcheck
func TestSync(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()
	config := &Config{
		Start:       "",
		End:         "",
		Threads:     50,
		ListThreads: 1,
		Update:      true,
		Perms:       true,
		Dry:         false,
		DeleteSrc:   false,
		Limit:       -1,
		DeleteDst:   false,
		Exclude:     []string{"c*"},
		Include:     []string{"a[1-9]", "a*"},
		MaxSize:     math.MaxInt64,
		Verbose:     false,
		Quiet:       true,
	}
	os.Args = []string{"--include", "a[1-9]", "--exclude", "a*", "--exclude", "c*"}
	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	a.Put(ctx, "a1", bytes.NewReader([]byte("a1")))
	a.Put(ctx, "a2", bytes.NewReader([]byte("a2")))
	a.Put(ctx, "abc", bytes.NewReader([]byte("abc")))
	a.Put(ctx, "c1", bytes.NewReader([]byte("c1")))
	a.Put(ctx, "c2", bytes.NewReader([]byte("c2")))

	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")
	b.Put(ctx, "a1", bytes.NewReader([]byte("a1")))
	b.Put(ctx, "ba", bytes.NewReader([]byte("a1")))

	// Copy a2
	if err := Sync(a, b, config); err != nil {
		t.Fatalf("sync: %s", err)
	}
	if c := copied.Current(); c != 1 {
		t.Fatalf("should copy 1 keys, but got %d", c)
	}

	if err := Sync(a, b, config); err != nil {
		t.Fatalf("sync: %s", err)
	}
	// No copy occurred
	if c := copied.Current(); c != 0 {
		t.Fatalf("should copy 0 keys, but got %d", c)
	}

	// Now a: {"a1", "a2", "abc", "c1", "c2"}, b: {"a1", "a2", "ba"}
	// Copy "ba" from b to a
	os.Args = []string{}
	config.Exclude = nil
	config.rules = nil
	if err := Sync(b, a, config); err != nil {
		t.Fatalf("sync: %s", err)
	}
	if c := copied.Current(); c != 1 {
		t.Fatalf("should copy 1 keys, but got %d", c)
	}
	// Now a: {"a1", "a2", "abc", "ba", "c1", "c2"}, b: {"a1", "a2", "ba"}
	aRes, _ := ListAll(a, "", "", "", true)
	bRes, _ := ListAll(b, "", "", "", true)

	var aObjs, bObjs []object.Object
	for obj := range aRes {
		aObjs = append(aObjs, obj)
	}
	for obj := range bRes {
		bObjs = append(bObjs, obj)
	}

	if !deepEqualWithOutMtime(aObjs[1], bObjs[1]) {
		t.FailNow()
	}

	if !deepEqualWithOutMtime(aObjs[4], bObjs[len(bObjs)-1]) {
		t.Fatalf("expect %+v but got %+v", aObjs[4], bObjs[len(bObjs)-1])
	}
	// Test --force-update option
	config.ForceUpdate = true
	// Forcibly copy {"a1", "a2", "abc","c1","c2","ba"} from a to b.
	if err := Sync(a, b, config); err != nil {
		t.Fatalf("sync: %s", err)
	}
}

// nolint:errcheck
func TestSyncIncludeAndExclude(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()
	config := &Config{
		Start:       "",
		End:         "",
		Threads:     50,
		ListThreads: 1,
		Update:      true,
		Perms:       true,
		Dry:         false,
		DeleteSrc:   false,
		DeleteDst:   false,
		Verbose:     false,
		Limit:       -1,
		Quiet:       true,
		MaxSize:     math.MaxInt64,
		Exclude:     []string{"1"},
	}
	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")

	simple := []string{"a1/z1/z2", "a2", "ab1", "ab2", "b1", "b2", "c1", "c2"}
	testCases := []struct {
		srcKey, args, want []string
	}{
		{
			srcKey: simple,
			args:   []string{"--include", "xx*", "--include", "xxx*"},
			want:   []string{"a1/", "a1/z1/", "a1/z1/z2", "a2", "ab1", "ab2", "b1", "b2", "c1", "c2"},
		},
		{
			srcKey: simple,
			args:   []string{"--exclude", "a*", "--exclude", "c*"},
			want:   []string{"b1", "b2"},
		},
		{
			srcKey: simple,
			args:   []string{"--exclude", "a[1-2]", "--include", "a*"},
			want:   []string{"ab1", "ab2", "b1", "b2", "c1", "c2"},
		},
		{
			srcKey: simple,
			args:   []string{"--exclude", "ab?", "--include", "a*"},
			want:   []string{"a1/", "a1/z1/", "a1/z1/z2", "a2", "b1", "b2", "c1", "c2"},
		},
		{
			srcKey: simple,
			args:   []string{"--include", "a*", "--exclude", "c*"},
			want:   []string{"a1/", "a1/z1/", "a1/z1/z2", "a2", "ab1", "ab2", "b1", "b2"},
		},
		{
			srcKey: simple,
			args:   []string{"--exclude", "a*", "--exclude", "c*"},
			want:   []string{"b1", "b2"},
		},
		{
			srcKey: []string{"a1/b1/c1", "a1/b1/c2", "a1/b2/c1", "a1/b2/c2", "a2/b1/c2", "a3/b2/c2", "a4"},
			args:   []string{"--exclude", "a*/b[1-2]/c1", "--exclude", "a4"},
			want:   []string{"a1/", "a1/b1/", "a1/b1/c2", "a1/b2/", "a1/b2/c2", "a2/", "a2/b1/", "a2/b1/c2", "a3/", "a3/b2/", "a3/b2/c2"},
		},
	}

	for _, testCase := range testCases {
		_ = os.RemoveAll("/tmp/a/")
		_ = os.RemoveAll("/tmp/b/")
		os.Args = testCase.args
		for _, k := range testCase.srcKey {
			a.Put(ctx, k, bytes.NewReader([]byte(k)))
		}
		if err := Sync(a, b, config); err != nil {
			t.Fatalf("sync: %s", err)
		}

		bRes, _ := ListAll(b, "", "", "", true)
		var bKeys []string
		for obj := range bRes {
			bKeys = append(bKeys, obj.Key())
		}
		if !reflect.DeepEqual(bKeys[1:], testCase.want) {
			t.Errorf("sync args  %v, want %v, but get %v", os.Args, testCase.want, bKeys)
		}
	}
}

func TestParseRules(t *testing.T) {
	tests := []struct {
		args      []string
		wantRules []rule
	}{
		{
			args:      []string{"--include", "a"},
			wantRules: []rule{{pattern: "a", include: true}},
		},
		{
			args:      []string{"--exclude", "a", "--include", "b"},
			wantRules: []rule{{pattern: "a"}, {pattern: "b", include: true}},
		},
		{
			args:      []string{"--include", "a", "--test", "t", "--exclude", "b"},
			wantRules: []rule{{pattern: "a", include: true}, {pattern: "b"}},
		},
		{
			args:      []string{"--include", "a", "--test", "t", "--exclude"},
			wantRules: []rule{{pattern: "a", include: true}},
		},
		{
			args:      []string{"--include", "a", "--exclude", "b", "--include", "c", "--exclude", "d"},
			wantRules: []rule{{pattern: "a", include: true}, {pattern: "b"}, {pattern: "c", include: true}, {pattern: "d"}},
		},
		{
			args:      []string{"--include", "a", "--include", "b", "--test", "--exclude", "c", "--exclude", "d"},
			wantRules: []rule{{pattern: "a", include: true}, {pattern: "b", include: true}, {pattern: "c"}, {pattern: "d"}},
		},
		{
			args:      []string{"--include=a", "--include=b", "--exclude=c", "--exclude=d", "--test=aaa"},
			wantRules: []rule{{pattern: "a", include: true}, {pattern: "b", include: true}, {pattern: "c"}, {pattern: "d"}},
		},
		{
			args:      []string{"-include=a", "--test", "t", "--include=b", "--exclude=c", "-exclude="},
			wantRules: []rule{{pattern: "a", include: true}, {pattern: "b", include: true}, {pattern: "c"}},
		},
	}
	for _, tt := range tests {
		if gotRules := parseIncludeRules(tt.args); !reflect.DeepEqual(gotRules, tt.wantRules) {
			t.Errorf("got %+v, want %+v", gotRules, tt.wantRules)
		}
	}
}

func TestSyncLink(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()

	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	a.Put(ctx, "a1", bytes.NewReader([]byte("test")))
	as := a.(object.SupportSymlink)
	as.Symlink("/tmp/a/a1", "l1")
	as.Symlink("./../a1", "d1/l2")
	as.Symlink("./../notExist", "l3")

	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")
	bs := b.(object.SupportSymlink)
	bs.Symlink("/tmp/b/a1", "l1")

	if err := Sync(a, b, &Config{
		Threads:     50,
		Update:      true,
		Perms:       true,
		ListThreads: 1,
		Links:       true,
		Quiet:       true,
		Limit:       -1,
		ForceUpdate: true,
		MaxSize:     math.MaxInt64,
	}); err != nil {
		t.Fatalf("sync: %s", err)
	}

	l1, err := bs.Readlink("l1")
	if err != nil || l1 != "/tmp/a/a1" {
		t.Fatalf("readlink: %s content: %s", err, l1)
	}
	content, err := b.Get(ctx, "l1", 0, -1)
	if err != nil {
		t.Fatalf("get content failed: %s", err)
	}
	if c, err := io.ReadAll(content); err != nil || string(c) != "test" {
		t.Fatalf("read content failed: err %s content %s", err, string(c))
	}

	l2, err := bs.Readlink("d1/l2")
	if err != nil || l2 != "./../a1" {
		t.Fatalf("readlink: %s", err)
	}
	content, err = b.Get(ctx, "d1/l2", 0, -1)
	if err != nil {
		t.Fatalf("content failed: %s", err)
	}
	if c, err := io.ReadAll(content); err != nil || string(c) != "test" {
		t.Fatalf("read content failed: err %s content %s", err, string(c))
	}

	l3, err := bs.Readlink("l3")
	if err != nil || l3 != "./../notExist" {
		t.Fatalf("readlink: %s", err)
	}
}

func TestSyncLinkWithOutFollow(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()

	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	a.Put(ctx, "a1", bytes.NewReader([]byte("test")))
	as := a.(object.SupportSymlink)
	as.Symlink("/tmp/a/a1", "l1")
	as.Symlink("./../notExist", "l3")

	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")

	if err := Sync(a, b, &Config{
		Threads:     50,
		ListThreads: 1,
		Update:      true,
		Perms:       true,
		Quiet:       true,
		ForceUpdate: true,
		Limit:       -1,
		MaxSize:     math.MaxInt64,
	}); err != nil {
		t.Fatalf("sync: %s", err)
	}
	content, err := b.Get(ctx, "l1", 0, -1)
	if err != nil {
		t.Fatalf("get content error: %s", err)
	}
	if c, err := io.ReadAll(content); err != nil || string(c) != "test" {
		t.Fatalf("read content error: %s", err)
	}

	if lstat, err := os.Lstat("/tmp/b/l1"); err != nil && lstat.Mode()&os.ModeSymlink != 0 {
		t.Fatalf("should follow link")
	}
	if _, err := os.Stat("/tmp/b/l3"); !os.IsNotExist(err) {
		t.Fatalf("should not copy broken link")
	}
}

func TestSingleLink(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()
	_ = os.Symlink("/tmp/aa", "/tmp/a")
	a, _ := object.CreateStorage("file", "/tmp/a", "", "", "")
	b, _ := object.CreateStorage("file", "/tmp/b", "", "", "")
	if err := Sync(a, b, &Config{
		Threads:     50,
		ListThreads: 1,
		Update:      true,
		Perms:       true,
		Links:       true,
		Quiet:       true,
		Limit:       -1,
		MaxSize:     math.MaxInt64,
		ForceUpdate: true,
	}); err != nil {
		t.Fatalf("sync: %s", err)
	}
	readlink, _ := os.Readlink("/tmp/a")
	readlink2, err := os.Readlink("/tmp/b")
	if err != nil {
		t.Fatalf("sync err: %v", err)
	}

	if readlink != readlink2 || readlink != "/tmp/aa" {
		t.Fatalf("sync link failed")
	}
}

func TestSyncCheckAllLink(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()

	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	a.Put(ctx, "a1", bytes.NewReader([]byte("test")))
	as := a.(object.SupportSymlink)
	as.Symlink("/tmp/a/a1", "l1")

	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")
	bs := b.(object.SupportSymlink)
	bs.Symlink("/tmp/b/a1", "l1")

	if err := Sync(a, b, &Config{
		Threads:     50,
		Perms:       true,
		Links:       true,
		Quiet:       true,
		ListThreads: 1,
		Limit:       -1,
		MaxSize:     math.MaxInt64,
		CheckAll:    true,
	}); err != nil {
		t.Fatalf("sync: %s", err)
	}

	l1, err := bs.Readlink("l1")
	if err != nil || l1 != "/tmp/a/a1" {
		t.Fatalf("readlink: %s content: %s", err, l1)
	}
	content, err := b.Get(ctx, "l1", 0, -1)
	if err != nil {
		t.Fatalf("get content failed: %s", err)
	}
	if c, err := io.ReadAll(content); err != nil || string(c) != "test" {
		t.Fatalf("read content failed: err %s content %s", err, string(c))
	}
}

func TestSyncCheckNewLink(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a")
		_ = os.RemoveAll("/tmp/b")
	}()

	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	a.Put(ctx, "a1", bytes.NewReader([]byte("test")))
	as := a.(object.SupportSymlink)
	as.Symlink("/tmp/a/a1", "l1")

	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")
	bs := b.(object.SupportSymlink)

	if err := Sync(a, b, &Config{
		Threads:     50,
		Perms:       true,
		Links:       true,
		Quiet:       true,
		ListThreads: 1,
		Limit:       -1,
		MaxSize:     math.MaxInt64,
		CheckNew:    true,
	}); err != nil {
		t.Fatalf("sync: %s", err)
	}

	l1, err := bs.Readlink("l1")
	if err != nil || l1 != "/tmp/a/a1" {
		t.Fatalf("readlink: %s content: %s", err, l1)
	}
	content, err := b.Get(ctx, "l1", 0, -1)
	if err != nil {
		t.Fatalf("get content failed: %s", err)
	}
	if c, err := io.ReadAll(content); err != nil || string(c) != "test" {
		t.Fatalf("read content failed: err %s content %s", err, string(c))
	}
}

func TestLimits(t *testing.T) {
	defer func() {
		_ = os.RemoveAll("/tmp/a/")
		_ = os.RemoveAll("/tmp/b/")
		_ = os.RemoveAll("/tmp/c/")
	}()
	a, _ := object.CreateStorage("file", "/tmp/a/", "", "", "")
	b, _ := object.CreateStorage("file", "/tmp/b/", "", "", "")
	c, _ := object.CreateStorage("file", "/tmp/c/", "", "", "")
	put := func(storage object.ObjectStorage, keys []string) {
		for _, key := range keys {
			if key != "" {
				_ = storage.Put(ctx, key, bytes.NewReader([]byte{}))
			}
		}
	}
	commonKeys := []string{"", "a1", "a2", "a3", "a4", "a5", "a6"}
	put(a, commonKeys)
	put(c, []string{"c1", "c2", "c3"})
	type subConfig struct {
		dst          object.ObjectStorage
		limit        int64
		deleteDst    bool
		expectedKeys []string
	}
	testCases := []subConfig{
		{b, 2, false, []string{"", "a1", "a2"}},
		{b, -1, false, commonKeys},
		{b, 0, false, commonKeys},
		{c, 7, true, append(commonKeys, "c2", "c3")},
	}
	config := &Config{
		Threads:     50,
		Update:      true,
		Perms:       true,
		MaxSize:     math.MaxInt64,
		ListThreads: 1,
	}
	setConfig := func(config *Config, subC subConfig) {
		config.Limit = subC.limit
		config.DeleteDst = subC.deleteDst
	}

	for _, tcase := range testCases {
		setConfig(config, tcase)
		if err := Sync(a, tcase.dst, config); err != nil {
			t.Fatalf("sync: %s", err)
		}

		all, err := ListAll(tcase.dst, "", "", "", true)
		if err != nil {
			t.Fatalf("list all b: %s", err)
		}

		err = testKeysEqual(all, tcase.expectedKeys)
		if err != nil {
			t.Fatalf("testKeysEqual fail: %s", err)
		}
	}
}

func testKeysEqual(objsCh <-chan object.Object, expectedKeys []string) error {
	var gottenKeys []string
	for obj := range objsCh {
		gottenKeys = append(gottenKeys, obj.Key())
	}
	if len(gottenKeys) != len(expectedKeys) {
		return fmt.Errorf("expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
			strings.Join(gottenKeys, ", "))
	}

	for idx, key := range gottenKeys {
		if key != expectedKeys[idx] {
			return fmt.Errorf("expected {%s}, got {%s}", strings.Join(expectedKeys, ", "),
				strings.Join(gottenKeys, ", "))
		}
	}
	return nil
}

func TestMatchObjects(t *testing.T) {
	type tcase struct {
		rules []rule
		key   string
		want  bool
	}
	tests := []tcase{
		{rules: []rule{{pattern: "a*"}}, key: "a1"},
		{rules: []rule{{pattern: "a*/b*"}}, key: "a1/b1"},
		{rules: []rule{{pattern: "/a*"}}, key: "/a1"},
		{rules: []rule{{pattern: "/a"}}, key: "/a1", want: true},
		{rules: []rule{{pattern: "/a/b/c"}}, key: "/a1", want: true},
		{rules: []rule{{pattern: "a*/b?"}}, key: "a1/b1/c2/d1"},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/", want: true},
		{rules: []rule{{pattern: "a*/b?/c.txt"}}, key: "a1/b1", want: true},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/b1/"},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/b1/c.txt"},
		{rules: []rule{{pattern: "a*/"}}, key: "a1/b1"},
		{rules: []rule{{pattern: "a*/b*/"}}, key: "a1/b1/c1/d.txt/"},
		{rules: []rule{{pattern: "/a*/b*"}}, key: "/a1/b1/c1/d.txt/"},
		{rules: []rule{{pattern: "a*/b*/c"}}, key: "a1/b1/c1/d.txt/", want: true},
		{rules: []rule{{pattern: "a"}}, key: "a/b/c/d/"},
		{rules: []rule{{pattern: "a.go", include: true}, {pattern: "pkg"}}, key: "a/pkg/c/a.go"},
		{rules: []rule{{pattern: "a"}, {pattern: "pkg", include: true}}, key: "a/pkg/c/a.go"},
		{rules: []rule{{pattern: "a.go", include: true}, {pattern: "pkg"}}, key: "", want: true},
		{rules: []rule{{pattern: "a", include: true}, {pattern: "b/"}, {pattern: "c", include: true}}, key: "a/b/c"},
		{rules: []rule{{pattern: "a/", include: true}, {pattern: "a"}}, key: "a/b", want: true},
		{rules: []rule{{pattern: "/***"}}, key: "a"},
		{rules: []rule{{pattern: "/***"}}, key: "a/b"},
		{rules: []rule{{pattern: "/a/***"}}, key: "a/"},
		{rules: []rule{{pattern: "/a/***"}}, key: "a/b"},
		{rules: []rule{{pattern: "/a/***"}}, key: "a/b/c"},
		{rules: []rule{{pattern: "/a/***"}}, key: "b/a/", want: true},
		{rules: []rule{{pattern: "a/***"}}, key: "a/"},
		{rules: []rule{{pattern: "a/***"}}, key: "a/b"},
		{rules: []rule{{pattern: "a/***"}}, key: "a/b/c"},
		{rules: []rule{{pattern: "a/***"}}, key: "d/a/b/c"},
		{rules: []rule{{pattern: "a/***"}}, key: "a", want: true},
		{rules: []rule{{pattern: "a/***"}}, key: "ba", want: true},
		{rules: []rule{{pattern: "a/***"}}, key: "ba/", want: true},
		{rules: []rule{{pattern: "*/a/***"}}, key: "/a/"},
		{rules: []rule{{pattern: "*/a/***"}}, key: "b/a/"},
		{rules: []rule{{pattern: "*/a/***"}}, key: "b/a/c"},
		{rules: []rule{{pattern: "/*/a/***"}}, key: "/b/a/"},
		{rules: []rule{{pattern: "/*/a/***"}}, key: "/b/a/c"},
		{rules: []rule{{pattern: "/*/a/***"}}, key: "c/b/a/", want: true},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/b"},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/d/b"},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/d/e/b"},
		{rules: []rule{{pattern: "/**/b"}}, key: "a/c/b"},
		{rules: []rule{{pattern: "/**/b"}}, key: "a/c/d/b/"},
		{rules: []rule{{pattern: "a**/b"}}, key: "a/c/d/b/"},
		{rules: []rule{{pattern: "a**/b"}}, key: "a/c/d/ab/", want: true},
		{rules: []rule{{pattern: "a**b"}}, key: "a/c/d/b/"},
		{rules: []rule{{pattern: "a**b"}}, key: "b/c/d/b/", want: true},
		{rules: []rule{{pattern: "a?**"}}, key: "a/a", want: true},
		{rules: []rule{{pattern: "**a"}}, key: "a"},
		{rules: []rule{{pattern: "a**"}}, key: "a"},
		{rules: []rule{{pattern: "a**a"}}, key: "a", want: true},
		{rules: []rule{{pattern: "aa**a"}}, key: "aa", want: true},
		{rules: []rule{{pattern: "**/d2/**a"}}, key: "/d2/d3/1a"},
		{rules: []rule{{pattern: "**/d2/**a"}}, key: "d2/d3/1a"},
		{rules: []rule{{pattern: "a/**/a"}}, key: "a", want: true},
		{rules: []rule{{pattern: "a/**/a"}}, key: "a/", want: true},
		{rules: []rule{{pattern: "**aa**", include: true}, {pattern: "a"}}, key: "aa/a", want: true},
	}
	for _, c := range tests {
		if got := matchLeveledPath(c.rules, c.key); got != c.want {
			t.Errorf("matchKey(%+v, %s) = %v, want %v", c.rules, c.key, got, c.want)
		}
	}
}

func TestMatchFullPatch(t *testing.T) {
	type tcase struct {
		rules []rule
		key   string
	}
	matchedCases := []tcase{
		{rules: []rule{{pattern: "a"}}, key: "b/a"},
		{rules: []rule{{pattern: "a*"}}, key: "a1"},
		{rules: []rule{{pattern: "a*/b*"}}, key: "a1/b1"},
		{rules: []rule{{pattern: "/a*"}}, key: "/a1"},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/b1/"},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/b"},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/d/b"},
		{rules: []rule{{pattern: "a/**/b"}}, key: "a/c/d/e/b"},
		{rules: []rule{{pattern: "/**/b"}}, key: "a/c/b"},
		{rules: []rule{{pattern: "a**/b"}}, key: "a/c/d/b"},
		{rules: []rule{{pattern: "a**b"}}, key: "a/c/d/b"},
		{rules: []rule{{pattern: "**a"}}, key: "a"},
		{rules: []rule{{pattern: "a**"}}, key: "a"},
		{rules: []rule{{pattern: "**/d2/**a"}}, key: "/d2/d3/1a"},
		{rules: []rule{{pattern: "**/d2/**a"}}, key: "d2/d3/1a"},
	}
	for _, c := range matchedCases {
		if got := matchFullPath(c.rules, c.key); got != false {
			t.Errorf("matchKey(%+v, %s) = %v, want %v", c.rules, c.key, got, false)
		}
	}
	unmatchedCases := []tcase{
		{rules: []rule{{pattern: "/a"}}, key: "/a1"},
		{rules: []rule{{pattern: "a*/b?"}}, key: "a1/b1/c2/d1"},
		{rules: []rule{{pattern: "/a/b/c"}}, key: "/a1"},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/"},
		{rules: []rule{{pattern: "a*/b?/c.txt"}}, key: "a1/b1"},
		{rules: []rule{{pattern: "a*/b?/"}}, key: "a1/b1/c.txt"},
		{rules: []rule{{pattern: "a*/"}}, key: "a1/b1"},
		{rules: []rule{{pattern: "a*/b*/"}}, key: "a1/b1/c1/d.txt/"},
		{rules: []rule{{pattern: "/a*/b*"}}, key: "/a1/b1/c1/d.txt/"},
		{rules: []rule{{pattern: "a"}}, key: "a/b/c/d/"},
		{rules: []rule{{pattern: "a*/b*/c"}}, key: "a1/b1/c1/d.txt/"},
		{rules: []rule{{pattern: "a**/b"}}, key: "a/c/d/ab/"},
		{rules: []rule{{pattern: "a**b"}}, key: "b/c/d/b"},
		{rules: []rule{{pattern: "/**/b"}}, key: "a/c/d/b/"},
		{rules: []rule{{pattern: "a?**"}}, key: "a/a"},
		{rules: []rule{{pattern: "a**a"}}, key: "a"},
		{rules: []rule{{pattern: "aa**a"}}, key: "aa"},
		{rules: []rule{{pattern: "a/**/a"}}, key: "a"},
		{rules: []rule{{pattern: "a/**/a"}}, key: "a/"},
		{rules: []rule{{pattern: "**aa**", include: true}, {pattern: "a"}}, key: "aa/a"},
	}
	for _, c := range unmatchedCases {
		if got := matchFullPath(c.rules, c.key); got != true {
			t.Errorf("matchKey(%+v, %s) = %v, want %v", c.rules, c.key, got, true)
		}
	}
}

func TestParseFilterRule(t *testing.T) {
	type tcase struct {
		args  []string
		rules []rule
	}
	cases := []tcase{
		{[]string{"--include", "a"}, []rule{{pattern: "a", include: true}}},
		{[]string{"--exclude", "a", "--include", "b"}, []rule{{pattern: "a"}, {pattern: "b", include: true}}},
		{[]string{"--include", "a", "--test", "t", "--exclude", "b"}, []rule{{pattern: "a", include: true}, {pattern: "b"}}},
		{[]string{"--include=a", "--test", "t", "--exclude"}, []rule{{pattern: "a", include: true}}},
		{[]string{"--include", "a", "--test", "t", "--exclude"}, []rule{{pattern: "a", include: true}}},
		{[]string{"-include=", "a", "--test", "t", "--exclude=*"}, []rule{{pattern: "*"}}},
	}

	for _, c := range cases {
		if got := parseIncludeRules(c.args); !reflect.DeepEqual(got, c.rules) {
			t.Errorf("parseIncludeRules(%+v) = %v, want %v", c.args, got, c.rules)
		}
	}
}

type mockObject struct {
	size  int64
	mtime time.Time
}

func (o *mockObject) Key() string          { return "" }
func (o *mockObject) IsDir() bool          { return false }
func (o *mockObject) IsSymlink() bool      { return false }
func (o *mockObject) Size() int64          { return o.size }
func (o *mockObject) Mtime() time.Time     { return o.mtime }
func (o *mockObject) StorageClass() string { return "" }

func TestFilterSizeAndAge(t *testing.T) {
	config := &Config{
		MaxSize: 100,
		MinSize: 10,
		MaxAge:  time.Second * 100,
		MinAge:  time.Second * 10,
	}
	now := time.Now()
	if !filterKey(&mockObject{10, now.Add(-time.Second * 15)}, now, nil, config) {
		t.Fatalf("filterKey failed")
	}
	if filterKey(&mockObject{200, now.Add(-time.Second * 200)}, now, nil, config) {
		t.Fatalf("filterKey should fail")
	}

	config = &Config{
		MaxSize:   math.MaxInt64,
		StartTime: time.Now().Add(-time.Hour),
		EndTime:   time.Now().Add(-time.Minute),
	}
	if !filterKey(&mockObject{200, now.Add(-time.Minute * 30)}, now, nil, config) {
		t.Fatalf("filterKey fail")
	}

	if filterKey(&mockObject{200, now.Add(-time.Hour * 2)}, now, nil, config) {
		t.Fatalf("filterKey should fail")
	}
}


================================================
FILE: pkg/usage/usage.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package usage

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
)

var reportUrl = "https://juicefs.com/report-usage"

var logger = utils.GetLogger("juicefs")

type usage struct {
	VolumeID   string `json:"volumeID"`
	SessionID  int64  `json:"sessionID"`
	UsedSpace  int64  `json:"usedBytes"`
	UsedInodes int64  `json:"usedInodes"`
	Version    string `json:"version"`
	Uptime     int64  `json:"uptime"`
	MetaEngine string `json:"metaEngine"` // type of meta engine
	DataStore  string `json:"dataStore"`  // type of object store
}

func sendUsage(u usage) error {
	body, err := json.Marshal(u)
	if err != nil {
		return err
	}
	req, err := http.NewRequest("POST", reportUrl, bytes.NewReader(body))
	if err != nil {
		return err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	if resp.StatusCode != 200 {
		return fmt.Errorf("got %s", resp.Status)
	}
	_, err = io.ReadAll(resp.Body)
	return err
}

// ReportUsage will send anonymous usage data to juicefs.com to help the team
// understand how the community is using it. You can use `--no-usage-report`
// to disable this.
func ReportUsage(m meta.Meta, version string) {
	ctx := meta.Background()
	var u usage
	if format, err := m.Load(false); err == nil {
		u.VolumeID = format.UUID
		u.DataStore = format.Storage
	}
	u.MetaEngine = m.Name()
	u.SessionID = int64(rand.Uint32())
	u.Version = version
	var start = time.Now()
	for {
		var totalSpace, availSpace, iused, iavail uint64
		_ = m.StatFS(ctx, meta.RootInode, &totalSpace, &availSpace, &iused, &iavail)
		u.Uptime = int64(time.Since(start).Seconds())
		u.UsedSpace = int64(totalSpace - availSpace)
		u.UsedInodes = int64(iused)

		if err := sendUsage(u); err != nil {
			logger.Debugf("send usage: %s", err)
		}
		time.Sleep(time.Hour)
	}
}


================================================
FILE: pkg/usage/usage_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package usage

import (
	"encoding/json"
	"fmt"
	"io"
	"net"
	"net/http"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
)

// nolint:errcheck
func TestUsageReport(t *testing.T) {
	// invalid addr
	reportUrl = "http://127.0.0.1/report-usage"
	m := meta.NewClient("memkv://", nil)
	format := &meta.Format{
		Name:      "test",
		BlockSize: 4096,
		Capacity:  1 << 30,
		DirStats:  true,
	}
	_ = m.Init(format, true)
	go ReportUsage(m, "unittest")
	// wait for it to report to unavailable address, it should not panic.
	time.Sleep(time.Millisecond * 100)

	l, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	defer l.Close()

	mux := http.NewServeMux()
	var u usage
	done := make(chan bool)
	mux.HandleFunc("/report-usage", func(rw http.ResponseWriter, r *http.Request) {
		d, _ := io.ReadAll(r.Body)
		_ = json.Unmarshal(d, &u)
		_, _ = rw.Write([]byte("OK"))
		done <- true
	})
	go http.Serve(l, mux)

	addr := l.Addr().String()
	reportUrl = fmt.Sprintf("http://%s/report-usage", addr)
	go ReportUsage(m, "unittest")

	deadline := time.NewTimer(time.Second * 3)
	select {
	case <-done:
		if u.MetaEngine != "memkv" {
			t.Fatalf("unexpected meta engine: %s", u.MetaEngine)
		}
		if u.Version != "unittest" {
			t.Fatalf("unexpected version: %s", u.Version)
		}
	case <-deadline.C:
		t.Fatalf("no report after 3 seconds")
	}
	time.Sleep(time.Millisecond * 100) // wait for the client to finish
}


================================================
FILE: pkg/utils/alloc.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"math/bits"
	"runtime"
	"sync"
	"sync/atomic"
	"time"
)

var used int64

// Alloc returns size bytes memory from Go heap.
func Alloc(size int) []byte {
	b := Alloc0(size)
	atomic.AddInt64(&used, int64(cap(b)))
	return b
}

// Alloc returns size bytes memory from Go heap.
func Alloc0(size int) []byte {
	zeros := PowerOf2(size)
	b := *pools[zeros].Get().(*[]byte)
	if cap(b) < size {
		panic(fmt.Sprintf("%d < %d", cap(b), size))
	}
	return b[:size]
}

// Free returns memory to Go heap.
func Free(b []byte) {
	// buf could be zero length
	atomic.AddInt64(&used, -int64(cap(b)))
	Free0(b)
}

// Free returns memory to Go heap.
func Free0(b []byte) {
	// buf could be zero length
	pools[PowerOf2(cap(b))].Put(&b)
}

// AllocMemory returns the allocated memory
func AllocMemory() int64 {
	return atomic.LoadInt64(&used)
}

var pools []*sync.Pool

// PowerOf2 returns the smallest power of 2 that is >= s
func PowerOf2(s int) int {
	if s <= 0 {
		return 0
	}
	// Find position of the most significant bit (MSB)
	return bits.Len(uint(s - 1))
}

func init() {
	pools = make([]*sync.Pool, 34) // 1 - 8G
	for i := 0; i < 34; i++ {
		func(bits int) {
			pools[i] = &sync.Pool{
				New: func() interface{} {
					b := make([]byte, 1<<bits)
					return &b
				},
			}
		}(i)
	}
	go func() {
		for {
			time.Sleep(time.Minute * 10)
			runtime.GC()
		}
	}()
}


================================================
FILE: pkg/utils/alloc_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"testing"
)

func TestAlloc(t *testing.T) {
	old := AllocMemory()
	b := Alloc(10)
	if AllocMemory()-old != 16 {
		t.Fatalf("alloc 16 bytes, but got %d", AllocMemory()-old)
	}
	Free(b)
	if AllocMemory()-old != 0 {
		t.Fatalf("free all allocated memory, but got %d", AllocMemory()-old)
	}
}

func PowerOf2Loop(s int) int {
	var bits int
	var p int = 1
	for p < s {
		bits++
		p *= 2
	}
	return bits
}

func BenchmarkPowerOf2(b *testing.B) {
	b.Run("bits.Len", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			for j := 0; j < 100000; j++ {
				_ = PowerOf2(j)
			}
		}
	})

	b.Run("Loop", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			for j := 0; j < 100000; j++ {
				_ = PowerOf2Loop(j)
			}
		}
	})
}


================================================
FILE: pkg/utils/buffer.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"encoding/binary"
	"unsafe"
)

// Buffer is a buffer to read/write integers.
type Buffer struct {
	endian binary.ByteOrder
	off    int
	buf    []byte
}

// NewBuffer returns a buffer with sz number of bytes.
func NewBuffer(sz uint32) *Buffer {
	return FromBuffer(make([]byte, sz))
}

// ReadBuffer utility to create *Buffer from slice of bytes
func ReadBuffer(buf []byte) *Buffer {
	return FromBuffer(buf)
}

// FromBuffer utility to create *Buffer
func FromBuffer(buf []byte) *Buffer {
	return &Buffer{binary.BigEndian, 0, buf}
}

// Len returns length of buffer
func (b *Buffer) Len() int {
	return len(b.buf)
}

// HasMore checks if offset is less than length
func (b *Buffer) HasMore() bool {
	return b.off < len(b.buf)
}

// Left returns number of bytes after offset
func (b *Buffer) Left() int {
	return len(b.buf) - b.off
}

// Seek seeks or sets offset to `p`
func (b *Buffer) Seek(p int) {
	b.off = p
}

func (b *Buffer) Offset() int {
	return b.off
}

// Buffer returns
func (b *Buffer) Buffer() []byte {
	return b.buf[b.off:]
}

// Put8 appends uint8 to Buffer
func (b *Buffer) Put8(v uint8) {
	b.buf[b.off] = v
	b.off++
}

// Get8 returns uint8
func (b *Buffer) Get8() uint8 {
	v := b.buf[b.off]
	b.off++
	return v
}

// Put16 appends uint16 to Buffer
func (b *Buffer) Put16(v uint16) {
	b.endian.PutUint16(b.buf[b.off:b.off+2], v)
	b.off += 2
}

// Get16 returns uint16
func (b *Buffer) Get16() uint16 {
	v := b.endian.Uint16(b.buf[b.off : b.off+2])
	b.off += 2
	return v
}

// Put32 appends uint32 to Buffer
func (b *Buffer) Put32(v uint32) {
	b.endian.PutUint32(b.buf[b.off:b.off+4], v)
	b.off += 4
}

// Get32 returns uint32
func (b *Buffer) Get32() uint32 {
	v := b.endian.Uint32(b.buf[b.off : b.off+4])
	b.off += 4
	return v
}

// Put64 appends uint64 to Buffer
func (b *Buffer) Put64(v uint64) {
	b.endian.PutUint64(b.buf[b.off:b.off+8], v)
	b.off += 8
}

// Get64 returns uint64
func (b *Buffer) Get64() uint64 {
	v := b.endian.Uint64(b.buf[b.off : b.off+8])
	b.off += 8
	return v
}

// Put appends slice of byte to Buffer
func (b *Buffer) Put(v []byte) {
	l := len(v)
	copy(b.buf[b.off:b.off+l], v)
	b.off += l
}

// Get returns `l` bytes from offset
func (b *Buffer) Get(l int) []byte {
	b.off += l
	return b.buf[b.off-l : b.off]
}

// SetBytes initializes the Buffer with BigEndian ordering
func (b *Buffer) SetBytes(buf []byte) {
	b.endian = binary.BigEndian
	b.off = 0
	b.buf = buf
}

// Bytes returns the bytes
func (b *Buffer) Bytes() []byte {
	return b.buf
}

var NativeEndian binary.ByteOrder

// NewNativeBuffer utility to create *Buffer of given size with nativeEndian
func NewNativeBuffer(buf []byte) *Buffer {
	return &Buffer{NativeEndian, 0, buf}
}

func init() {
	buf := [2]byte{}
	*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)

	switch buf {
	case [2]byte{0xCD, 0xAB}:
		NativeEndian = binary.LittleEndian
	case [2]byte{0xAB, 0xCD}:
		NativeEndian = binary.BigEndian
	default:
		panic("Could not determine native endianness.")
	}
}


================================================
FILE: pkg/utils/buffer_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"reflect"
	"testing"
)

func assertEqual(t *testing.T, a interface{}, b interface{}) {
	if reflect.DeepEqual(a, b) {
		return
	}
	message := fmt.Sprintf("%v != %v", a, b)
	t.Fatal(message)
}

func TestBuffer(t *testing.T) {
	b := NewBuffer(20)
	b.Put8(1)
	b.Put16(2)
	b.Put32(3)
	b.Put64(4)
	b.Put([]byte("hello"))
	assertEqual(t, b.Len(), 20)

	r := ReadBuffer(b.Bytes())
	assertEqual(t, r.Get8(), uint8(1))
	assertEqual(t, r.Get16(), uint16(2))
	assertEqual(t, r.Get32(), uint32(3))
	assertEqual(t, r.Get64(), uint64(4))
	assertEqual(t, r.HasMore(), true)
	assertEqual(t, r.Left(), 5)
	if len(r.Buffer()) != 5 {
		t.Fatal("rest buffer should be 5 bytes")
	}
	assertEqual(t, string(r.Get(5)), "hello")
	r.Seek(10)
	assertEqual(t, r.Left(), 10)
}

func TestSetBytes(t *testing.T) {
	var w Buffer
	w.SetBytes(make([]byte, 3))
	w.Put8(1)
	w.Put16(2)
	r := ReadBuffer(w.Bytes())
	assertEqual(t, r.Get8(), uint8(1))
	assertEqual(t, r.Get16(), uint16(2))
}

func TestNativeBuffer(t *testing.T) {
	b := NewNativeBuffer(make([]byte, 20))
	b.Put8(1)
	b.Put16(2)
	b.Put32(3)
	b.Put64(4)
	b.Put([]byte("hello"))

	r := NewNativeBuffer(b.Bytes())
	assertEqual(t, r.Get8(), uint8(1))
	assertEqual(t, r.Get16(), uint16(2))
	assertEqual(t, r.Get32(), uint32(3))
	assertEqual(t, r.Get64(), uint64(4))
	assertEqual(t, string(r.Get(5)), "hello")
}


================================================
FILE: pkg/utils/clock_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"log"
	"testing"
	"time"
)

func TestClock(t *testing.T) {
	now := Now()
	if time.Since(now).Microseconds() > 1000 {
		t.Fatal("time is not accurate")
	}
	c1 := Clock()
	c2 := Clock()
	if c2 < c1 {
		t.Fatalf("clock is not monotonic: %s > %s", c1, c2)
	}
}

func BenchmarkNow(b *testing.B) {
	var now time.Time
	for i := 0; i < b.N; i++ {
		now = Now()
	}
	log.Print(now)
}

func BenchmarkClock(b *testing.B) {
	var now time.Duration
	for i := 0; i < b.N; i++ {
		now = Clock()
	}
	log.Print(now)
}


================================================
FILE: pkg/utils/clock_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import "time"

var started = time.Now()

func Now() time.Time {
	return time.Now()
}

func Clock() time.Duration {
	return time.Since(started)
}


================================================
FILE: pkg/utils/clock_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"syscall"
	"time"
	"unsafe"
)

type clock struct {
	t    time.Time
	tick time.Duration
}

var last *clock

func Now() time.Time {
	c := last
	return c.t.Add(Clock() - c.tick)
}

// Clock returns the number of milliseconds that have elapsed since the program
// was started.
var Clock func() time.Duration

func init() {
	QPCTimer := func() func() time.Duration {
		lib, _ := syscall.LoadLibrary("kernel32.dll")
		qpc, _ := syscall.GetProcAddress(lib, "QueryPerformanceCounter")
		qpf, _ := syscall.GetProcAddress(lib, "QueryPerformanceFrequency")
		if qpc == 0 || qpf == 0 {
			return nil
		}

		var freq, start uint64
		syscall.Syscall(qpf, 1, uintptr(unsafe.Pointer(&freq)), 0, 0)
		syscall.Syscall(qpc, 1, uintptr(unsafe.Pointer(&start)), 0, 0)
		if freq <= 0 {
			return nil
		}

		freqns := float64(freq) / 1e9
		return func() time.Duration {
			var now uint64
			syscall.Syscall(qpc, 1, uintptr(unsafe.Pointer(&now)), 0, 0)
			return time.Duration(float64(now-start) / freqns)
		}
	}
	if Clock = QPCTimer(); Clock == nil {
		// Fallback implementation
		start := time.Now()
		Clock = func() time.Duration { return time.Since(start) }
	}
	last = &clock{time.Now(), Clock()}
	go func() {
		for {
			last = &clock{time.Now(), Clock()}
			time.Sleep(time.Hour)
		}
	}()
}


================================================
FILE: pkg/utils/cond.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"sync"
	"time"
)

// Cond is similar to sync.Cond, but you can wait with a timeout.
type Cond struct {
	L      sync.Locker
	signal chan struct{}
}

// Signal wakes up a waiter.
// It's required for the caller to hold L.
func (c *Cond) Signal() {
	select {
	case c.signal <- struct{}{}:
	default:
	}
}

// Broadcast wake up all the waiters.
// It's required for the caller to hold L.
func (c *Cond) Broadcast() {
	close(c.signal)
	c.signal = make(chan struct{})
}

var timerPool = sync.Pool{}

// WaitWithTimeout wait for a signal or a period of timeout eclipsed.
// returns true in case of timeout else false
func (c *Cond) WaitWithTimeout(d time.Duration) bool {
	ch := c.signal
	c.L.Unlock()
	var t *time.Timer
	if e := timerPool.Get(); e == nil {
		t = time.NewTimer(d)
	} else {
		t = e.(*time.Timer)
		t.Reset(d)
	}
	defer func() {
		timerPool.Put(t)
		c.L.Lock()
	}()
	select {
	case <-ch:
		if !t.Stop() {
			<-t.C
		}
		return false
	case <-t.C:
		return true
	}
}

// NewCond creates a Cond.
func NewCond(lock sync.Locker) *Cond {
	return &Cond{lock, make(chan struct{})}
}


================================================
FILE: pkg/utils/cond_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"sync"
	"testing"
	"time"
)

func TestCond(t *testing.T) {
	// test Wait and Signal
	var m sync.Mutex
	c := NewCond(&m)
	var ready bool
	start := time.Now()
	go func() {
		for i := 0; i < 10; i++ {
			m.Lock()
			ready = true
			c.Signal()
			for ready {
				c.WaitWithTimeout(time.Millisecond * 100)
			}
			m.Unlock()
		}
	}()
	for i := 0; i < 10; i++ {
		m.Lock()
		for !ready {
			c.WaitWithTimeout(time.Millisecond * 100)
		}
		ready = false
		c.Signal()
		m.Unlock()
	}
	if ready {
		t.Fatalf("the work should finish with ready = false")
	}
	if time.Since(start) > time.Second {
		t.Fatalf("the work should finish in 1 second")
	}

	// test WaitWithTimeout
	done := make(chan bool)
	var timeout bool
	go func() {
		m.Lock()
		defer m.Unlock()
		timeout = c.WaitWithTimeout(time.Millisecond * 10)
		done <- true
	}()
	select {
	case <-done:
		if !timeout {
			t.Fatalf("it should timeout")
		}
	case <-time.NewTimer(time.Second).C:
		t.Fatalf("wait did not return after 1 second")
	}

	// test Broadcast to wake up all goroutines
	var N = 1000
	done2 := make(chan bool, N)
	var wg2 sync.WaitGroup
	for i := 0; i < N; i++ {
		wg2.Add(1)
		go func() {
			m.Lock()
			wg2.Done()
			timeout := c.WaitWithTimeout(time.Second)
			m.Unlock()
			done2 <- timeout
		}()
	}
	wg2.Wait()
	m.Lock()
	c.Broadcast()
	m.Unlock()
	deadline := time.NewTimer(time.Millisecond * 500)
	for i := 0; i < N; i++ {
		select {
		case timeout := <-done2:
			if timeout {
				t.Fatalf("cond should not timeout")
			}
		case <-deadline.C:
			t.Fatalf("not all goroutines wakeup in 500 ms; i %d", i)
		}
	}
}


================================================
FILE: pkg/utils/errors.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"errors"
	"syscall"
)

var (
	ENOTSUP        = errors.New("not supported")
	ErrFuncTimeout = errors.New("function timeout")
	ErrSkipped     = errors.New("skipped")
	ErrExtlink     = syscall.Errno(1000)
)


================================================
FILE: pkg/utils/general.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"math/rand"
	"time"
)

func SleepWithJitter(d time.Duration) {
	time.Sleep(JitterIt(d))
}

func JitterIt[T float64 | time.Duration](d T) T {
	j := int64(d / 20) // +- 5%
	return d + T(rand.Int63n(2*j+1)-j)
}


================================================
FILE: pkg/utils/humanize.go
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"errors"
	"strconv"

	"github.com/urfave/cli/v2"
)

func ParseBytes(ctx *cli.Context, key string, unit byte) uint64 {
	str := ctx.String(key)
	if len(str) == 0 {
		return 0
	}
	return ParseBytesStr(key, str, unit)
}

func ParseBytesStr(key, str string, unit byte) uint64 {
	s := str
	if c := s[len(s)-1]; c < '0' || c > '9' {
		unit = c
		s = s[:len(s)-1]
	}
	val, err := strconv.ParseFloat(s, 64)
	if err == nil {
		var shift int
		switch unit {
		case 'B':
		case 'k', 'K':
			shift = 10
		case 'm', 'M':
			shift = 20
		case 'g', 'G':
			shift = 30
		case 't', 'T':
			shift = 40
		case 'p', 'P':
			shift = 50
		case 'e', 'E':
			shift = 60
		default:
			err = errors.New("invalid unit")
		}
		val *= float64(uint64(1) << shift)
	}
	if err != nil {
		logger.Fatalf("Invalid value \"%s\" for \"%s\": %s", str, key, err)
	}
	return uint64(val)
}

func ParseMbps(ctx *cli.Context, key string) int64 {
	str := ctx.String(key)
	if len(str) == 0 {
		return 0
	}

	return ParseMbpsStr(key, str)
}

func ParseMbpsStr(key, str string) int64 {
	s := str
	var unit byte = 'M'
	if c := s[len(s)-1]; c < '0' || c > '9' {
		unit = c
		s = s[:len(s)-1]
	}
	val, err := strconv.ParseFloat(s, 64)
	if err == nil {
		switch unit {
		case 'm', 'M':
		case 'g', 'G':
			val *= 1e3
		case 't', 'T':
			val *= 1e6
		case 'p', 'P':
			val *= 1e9
		default:
			err = errors.New("invalid unit")
		}
	}
	if err != nil {
		logger.Fatalf("Invalid value \"%s\" for \"%s\"", str, key)
	}
	return int64(val)
}

func Mbps(val int64) string {
	v := float64(val)
	if v < 1e3 {
		return strconv.FormatFloat(v, 'f', 1, 64) + " Mbps"
	} else if v < 1e6 {
		return strconv.FormatFloat(v/1e3, 'f', 1, 64) + " Gbps"
	} else if v < 1e9 {
		return strconv.FormatFloat(v/1e6, 'f', 1, 64) + " Tbps"
	}
	return strconv.FormatFloat(v/1e9, 'f', 1, 64) + " Pbps"
}


================================================
FILE: pkg/utils/logger.go
================================================
// Copyright 2015 Ka-Hing Cheung
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package utils

import (
	"fmt"
	"io"
	"os"
	"path"
	"runtime"
	"strings"
	"sync"

	"github.com/sirupsen/logrus"
)

var mu sync.Mutex
var loggers = make(map[string]*logHandle)

var syslogHook logrus.Hook
var framePlaceHolder = runtime.Frame{Function: "???", File: "???", Line: 0}

type logHandle struct {
	logrus.Logger

	name     string
	logid    string
	pid      int
	lvl      *logrus.Level
	colorful bool
}

func (l *logHandle) Format(e *logrus.Entry) ([]byte, error) {
	lvl := e.Level
	if l.lvl != nil {
		lvl = *l.lvl
	}
	lvlStr := strings.ToUpper(lvl.String())
	if l.colorful {
		var color int
		switch lvl {
		case logrus.ErrorLevel, logrus.FatalLevel, logrus.PanicLevel:
			color = 31 // RED
		case logrus.WarnLevel:
			color = 33 // YELLOW
		case logrus.InfoLevel:
			color = 34 // BLUE
		default: // logrus.TraceLevel, logrus.DebugLevel
			color = 35 // MAGENTA
		}
		lvlStr = fmt.Sprintf("\033[1;%dm%s\033[0m", color, lvlStr)
	}
	const timeFormat = "2006/01/02 15:04:05.000000"
	caller := e.Caller
	if caller == nil { // for unknown reason, sometimes e.Caller is nil
		caller = &framePlaceHolder
	}
	str := fmt.Sprintf("%s%v %s[%d] <%v>: %v [%s@%s:%d]",
		l.logid,
		e.Time.Format(timeFormat),
		l.name,
		l.pid,
		lvlStr,
		strings.TrimRight(e.Message, "\n"),
		MethodName(caller.Function),
		path.Base(caller.File),
		caller.Line)

	if len(e.Data) != 0 {
		str += " " + fmt.Sprint(e.Data)
	}
	if !strings.HasSuffix(str, "\n") {
		str += "\n"
	}
	return []byte(str), nil
}

// Returns a human-readable method name, removing internal markers added by Go
func MethodName(fullFuncName string) string {
	firstSlash := strings.Index(fullFuncName, "/")
	if firstSlash != -1 && firstSlash < len(fullFuncName)-1 {
		fullFuncName = fullFuncName[firstSlash+1:]
	}
	lastDot := strings.LastIndex(fullFuncName, ".")
	if lastDot == -1 || lastDot == len(fullFuncName)-1 {
		return fullFuncName
	}
	method := fullFuncName[lastDot+1:]
	// avoid func1
	if strings.HasPrefix(method, "func") && method[4] >= '0' && method[4] <= '9' {
		candidate := MethodName(fullFuncName[:lastDot])
		if candidate != "" {
			method = candidate
		}
	}
	// avoid init.3
	if len(method) == 1 && method[0] >= '0' && method[0] <= '9' {
		candidate := MethodName(fullFuncName[:lastDot])
		if candidate != "" {
			method = candidate
		}
	}
	return method
}

// for aws.Logger
func (l *logHandle) Log(args ...interface{}) {
	l.Debugln(args...)
}

func newLogger(name string) *logHandle {
	l := &logHandle{Logger: *logrus.New(), name: name, pid: os.Getpid(), colorful: SupportANSIColor(os.Stderr.Fd())}
	l.Formatter = l
	if syslogHook != nil {
		l.AddHook(syslogHook)
	}
	l.SetReportCaller(true)
	return l
}

// GetLogger returns a logger mapped to `name`
func GetLogger(name string) *logHandle {
	mu.Lock()
	defer mu.Unlock()

	if logger, ok := loggers[name]; ok {
		return logger
	}
	logger := newLogger(name)
	loggers[name] = logger
	return logger
}

// SetLogLevel sets Level to all the loggers in the map
func SetLogLevel(lvl logrus.Level) {
	mu.Lock()
	defer mu.Unlock()
	for _, logger := range loggers {
		logger.Level = lvl
	}
}

func DisableLogColor() {
	mu.Lock()
	defer mu.Unlock()
	for _, logger := range loggers {
		logger.colorful = false
	}
}

func SetOutFile(name string) {
	file, err := os.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
	if err != nil {
		return
	}
	mu.Lock()
	defer mu.Unlock()
	for _, logger := range loggers {
		logger.SetOutput(file)
		logger.colorful = false
	}
}

func SetOutput(w io.Writer) {
	mu.Lock()
	defer mu.Unlock()
	for _, logger := range loggers {
		logger.SetOutput(w)
	}
}

func SetLogID(id string) {
	mu.Lock()
	defer mu.Unlock()
	for _, logger := range loggers {
		logger.logid = id
	}
}


================================================
FILE: pkg/utils/logger_syslog.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"log/syslog"
	"os"
	"sync"

	"github.com/sirupsen/logrus"
	logrus_syslog "github.com/sirupsen/logrus/hooks/syslog"
)

type logLine struct {
	level logrus.Level
	msg   string
}

type SyslogHook struct {
	*logrus_syslog.SyslogHook
	buffer chan logLine
}

func (hook *SyslogHook) flush() {
	for l := range hook.buffer {
		line := l.msg
		var err error
		switch l.level {
		case logrus.PanicLevel:
			err = hook.Writer.Crit(line)
		case logrus.FatalLevel:
			err = hook.Writer.Crit(line)
		case logrus.ErrorLevel:
			err = hook.Writer.Err(line)
		case logrus.WarnLevel:
			err = hook.Writer.Warning(line)
		case logrus.InfoLevel:
			err = hook.Writer.Info(line)
		case logrus.DebugLevel:
			err = hook.Writer.Debug(line)
		}
		if err != nil {
			fmt.Fprintf(os.Stderr, "write to syslog: %v, level: %s, line: %s", err, l.level, line)
		}
	}
}

func (hook *SyslogHook) Fire(entry *logrus.Entry) error {
	line, err := entry.String()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Unable to read entry, %v", err)
		return err
	}

	select {
	case hook.buffer <- logLine{entry.Level, line[27:]}: // drop the timestamp
		return nil
	default:
		fmt.Fprintf(os.Stderr, "buffer of syslog is full, drop: %s", line)
		return fmt.Errorf("buffer is full")
	}
}

var once sync.Once

func InitLoggers(logToSyslog bool) {
	if logToSyslog {
		once.Do(func() {
			hook, err := logrus_syslog.NewSyslogHook("", "", syslog.LOG_DEBUG|syslog.LOG_USER, "")
			if err != nil {
				// println("Unable to connect to local syslog daemon")
				return
			}
			syslogHook = &SyslogHook{hook, make(chan logLine, 1024)}
			go syslogHook.(*SyslogHook).flush()

			for _, l := range loggers {
				l.AddHook(syslogHook)
			}
		})
	}
}


================================================
FILE: pkg/utils/logger_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"os"
	"strings"
	"testing"

	"github.com/sirupsen/logrus"
)

func TestLogger(t *testing.T) {
	_ = GetLogger("test")
	f, err := os.CreateTemp("", "test_logger")
	if err != nil {
		t.Fatalf("temp file: %s", err)
	}
	defer f.Close()
	SetOutFile("") // invalid
	SetOutFile(f.Name())
	InitLoggers(true)
	SetLogID("testid")

	SetLogLevel(logrus.TraceLevel)
	SetLogLevel(logrus.DebugLevel)
	SetLogLevel(logrus.InfoLevel)
	SetLogLevel(logrus.ErrorLevel)
	SetLogLevel(logrus.FatalLevel)
	SetLogLevel(logrus.WarnLevel)
	logger := GetLogger("test")
	logger.Info("info level")
	logger.Debug("debug level")
	logger.Warnf("warn level")
	logger.Error("error level")

	d, _ := os.ReadFile(f.Name())
	s := string(d)
	if strings.Contains(s, "info level") || strings.Contains(s, "debug level") {
		t.Fatalf("info/debug should not be logged: %s", s)
	} else if !strings.Contains(s, "warn level") || !strings.Contains(s, "error level") {
		t.Fatalf("warn/error should be logged: %s", s)
	} else if !strings.Contains(s, "testid") {
		t.Fatalf("logid \"testid\" should be logged: %s", s)
	}
}

func TestMethodName(t *testing.T) {
	type args struct {
		fullFuncName string
	}
	tests := []struct {
		name string
		args args
		want string
	}{{
		name: "main",
		args: args{
			fullFuncName: "cmd.Main",
		},
		want: "Main",
	}, {
		name: "nested method",
		args: args{
			fullFuncName: "github.com/juicedata/juicefs/cmd.watchdog.func1",
		},
		want: "watchdog",
	}, {
		name: "multiple inits",
		args: args{
			fullFuncName: "github.com/juicedata/juicefs/pkg/utils.init.3.func1",
		},
		want: "init",
	}}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := MethodName(tt.args.fullFuncName); got != tt.want {
				t.Errorf("MethodName() = %v, want %v", got, tt.want)
			}
		})
	}
}


================================================
FILE: pkg/utils/logger_windows.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

func InitLoggers(logToSyslog bool) {}


================================================
FILE: pkg/utils/memusage.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"bytes"
	"os"
	"strconv"
	"syscall"
)

func MemoryUsage() (virt, rss uint64) {
	stat, err := os.ReadFile("/proc/self/stat")
	if err == nil {
		stats := bytes.Split(stat, []byte(" "))
		if len(stats) >= 24 {
			v, _ := strconv.ParseUint(string(stats[22]), 10, 64)
			r, _ := strconv.ParseUint(string(stats[23]), 10, 64)
			return v, r * 4096
		}
	}

	var ru syscall.Rusage
	err = syscall.Getrusage(syscall.RUSAGE_SELF, &ru)
	if err == nil {
		return uint64(ru.Maxrss), uint64(ru.Maxrss)
	}
	return
}


================================================
FILE: pkg/utils/memusage_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import "testing"

func TestMemUsage(t *testing.T) {
	virt, rss := MemoryUsage()
	if virt < (1<<20) || rss < (1<<20) || rss > (100<<20) {
		t.Fatalf("invalid memory usage: virt %d, rss %d", virt, rss)
	}
}


================================================
FILE: pkg/utils/memusage_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"os"
	"syscall"
	"unsafe"

	"golang.org/x/sys/windows"
)

type PROCESS_MEMORY_COUNTERS struct {
	CB                         uint32
	PageFaultCount             uint32
	PeakWorkingSetSize         uint64
	WorkingSetSize             uint64
	QuotaPeakPagedPoolUsage    uint64
	QuotaPagedPoolUsage        uint64
	QuotaPeakNonPagedPoolUsage uint64
	QuotaNonPagedPoolUsage     uint64
	PagefileUsage              uint64
	PeakPagefileUsage          uint64
}

var (
	modpsapi                 = windows.NewLazySystemDLL("psapi.dll")
	procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo")
)

func getMemoryInfo(pid int32) (PROCESS_MEMORY_COUNTERS, error) {
	var mem PROCESS_MEMORY_COUNTERS
	c, err := windows.OpenProcess(windows.PROCESS_QUERY_LIMITED_INFORMATION, false, uint32(pid))
	if err != nil {
		return mem, err
	}
	defer windows.CloseHandle(c)
	if err := getProcessMemoryInfo(c, &mem); err != nil {
		return mem, err
	}

	return mem, err
}

func getProcessMemoryInfo(h windows.Handle, mem *PROCESS_MEMORY_COUNTERS) (err error) {
	r1, _, e1 := syscall.Syscall(procGetProcessMemoryInfo.Addr(), 3, uintptr(h), uintptr(unsafe.Pointer(mem)), uintptr(unsafe.Sizeof(*mem)))
	if r1 == 0 {
		if e1 != 0 {
			err = error(e1)
		} else {
			err = syscall.EINVAL
		}
	}
	return
}

func MemoryUsage() (virt, rss uint64) {
	c, err := getMemoryInfo(int32(os.Getpid()))
	if err == nil {
		return c.PeakWorkingSetSize, c.WorkingSetSize
	}
	return 0, 0
}


================================================
FILE: pkg/utils/proc_title.go
================================================
//go:build !nogspt
// +build !nogspt

/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"strings"

	"github.com/erikdubbelboer/gspt"
)

func SetProcTitle(args []string) {
	gspt.SetProcTitle(strings.Join(args, " "))
}


================================================
FILE: pkg/utils/proc_title_noop.go
================================================
//go:build nogspt
// +build nogspt

/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

func SetProcTitle(args []string) {
	// noop: gspt is excluded from this build to prevent argv modification
	// when libjfs.so is loaded as a shared library (e.g. by the Java SDK).
}


================================================
FILE: pkg/utils/progress.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"os"
	"sync/atomic"
	"time"

	"github.com/mattn/go-isatty"
	"github.com/vbauerster/mpb/v7"
	"github.com/vbauerster/mpb/v7/decor"
)

type Progress struct {
	*mpb.Progress
	Quiet bool
	bars  []*mpb.Bar
}

type Bar struct {
	total int64
	*mpb.Bar
}

func (b *Bar) IncrTotal(n int64) {
	total := atomic.AddInt64(&b.total, n)
	b.Bar.SetTotal(total, false)
}

func (b *Bar) SetTotal(total int64) {
	atomic.StoreInt64(&b.total, total)
	b.Bar.SetTotal(total, false)
}

func (b *Bar) GetTotal() int64 {
	return atomic.LoadInt64(&b.total)
}

func (b *Bar) Done() {
	b.Bar.SetTotal(0, true)
}

type DoubleSpinner struct {
	count *mpb.Bar
	bytes *mpb.Bar
}

func (s *DoubleSpinner) IncrInt64(size int64) {
	s.count.Increment()
	s.bytes.IncrInt64(size)
}

func (s *DoubleSpinner) Done() {
	s.count.SetTotal(0, true)
	s.bytes.SetTotal(0, true)
}

func (s *DoubleSpinner) Current() (int64, int64) {
	return s.count.Current(), s.bytes.Current()
}

func (s *DoubleSpinner) SetCurrent(count, bytes int64) {
	s.count.SetCurrent(count)
	s.bytes.SetCurrent(bytes)
}

func NewProgress(quiet bool) *Progress {
	var p *Progress
	if quiet || os.Getenv("DISPLAY_PROGRESSBAR") == "false" || !isatty.IsTerminal(os.Stdout.Fd()) {
		p = &Progress{mpb.New(mpb.WithWidth(64), mpb.WithOutput(nil)), true, nil}
	} else {
		p = &Progress{mpb.New(mpb.WithWidth(64)), false, nil}
		if isatty.IsTerminal(os.Stderr.Fd()) {
			SetOutput(p)
		}
	}
	return p
}

func (p *Progress) AddCountBar(name string, total int64) *Bar {
	startTime := time.Now()
	var speedMsg, usedMsg string
	b := p.Progress.AddBar(0, // disable triggerComplete
		mpb.PrependDecorators(
			decor.Name(name+": ", decor.WCSyncWidth),
			decor.CountersNoUnit("%d/%d"),
		),
		mpb.AppendDecorators(
			decor.OnComplete(decor.AverageSpeed(0, " %.1f/s", decor.WCSyncWidthR), ""),
			decor.Any(func(s decor.Statistics) string {
				if s.Completed && speedMsg == "" {
					speed := float64(s.Current) / time.Since(startTime).Seconds()
					speedMsg = fmt.Sprintf(" %.1f/s", speed)
				}
				return speedMsg
			}, decor.WCSyncWidthR),
			decor.OnComplete(decor.Name(" ETA: ", decor.WCSyncWidthR), ""),
			decor.OnComplete(
				decor.AverageETA(decor.ET_STYLE_GO, decor.WCSyncWidthR), "",
			),
			decor.Any(func(s decor.Statistics) string {
				if s.Completed && usedMsg == "" {
					usedMsg = " used: " + (time.Since(startTime)).String()
				}
				return usedMsg
			}, decor.WCSyncWidthR),
		),
	)
	b.SetTotal(total, false)
	p.bars = append(p.bars, b)
	return &Bar{Bar: b, total: total}
}

func newSpinner() mpb.BarFiller {
	spinnerStyle := []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"}
	for i, s := range spinnerStyle {
		spinnerStyle[i] = "\033[1;32m" + s + "\033[0m"
	}
	return mpb.NewBarFiller(mpb.SpinnerStyle(spinnerStyle...))
}

func (p *Progress) AddCountSpinner(name string) *Bar {
	decors := []decor.Decorator{
		decor.Name(name+": ", decor.WCSyncWidth),
		decor.Merge(decor.CurrentNoUnit("%d", decor.WCSyncSpaceR), decor.WCSyncSpaceR),
	}
	decors = append(decors, decor.AverageSpeed(0, "  %.1f/s", decor.WCSyncSpaceR))
	b := p.Progress.Add(0, newSpinner(),
		mpb.PrependDecorators(decors...),
		mpb.BarFillerClearOnComplete(),
	)
	p.bars = append(p.bars, b)
	return &Bar{Bar: b}
}

func (p *Progress) AddByteSpinner(name string) *Bar {
	decors := []decor.Decorator{
		decor.Name(name+": ", decor.WCSyncWidth),
		decor.CurrentKibiByte("% .1f", decor.WCSyncSpaceR),
		decor.CurrentNoUnit("(%d Bytes)", decor.WCSyncSpaceR),
	}
	// FIXME: maybe use EWMA speed
	decors = append(decors, decor.AverageSpeed(decor.UnitKiB, "  % .1f", decor.WCSyncSpaceR))
	b := p.Progress.Add(0, newSpinner(),
		mpb.PrependDecorators(decors...),
		mpb.BarFillerClearOnComplete(),
	)
	p.bars = append(p.bars, b)
	return &Bar{Bar: b}
}

func (p *Progress) AddIoSpeedBar(name string, total int64) *Bar {
	b := p.Progress.Add(0,
		mpb.NewBarFiller(mpb.BarStyle()),
		mpb.PrependDecorators(
			decor.Name(name+": ", decor.WCSyncWidth),
			decor.CountersKibiByte("% .1f / % .1f"),
		),
		mpb.AppendDecorators(
			decor.OnComplete(decor.Percentage(decor.WC{W: 5}), "done"),
			decor.OnComplete(
				decor.AverageETA(decor.ET_STYLE_GO, decor.WC{W: 6}), "",
			),
		),
	)
	b.SetTotal(total, false)
	p.bars = append(p.bars, b)
	return &Bar{Bar: b}
}

func (p *Progress) AddDoubleSpinner(name string) *DoubleSpinner {
	return &DoubleSpinner{
		p.AddCountSpinner(name).Bar,
		p.AddByteSpinner(name).Bar,
	}
}

func (p *Progress) AddDoubleSpinnerTwo(countName, sizeName string) *DoubleSpinner {
	return &DoubleSpinner{
		p.AddCountSpinner(countName).Bar,
		p.AddByteSpinner(sizeName).Bar,
	}
}

func (p *Progress) Done() {
	for _, b := range p.bars {
		if !b.Completed() {
			b.SetTotal(0, true)
		}
	}
	p.Progress.Wait()
	SetOutput(os.Stderr)
}

func MockProgress() (*Progress, *Bar) {
	progress := NewProgress(true)
	bar := progress.AddCountBar("Mock", 0)
	return progress, bar
}


================================================
FILE: pkg/utils/progress_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"testing"
	"time"
)

func TestProgresBar(t *testing.T) {
	p := NewProgress(true)
	bar := p.AddCountBar("Bar", 0)
	cp := p.AddCountSpinner("Spinner")
	bp := p.AddByteSpinner("Spinner")
	bar.SetTotal(50)
	for i := 0; i < 100; i++ {
		time.Sleep(time.Millisecond)
		bar.Increment()
		if i%2 == 0 {
			bar.IncrTotal(1)
			cp.Increment()
			bp.IncrInt64(1024)
		}
	}
	bar.Done()
	p.Done()
	if bar.Current() != 100 || cp.Current() != 50 || bp.Current() != 50*1024 {
		t.Fatalf("Final values: bar %d, count %d, bytes: %d", bar.Current(), cp.Current(), bp.Current())
	}

	p = NewProgress(true)
	dp := p.AddDoubleSpinner("Spinner")
	go func() {
		for i := 0; i < 100; i++ {
			time.Sleep(time.Millisecond)
			dp.IncrInt64(1024)
		}
		dp.Done()
	}()
	p.Wait()
	if c, b := dp.Current(); c != 100 || b != 102400 {
		t.Fatalf("Final values: count %d, bytes %d", c, b)
	}
}


================================================
FILE: pkg/utils/rusage.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import "syscall"

type Rusage struct {
	syscall.Rusage
}

// GetUtime returns the user time in seconds.
func (ru *Rusage) GetUtime() float64 {
	return float64(ru.Utime.Sec) + float64(ru.Utime.Usec)/1e6
}

// GetStime returns the system time in seconds.
func (ru *Rusage) GetStime() float64 {
	return float64(ru.Stime.Sec) + float64(ru.Stime.Usec)/1e6
}

// GetRusage returns CPU usage of current process.
func GetRusage() *Rusage {
	var ru syscall.Rusage
	_ = syscall.Getrusage(syscall.RUSAGE_SELF, &ru)
	return &Rusage{ru}
}


================================================
FILE: pkg/utils/rusage_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"testing"
	"time"
)

func TestRUsage(t *testing.T) {
	//u := GetRusage()
	var s string
	for i := 0; i < 1000; i++ {
		s += time.Now().String()
	}
	// don't optimize the loop
	if len(s) < 10 {
		panic("unreachable")
	}
	_ = GetRusage()
	// cancelled due to high machine load
	//if u2.GetUtime()-u.GetUtime() < 0.0001 {
	//	t.Fatalf("invalid utime: %f", u2.GetStime()-u.GetStime())
	//}
}


================================================
FILE: pkg/utils/rusage_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import "golang.org/x/sys/windows"

type Rusage struct {
	kernel windows.Filetime
	user   windows.Filetime
}

func (ru *Rusage) GetUtime() float64 {
	return float64((int64(ru.user.HighDateTime)<<32)+int64(ru.user.LowDateTime)) / 10 / 1e6
}

func (ru *Rusage) GetStime() float64 {
	return float64((int64(ru.kernel.HighDateTime)<<32)+int64(ru.kernel.LowDateTime)) / 10 / 1e6
}

func GetRusage() *Rusage {
	h := windows.CurrentProcess()
	var creation, exit, kernel, user windows.Filetime
	err := windows.GetProcessTimes(h, &creation, &exit, &kernel, &user)
	if err == nil {
		return &Rusage{kernel, user}
	}
	return nil
}


================================================
FILE: pkg/utils/utils.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"context"
	"crypto/rand"
	"fmt"
	"mime"
	"net"
	"os"
	"os/user"
	"path"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/mattn/go-isatty"
)

// Exists checks if the file/folder in given path exists
func Exists(path string) bool {
	_, err := os.Stat(path)
	return err == nil || !os.IsNotExist(err) //skip mutate
}

// SplitDir splits a path with default path list separator or comma.
func SplitDir(d string) []string {
	dd := strings.Split(d, string(os.PathListSeparator))
	if len(dd) == 1 {
		dd = strings.Split(dd[0], ",")
	}
	return dd
}

// GetLocalIp get the local ip used to access remote address.
func GetLocalIp(address string) (string, error) {
	conn, err := net.Dial("udp", address)
	if err != nil {
		return "", err
	}
	ip, _, err := net.SplitHostPort(conn.LocalAddr().String())
	if err != nil {
		return "", err
	}
	return ip, nil
}

func FindLocalIPs(allowedInterfaces ...string) ([]net.IP, error) {
	ifaces, err := net.Interfaces()
	if err != nil {
		return nil, err
	}

	// Build a set of allowed interface names for fast lookup
	allowedSet := make(map[string]bool)
	for _, name := range allowedInterfaces {
		allowedSet[name] = true
	}
	checkAllowed := len(allowedSet) > 0

	var ips []net.IP
	for _, iface := range ifaces {
		if iface.Flags&net.FlagUp == 0 {
			continue // interface down
		}
		if iface.Flags&net.FlagLoopback != 0 {
			continue // loopback interface
		}
		// Filter by interface name if allowedInterfaces is specified
		if checkAllowed && !allowedSet[iface.Name] {
			continue
		}
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}
		for _, addr := range addrs {
			var ip net.IP
			switch v := addr.(type) {
			case *net.IPNet:
				ip = v.IP
			case *net.IPAddr:
				ip = v.IP
			}
			if len(ip) > 0 && !ip.IsLoopback() {
				ips = append(ips, ip)
			}
		}
	}
	return ips, nil
}

func WithTimeout(pCtx context.Context, f func(context.Context) error, timeout time.Duration) error {
	var done = make(chan int, 1)
	var t = time.NewTimer(timeout)
	var err error
	ctx, cancel := context.WithCancel(pCtx)
	go func() {
		err = f(ctx)
		done <- 1
	}()
	select {
	case <-ctx.Done():
		err = ctx.Err()
		t.Stop()
	case <-done:
		t.Stop()
	case <-t.C:
		err = fmt.Errorf("timeout after %s: %w", timeout, ErrFuncTimeout)
	}
	cancel()
	return err
}

func RemovePassword(uri string) string {
	p := strings.LastIndex(uri, "@")
	if p < 0 {
		return uri
	}
	sp := strings.Index(uri, "://") + 3
	if sp == 2 {
		sp = 0
	}
	cp := strings.Index(uri[sp:], ":")
	if cp < 0 || sp+cp > p {
		return uri
	}
	return uri[:sp+cp] + ":****" + uri[p:]
}

func GuessMimeType(key string) string {
	mimeType := mime.TypeByExtension(path.Ext(key))
	if !strings.ContainsRune(mimeType, '/') {
		mimeType = "application/octet-stream"
	}
	return mimeType
}

func StringContains(s []string, e string) bool {
	for _, item := range s {
		if item == e {
			return true
		}
	}
	return false
}

func FormatBytes(n uint64) string {
	if n < 1024 {
		return fmt.Sprintf("%d Bytes", n)
	}
	units := []string{"K", "M", "G", "T", "P", "E"}
	m := n
	i := 0
	for ; i < len(units)-1 && m >= 1<<20; i++ {
		m = m >> 10
	}
	return fmt.Sprintf("%.2f %siB (%d Bytes)", float64(m)/1024.0, units[i], n)
}

func SupportANSIColor(fd uintptr) bool {
	return isatty.IsTerminal(fd) && runtime.GOOS != "windows"
}

func RandRead(buf []byte) {
	if _, err := rand.Read(buf); err != nil {
		logger.Fatalf("Generate random content: %s", err)
	}
}

var uids = make(map[int]string)
var gids = make(map[int]string)
var users = make(map[string]int)
var groups = make(map[string]int)
var mutex sync.Mutex

var logger = GetLogger("juicefs")

func UserName(uid int) string {
	mutex.Lock()
	defer mutex.Unlock()
	name, ok := uids[uid]
	if !ok {
		if u, err := user.LookupId(strconv.Itoa(uid)); err == nil {
			name = u.Username
		} else {
			logger.Warnf("lookup uid %d: %s", uid, err)
			name = strconv.Itoa(uid)
		}
		uids[uid] = name
	}
	return name
}

func GroupName(gid int) string {
	mutex.Lock()
	defer mutex.Unlock()
	name, ok := gids[gid]
	if !ok {
		if g, err := user.LookupGroupId(strconv.Itoa(gid)); err == nil {
			name = g.Name
		} else {
			logger.Warnf("lookup gid %d: %s", gid, err)
			name = strconv.Itoa(gid)
		}
		gids[gid] = name
	}
	return name
}

func LookupUser(name string) int {
	mutex.Lock()
	defer mutex.Unlock()
	if u, ok := users[name]; ok {
		return u
	}
	var uid = -1
	if u, err := user.Lookup(name); err == nil {
		uid, _ = strconv.Atoi(u.Uid)
	} else {
		if g, e := strconv.Atoi(name); e == nil {
			uid = g
		} else {
			logger.Warnf("lookup user %s: %s", name, err)
		}
	}
	users[name] = uid
	return uid
}

func LookupGroup(name string) int {
	mutex.Lock()
	defer mutex.Unlock()
	if u, ok := groups[name]; ok {
		return u
	}
	var gid = -1
	if u, err := user.LookupGroup(name); err == nil {
		gid, _ = strconv.Atoi(u.Gid)
	} else {
		if g, e := strconv.Atoi(name); e == nil {
			gid = g
		} else {
			logger.Warnf("lookup group %s: %s", name, err)
		}
	}
	groups[name] = gid
	return gid
}

func Duration(s string) time.Duration {
	if s == "" {
		return 0
	}
	v, err := strconv.ParseFloat(s, 64)
	if err == nil {
		return time.Microsecond * time.Duration(v*1e6)
	}

	err = nil
	var d time.Duration
	p := strings.Index(s, "d")
	if p >= 0 {
		v, err = strconv.ParseFloat(s[:p], 64)
	}
	if err == nil && s[p+1:] != "" {
		d, err = time.ParseDuration(s[p+1:])
	}

	if err != nil {
		logger.Warnf("Invalid duration value: %s, setting it to 0", s)
		return 0
	}
	return d + time.Hour*time.Duration(v*24)
}


================================================
FILE: pkg/utils/utils_darwin.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"os/exec"
)

func GetKernelVersion() (major, minor int) { return }

func GetSysInfo() string {
	var (
		kernel    string
		osVersion []byte
		hardware  []byte
	)

	kernel, _ = GetKernelInfo()

	osVersion, _ = exec.Command("sw_vers").Output()

	hardware, _ = exec.Command("system_profiler", "SPMemoryDataType", "SPStorageDataType").Output()

	return fmt.Sprintf(`
Kernel: 
%s
OS: 
%s
Hardware: 
%s`, kernel, string(osVersion), string(hardware))
}

func SetIOFlusher() {}

func DisableTHP() {}

func AdjustOOMKiller(score int) {}


================================================
FILE: pkg/utils/utils_linux.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"errors"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"
)

func GetKernelVersion() (major, minor int) {
	var uname syscall.Utsname
	if err := syscall.Uname(&uname); err == nil {
		buf := make([]byte, 0, 65) // Utsname.Release [65]int8
		for _, v := range uname.Release {
			if v == 0x00 {
				break
			}
			buf = append(buf, byte(v))
		}
		ps := strings.SplitN(string(buf), ".", 3)
		if len(ps) < 2 {
			return
		}
		if major, err = strconv.Atoi(ps[0]); err != nil {
			return
		}
		minor, _ = strconv.Atoi(ps[1])
	}
	return
}

func GetSysInfo() string {
	var (
		kernel    []byte
		osVersion []byte
		err       error
	)

	kernel, _ = exec.Command("cat", "/proc/version").Output()

	if osVersion, err = exec.Command("lsb_release", "-a").Output(); err != nil {
		osVersion, _ = exec.Command("cat", "/etc/os-release").Output()
	}

	return fmt.Sprintf(`
Kernel: 
%s
OS: 
%s`, kernel, osVersion)
}

func SetIOFlusher() {
	err := unix.Prctl(unix.PR_SET_IO_FLUSHER, 1, 0, 0, 0)
	if errors.Is(err, unix.EPERM) {
		logger.Warn("CAP_SYS_RESOURCE is needed for PR_SET_IO_FLUSHER")
	} else if errors.Is(err, unix.EINVAL) {
		logger.Info("PR_SET_IO_FLUSHER, which is introduced by Linux 5.6, is not supported by the running kernel")
	}
}

// Disable transparent huge page
func DisableTHP() {
	for {
		err := unix.Prctl(unix.PR_SET_THP_DISABLE, 1, 0, 0, 0)
		if err == nil {
			logger.Info("Disabled transparent hugepage")
			break
		}

		if errors.Is(err, unix.EINTR) {
			continue
		} else {
			logger.Warnf("Failed to disable transparent huge page: %s", err)
			return
		}
	}
}

// AdjustOOMKiller: change oom_score_adj to avoid OOM-killer
func AdjustOOMKiller(score int) {
	if os.Getuid() != 0 {
		return
	}
	f, err := os.OpenFile("/proc/self/oom_score_adj", os.O_WRONLY, 0666)
	if err != nil {
		if !os.IsNotExist(err) {
			println(err)
		}
		return
	}
	defer f.Close()
	_, err = f.WriteString(strconv.Itoa(score))
	if err != nil {
		println("adjust OOM score:", err)
	}
}


================================================
FILE: pkg/utils/utils_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"context"
	"strings"
	"testing"
	"time"
)

// mutate_test_job_number: 2
// checksum 9cb13bb28aa7918edaf4f0f4ca92eea5
// checksum 05debda2840d31bac0ab5c20c5510591
func TestMin(t *testing.T) {
	assertEqual(t, min(1, 2), 1)
	assertEqual(t, min(-1, -2), -2)
	assertEqual(t, min(0, 0), 0)
}

func TestExists(t *testing.T) {
	assertEqual(t, Exists("/"), true)
	assertEqual(t, Exists("/not_exist_path"), false)
}

func TestSplitDir(t *testing.T) {
	assertEqual(t, SplitDir("/a:/b"), []string{"/a", "/b"})
	assertEqual(t, SplitDir("a,/b"), []string{"a", "/b"})
	assertEqual(t, SplitDir("/a;b"), []string{"/a;b"})
	assertEqual(t, SplitDir("a/b"), []string{"a/b"})
}

func TestGetInode(t *testing.T) {
	_, err := GetFileInode("")
	if err == nil {
		t.Fatalf("invalid path should fail")
	}
	ino, err := GetFileInode("/")
	if err != nil {
		t.Fatalf("get file inode: %s", err)
	} else if ino > 2 {
		t.Fatalf("inode of root should be 1/2, but got %d", ino)
	}
}

func TestLocalIp(t *testing.T) {
	_, err := GetLocalIp("127.0.0.1")
	if err == nil {
		t.Fatalf("should fail with invalid address")
	}
	ip, err := GetLocalIp("127.0.0.1:22")
	if err != nil {
		t.Fatalf("get local ip: %s", err)
	}
	if ip != "127.0.0.1" {
		t.Fatalf("local ip should be 127.0.0.1, bug got %s", ip)
	}
}

func TestFindLocalIPs(t *testing.T) {
	// Test without interface filter (should return all IPs)
	ips, err := FindLocalIPs()
	if err != nil {
		t.Fatalf("FindLocalIPs failed: %s", err)
	}
	if len(ips) == 0 {
		t.Logf("Warning: No network interfaces found (this might be expected in some environments)")
	}

	// Test with non-existent interface filter (should return no IPs)
	ips, err = FindLocalIPs("nonexistent_interface_12345")
	if err != nil {
		t.Fatalf("FindLocalIPs with filter failed: %s", err)
	}
	if len(ips) != 0 {
		t.Fatalf("Expected 0 IPs with non-existent interface, got %d", len(ips))
	}

	// Test with multiple interface filters
	ips, err = FindLocalIPs("eth0", "en0", "lo0")
	if err != nil {
		t.Fatalf("FindLocalIPs with multiple filters failed: %s", err)
	}
	// We don't assert length here since it depends on the system
	t.Logf("Found %d IPs with eth0/en0/lo0 filter", len(ips))
}

func TestTimeout(t *testing.T) {
	err := WithTimeout(context.TODO(), func(context.Context) error {
		return nil
	}, time.Millisecond*10)
	if err != nil {
		t.Fatalf("fast function should return nil")
	}
	err = WithTimeout(context.TODO(), func(context.Context) error {
		time.Sleep(time.Millisecond * 100)
		return nil
	}, time.Millisecond*10)
	if err == nil || !strings.HasPrefix(err.Error(), "timeout after") {
		t.Fatalf("slow function should  be timeout: %s", err)
	}
}

func TestRemovePassword(t *testing.T) {
	testCase := []struct {
		uri      string
		expected string
	}{
		{"redis://:password@localhost:6379/0",
			"redis://:****@localhost:6379/0",
		},
		{"redis://:pass@word@localhost:6379/0",
			"redis://:****@localhost:6379/0",
		},
		{":password@localhost:6379/0",
			":****@localhost:6379/0",
		},
		{"oss://ak:sk@zhijian-test2.oss-cn-hangzhou.aliyuncs.com",
			"oss://ak:****@zhijian-test2.oss-cn-hangzhou.aliyuncs.com",
		},
		{"/tmp/file",
			"/tmp/file",
		},
		{"file:///tmp/file",
			"file:///tmp/file",
		},
		{"sftp:///tmp/file",
			"sftp:///tmp/file",
		},
	}
	for _, tc := range testCase {
		assertEqual(t, RemovePassword(tc.uri), tc.expected)
	}
}


================================================
FILE: pkg/utils/utils_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"os"
	"os/exec"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"
)

func GetCurrentUID() int {
	return os.Getuid()
}

func GetCurrentGID() int {
	return os.Getgid()
}

func GetCurrentUserSIDStr() string {
	return ""
}

func GetCurrentUserGroupSIDStr() string {
	return ""
}

func IsWinAdminOrElevatedPrivilege() bool {
	return false
}

func GetFileInode(path string) (uint64, error) {
	fi, err := os.Stat(path)
	if err != nil {
		return 0, err
	}
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return sst.Ino, nil
	}
	return 0, nil
}

func GetFileInodeNotFollow(path string) (uint64, error) {
	fi, err := os.Lstat(path)
	if err != nil {
		return 0, err
	}
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return sst.Ino, nil
	}
	return 0, nil
}

func GetDev(fpath string) int { // ID of device containing file
	fi, err := os.Stat(fpath)
	if err != nil {
		return -1
	}
	if sst, ok := fi.Sys().(*syscall.Stat_t); ok {
		return int(sst.Dev)
	}
	return -1
}

func GetKernelInfo() (string, error) {
	kernel, err := exec.Command("uname", "-a").Output()
	if err != nil {
		return "", err
	}

	// Ignore hostname information
	tmp := strings.Split(string(kernel), " ")
	result := strings.Join(append(tmp[:1], tmp[2:]...), " ")
	return result, nil
}

func GetUmask() int {
	umask := syscall.Umask(0)
	syscall.Umask(umask)
	return umask
}

func SetUmask(umask int) int {
	return syscall.Umask(umask)
}

func ErrnoName(err syscall.Errno) string {
	errName := unix.ErrnoName(err)
	if errName == "" {
		errName = strconv.Itoa(int(err))
	}
	return errName
}


================================================
FILE: pkg/utils/utils_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package utils

import (
	"fmt"
	"os/exec"
	"strconv"
	"syscall"

	"github.com/juicedata/juicefs/pkg/win"
	"golang.org/x/sys/windows"
)

func GetCurrentUID() int {
	return win.GetCurrentUID()
}

func GetCurrentGID() int {
	return win.GetCurrentGID()
}

func GetCurrentUserSIDStr() string {
	sid, err := win.GetCurrentUserSID()
	if err != nil {
		logger.Warnf("failed to get sid for current user, %s", err)
		return ""
	}

	return fmt.Sprintf("%s (%s)", sid.String(), win.GetSidName(sid, true))
}

func GetCurrentUserGroupSIDStr() string {
	sid, err := win.GetCurrentUserPrimaryGroupSID()
	if err != nil {
		logger.Warnf("failed to get sid for current user, %s", err)
		return ""
	}

	return fmt.Sprintf("%s (%s)", sid.String(), win.GetSidName(sid, true))
}

func IsWinAdminOrElevatedPrivilege() bool {
	uid := GetCurrentUID()
	if uid == win.AdministratorUIDFromFUSE {
		return true
	}
	elevated, err := win.IsProcessElevated()
	if err != nil {
		logger.Warnf("failed to determine if process is elevated, %s", err)
		return false
	}
	return elevated
}

func getFileInode(path string, follow bool) (uint64, error) {
	pathU16, err := windows.UTF16PtrFromString(path)
	if err != nil {
		return 0, err
	}
	var flagsAndAttributes uint32 = windows.FILE_FLAG_BACKUP_SEMANTICS
	if !follow {
		flagsAndAttributes |= windows.FILE_FLAG_OPEN_REPARSE_POINT
	}
	fd, err := windows.CreateFile(pathU16, windows.GENERIC_READ, windows.FILE_SHARE_READ, nil, windows.OPEN_EXISTING, flagsAndAttributes, 0)
	if err != nil {
		return 0, err
	}
	defer windows.Close(fd)
	var data windows.ByHandleFileInformation
	err = windows.GetFileInformationByHandle(fd, &data)
	if err != nil {
		return 0, err
	}
	return uint64(data.FileIndexHigh)<<32 + uint64(data.FileIndexLow), nil
}

func GetFileInode(path string) (uint64, error) {
	return getFileInode(path, true)
}

func GetFileInodeNotFollow(path string) (uint64, error) {
	return getFileInode(path, false)
}

func GetKernelVersion() (major, minor int) { return }

func GetDev(fpath string) int { return -1 }

func GetSysInfo() string {
	sysInfo, _ := exec.Command("systeminfo").Output()
	return string(sysInfo)
}

func GetUmask() int { return 0 }

func SetUmask(umask int) int {
	return 0
}

func ErrnoName(err syscall.Errno) string {
	return strconv.Itoa(int(err))
}


================================================
FILE: pkg/version/.gitattributes
================================================
version.go export-subst


================================================
FILE: pkg/version/version.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Reference: https://semver.org; NOT strictly followed.
package version

import (
	"fmt"
	"strconv"
	"strings"
)

var (
	revision     = "$Format:%h$" // value is assigned in Makefile
	revisionDate = "$Format:%as$"
	ver          = Semver{
		major:      1,
		minor:      4,
		patch:      0,
		preRelease: "dev",
		build:      fmt.Sprintf("%s.%s", revisionDate, revision),
	}
)

type Semver struct {
	major, minor, patch uint64
	preRelease, build   string
}

func (s *Semver) String() string {
	pr := s.preRelease
	if pr != "" {
		pr = "-" + pr
	}
	if strings.Contains(s.build, "Format") {
		s.build = "unknown"
	}
	return fmt.Sprintf("%d.%d.%d%s+%s", s.major, s.minor, s.patch, pr, s.build)
}

func Version() string {
	return ver.String()
}

func SetVersion(v string) {
	ver = *Parse(v)
}

func GetVersion() Semver {
	return ver
}

func CompareVersions(v1, v2 *Semver) (int, error) {
	if v1 == nil || v2 == nil {
		return 0, fmt.Errorf("v1 %v and v2 %v can't be nil", v1, v2)
	}
	var less bool
	if v1.major != v2.major {
		less = v1.major < v2.major
	} else if v1.minor != v2.minor {
		less = v1.minor < v2.minor
	} else if v1.patch != v2.patch {
		less = v1.patch < v2.patch
	} else if v1.preRelease != v2.preRelease {
		less = v1.preRelease < v2.preRelease
		if v1.preRelease == "" || v2.preRelease == "" {
			less = !less
		}
	} else {
		return 0, nil
	}
	if less {
		return -1, nil
	} else {
		return 1, nil
	}
}

func Parse(vs string) *Semver {
	if p := strings.Index(vs, "+"); p > 0 {
		vs = vs[:p] // ignore build information
	}
	var v Semver
	if p := strings.Index(vs, "-"); p > 0 {
		v.preRelease = vs[p+1:]
		vs = vs[:p]
	}

	ps := strings.Split(vs, ".")
	if len(ps) > 3 {
		return nil
	}
	var err error
	if v.major, err = strconv.ParseUint(ps[0], 10, 64); err != nil {
		return nil
	}
	if len(ps) > 1 {
		if v.minor, err = strconv.ParseUint(ps[1], 10, 64); err != nil {
			return nil
		}
	}
	if len(ps) > 2 {
		if v.patch, err = strconv.ParseUint(ps[2], 10, 64); err != nil {
			return nil
		}
	}
	return &v
}


================================================
FILE: pkg/version/version_test.go
================================================
/*
 * JuiceFS, Copyright 2022 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package version

import "testing"

func TestVersion(t *testing.T) {
	ver = Semver{
		major: 1,
		minor: 0,
		patch: 0,
		build: "2022-02-22.f4692af9",
	}
	if v := Version(); v != "1.0.0+2022-02-22.f4692af9" {
		t.Fatalf("Version %s != expected 1.0.0+2022-02-22.f4692af9", v)
	}
	if _, err := CompareVersions(&ver, Parse("")); err == nil {
		t.Fatalf("Expect failed to parse empty string")
	}
	if _, err := CompareVersions(&ver, Parse("0.1.2.3")); err == nil {
		t.Fatalf("Expect failed to parse string \"0.1.2.3\"")
	}

	cases := []struct {
		vs     string
		expect int
	}{
		{"0.9+foo.bar", 1},
		{"0.9.10", 1},
		{"1.0-beta+baz", 1},
		{"1", 0},
		{"1.1", -1},
		{"2.0.0-alpha", -1},
	}
	for _, c := range cases {
		if r, _ := CompareVersions(&ver, Parse(c.vs)); r != c.expect {
			t.Fatalf("Failed case: %+v", c)
		}
	}

	ver.preRelease = "beta"
	if v := Version(); v != "1.0.0-beta+2022-02-22.f4692af9" {
		t.Fatalf("Version %s != expected 1.0.0-beta+2022-02-22.f4692af9", v)
	}
	cases[2].expect = 0
	cases[3].expect = -1
	for _, c := range cases {
		if r, _ := CompareVersions(&ver, Parse(c.vs)); r != c.expect {
			t.Fatalf("Failed case: %+v", c)
		}
	}
}


================================================
FILE: pkg/vfs/accesslog.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"fmt"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

var (
	opsDurationsHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "fuse_ops_durations_histogram_seconds",
		Help:    "Operations latency distributions.",
		Buckets: prometheus.ExponentialBuckets(0.00001, 1.8, 29), // should cover range of `objectReqsHistogram`
	})
	opsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "fuse_ops_total",
		Help: "Total number of operations.",
	}, []string{"method"})
	opsDurations = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "fuse_ops_durations_seconds",
		Help: "Operations latency in seconds.",
	}, []string{"method"})
	opsIOErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "fuse_ops_io_errors",
		Help: "Number of IO errors.",
	}, []string{"errno"})
)

type logReader struct {
	sync.Mutex
	buffer chan []byte
	last   []byte
}

var (
	readerLock sync.RWMutex
	readers    map[uint64]*logReader
)

func init() {
	readers = make(map[uint64]*logReader)
}

func logit(ctx Context, method string, err syscall.Errno, format string, args ...interface{}) {
	used := ctx.Duration()
	opsDurationsHistogram.Observe(used.Seconds())
	opsTotal.WithLabelValues(method).Inc()
	opsDurations.WithLabelValues(method).Add(used.Seconds())
	if err != 0 {
		opsIOErrors.WithLabelValues(utils.ErrnoName(err)).Inc()
	}
	readerLock.RLock()
	defer readerLock.RUnlock()
	if len(readers) == 0 && used < time.Second*10 {
		return
	}
	for i, a := range args {
		switch v := a.(type) {
		case string:
			if !strconv.CanBackquote(v) {
				args[i] = strings.Trim(strconv.Quote(v), "\"")
			}
		}
	}
	cmd := fmt.Sprintf(method+" "+format, args...)
	t := utils.Now()
	ts := t.Format("2006.01.02 15:04:05.000000")
	cmd += fmt.Sprintf(" - %s <%.6f>", strerr(err), used.Seconds())
	if ctx.Pid() != 0 && used >= time.Second*10 {
		logger.Infof("slow operation: %s", cmd)
	}
	line := []byte(fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd))

	for _, r := range readers {
		select {
		case r.buffer <- line:
		default:
		}
	}
}

func openAccessLog(fh uint64) uint64 {
	readerLock.Lock()
	defer readerLock.Unlock()
	readers[fh] = &logReader{buffer: make(chan []byte, 10240)}
	return fh
}

func closeAccessLog(fh uint64) {
	readerLock.Lock()
	defer readerLock.Unlock()
	delete(readers, fh)
}

func readAccessLog(fh uint64, buf []byte) int {
	readerLock.RLock()
	r, ok := readers[fh]
	readerLock.RUnlock()
	if !ok {
		return 0
	}
	r.Lock()
	defer r.Unlock()
	var n int
	if len(r.last) > 0 {
		n = copy(buf, r.last)
		r.last = r.last[n:]
	}
	var t = time.NewTimer(time.Second)
	defer t.Stop()
	for n < len(buf) {
		select {
		case line := <-r.buffer:
			l := copy(buf[n:], line)
			n += l
			if l < len(line) {
				r.last = line[l:]
			}
		case <-t.C:
			if n == 0 {
				n = copy(buf, "#\n")
			}
			return n
		}
	}
	return n
}


================================================
FILE: pkg/vfs/accesslog_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
)

func TestAccessLog(t *testing.T) {
	openAccessLog(1)
	defer closeAccessLog(1)

	ctx := NewLogContext(meta.NewContext(10, 1, []uint32{2}))
	logit(ctx, "method", 0, "test")

	n := readAccessLog(2, nil)
	if n != 0 {
		t.Fatalf("invalid fd")
	}

	now := time.Now()
	// partial read
	buf := make([]byte, 1024)
	n = readAccessLog(1, buf[:10])
	if n != 10 {
		t.Fatalf("partial read: %d", n)
	}
	if time.Since(now) > time.Millisecond*10 {
		t.Fatalf("should not block")
	}

	// read whole line, block for 1 second
	n = readAccessLog(1, buf[10:])
	if n != 66 {
		t.Fatalf("partial read: %d", n)
	}
	logs := string(buf[:10+n])

	// check format
	ts, err := time.Parse("2006.01.02 15:04:05.000000", logs[:26])
	if err != nil {
		t.Fatalf("invalid time %s: %s", logs, err)
	}
	if now.Sub(ts.Local()) > time.Millisecond*10 {
		t.Fatalf("stale time: %s now: %s", ts, time.Now())
	}
	if logs[26:len(logs)-4] != " [uid:1,gid:2,pid:10] method test - OK <0.0000" {
		t.Fatalf("unexpected log: %q", logs[26:])
	}

	// block read
	n = readAccessLog(1, buf)
	if n != 2 || string(buf[:2]) != "#\n" {
		t.Fatalf("expected line: %q", string(buf[:n]))
	}
}


================================================
FILE: pkg/vfs/backup.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"compress/gzip"
	"context"
	"errors"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	osync "github.com/juicedata/juicefs/pkg/sync"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

var (
	LastBackupTimeG = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "last_successful_backup",
		Help: "Last successful backup.",
	})
	LastBackupDurationG = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "last_backup_duration",
		Help: "Last backup duration.",
	})
)

// Backup metadata periodically in the object storage
func Backup(m meta.Meta, blob object.ObjectStorage, interval time.Duration, skipTrash bool) {
	ctx := meta.Background()
	key := "lastBackup"
	for {
		utils.SleepWithJitter(interval / 10)
		var value []byte
		if st := m.GetXattr(ctx, 0, key, &value); st != 0 && st != meta.ENOATTR {
			logger.Warnf("getxattr inode 1 key %s: %s", key, st)
			continue
		}
		var last time.Time
		var err error
		if len(value) > 0 {
			last, err = time.Parse(time.RFC3339, string(value))
		}
		if err != nil {
			logger.Warnf("parse time value %s: %s", value, err)
			continue
		}
		if now := time.Now(); now.Sub(last) >= interval {
			var iused, dummy uint64
			_ = m.StatFS(ctx, meta.RootInode, &dummy, &dummy, &iused, &dummy)
			if interval <= time.Hour {
				if iused > 1e6 {
					logger.Warnf("backup metadata skipped because of too many inodes: %d %s; "+
						"you may increase `--backup-meta` to enable it again", iused, interval)
					continue
				}
			}
			if st := m.SetXattr(ctx, 0, key, []byte(now.Format(time.RFC3339)), meta.XattrCreateOrReplace); st != 0 {
				logger.Warnf("setxattr inode 1 key %s: %s", key, st)
				continue
			}
			if iused >= 1e5 {
				logger.Infof("backup metadata started, inodes=%d", iused)
			}
			if fpath, err := backup(m, blob, now, iused < 1e5, skipTrash); err == nil {
				go cleanupBackups(blob, now) // only cleanup on success
				LastBackupTimeG.Set(float64(now.UnixNano()) / 1e9)
				logger.Infof("backup metadata succeed, fast mode: %v, path: %q, used %s", iused < 1e5, fpath, time.Since(now))
			} else {
				logger.Warnf("backup metadata failed: %s", err)
			}
			LastBackupDurationG.Set(time.Since(now).Seconds())
		} else {
			LastBackupDurationG.Set(0)
		}
	}
}

func backup(m meta.Meta, blob object.ObjectStorage, now time.Time, fast, skipTrash bool) (string, error) {
	name := "dump-" + now.UTC().Format("2006-01-02-150405") + ".json.gz"
	localDir := os.TempDir()
	if !strings.HasSuffix(localDir, "/") {
		localDir += "/"
	}
	fp, err := os.Create(filepath.Join(localDir, "meta", name))
	if errors.Is(err, syscall.ENOENT) || (errors.Is(err, syscall.ENOTDIR) && runtime.GOOS == "windows") {
		if err = os.MkdirAll(filepath.Join(localDir, "meta"), 0755); err != nil {
			return "", err
		}
		fp, err = os.Create(filepath.Join(localDir, "meta", name))
	}
	if err != nil {
		return "", err
	}
	defer os.Remove(fp.Name())
	defer fp.Close()
	zw, _ := gzip.NewWriterLevel(fp, gzip.BestSpeed)
	var threads = 2
	if m.Name() == "tikv" {
		threads = 10
	}
	err = m.DumpMeta(zw, 0, threads, false, fast, skipTrash) // force dump the whole tree
	_ = zw.Close()
	if err != nil {
		return "", err
	}
	size, err := fp.Seek(0, io.SeekCurrent)
	if err != nil {
		return "", err
	}

	fpath := "meta/" + name
	disk, err := object.CreateStorage("file", localDir, "", "", "")
	if err != nil {
		return "", err
	}
	osync.InitForCopyData()
	_, err = osync.CopyData(disk, blob, fpath, size, true)
	return blob.String() + fpath, err
}

func cleanupBackups(blob object.ObjectStorage, now time.Time) {
	blob = object.WithPrefix(blob, "meta/")
	ch, err := object.ListAll(context.TODO(), blob, "", "", true, false)
	if err != nil {
		logger.Warnf("listAll prefix meta/: %s", err)
		return
	}
	var objs []string
	for o := range ch {
		if o == nil {
			logger.Warnf("list failed, skip cleanup")
			return
		}
		if !o.IsDir() {
			objs = append(objs, o.Key())
		}
	}

	toDel := rotate(objs, now)
	for _, o := range toDel {
		if err = blob.Delete(context.Background(), o); err != nil {
			logger.Warnf("delete object %s: %s", o, err)
		}
	}
}

// Cleanup policy:
// 1. keep all backups within 2 days
// 2. keep one backup each day within 2 weeks
// 3. keep one backup each week within 2 months
// 4. keep one backup each month within 2 years
// 5. delete backups older than 2 years
func rotate(objs []string, now time.Time) []string {
	var days = 2
	cutoff := now.UTC().AddDate(-2, 0, 0)
	edge := now.UTC().AddDate(0, 0, -days)
	next := func() {
		if days < 14 {
			days++
			edge = edge.AddDate(0, 0, -1)
		} else if days < 60 {
			days += 7
			edge = edge.AddDate(0, 0, -7)
		} else {
			days += 30
			edge = edge.AddDate(0, 0, -30)
		}
	}

	var toDel, within []string
	sort.Strings(objs)
	for i := len(objs) - 1; i >= 0; i-- {
		if len(objs[i]) != 30 { // len("dump-2006-01-02-150405.json.gz")
			logger.Warnf("bad object for metadata backup %s: length %d", objs[i], len(objs[i]))
			continue
		}
		ts, err := time.Parse("2006-01-02-150405", objs[i][5:22])
		if err != nil {
			logger.Warnf("bad object for metadata backup %s: %s", objs[i], err)
			continue
		}

		if ts.Before(cutoff) {
			toDel = append(toDel, objs[:i+1]...)
			break
		}

		if ts.Before(edge) {
			if l := len(within); l > 0 { // keep the earliest one
				toDel = append(toDel, within[:l-1]...)
				within = within[:0]
			}
			for next(); ts.Before(edge); next() {
			}
			within = append(within, objs[i])
		} else if days > 2 {
			within = append(within, objs[i])
		}
	}
	if l := len(within); l > 0 {
		toDel = append(toDel, within[:l-1]...)
	}
	return toDel
}


================================================
FILE: pkg/vfs/backup_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"context"
	"testing"
	"time"

	"github.com/juicedata/juicefs/pkg/object"
)

func TestRotate(t *testing.T) {
	format := func(ts time.Time) string {
		return "dump-" + ts.UTC().Format("2006-01-02-150405") + ".json.gz"
	}

	now := time.Now()
	objs := make([]string, 0, 25)
	for cursor, i := now.AddDate(0, 0, -100), 0; i <= 200; i++ { // one backup for every half day
		objs = append(objs, format(cursor))
		toDel := rotate(objs, cursor)
		for _, d := range toDel {
			for j, k := range objs {
				if k == d {
					objs = append(objs[:j], objs[j+1:]...)
					break
				}
			}
		}
		cursor = cursor.Add(time.Duration(12) * time.Hour)
	}

	expect := make([]string, 0, 25)
	expect = append(expect, format(now.AddDate(0, 0, -100)))
	for days := 65; days > 14; days -= 7 {
		expect = append(expect, format(now.AddDate(0, 0, -days)))
	}
	for days := 13; days > 2; days-- {
		expect = append(expect, format(now.AddDate(0, 0, -days)))
	}
	for i := 4; i >= 0; i-- {
		expect = append(expect, format(now.Add(time.Duration(-i*12)*time.Hour)))
	}

	if len(objs) != len(expect) {
		t.Fatalf("length of objs %d != length of expect %d", len(objs), len(expect))
	}
	for i, o := range objs {
		if o != expect[i] {
			t.Fatalf("obj %s != expect %s", o, expect[i])
		}
	}
}

func TestBackup(t *testing.T) {
	v, blob := createTestVFS(nil, "")
	go Backup(v.Meta, blob, time.Millisecond*100, false)
	time.Sleep(time.Millisecond * 100)

	blob = object.WithPrefix(blob, "meta/")
	kc, _ := object.ListAll(context.TODO(), blob, "", "", true, false)
	var keys []string
	for obj := range kc {
		keys = append(keys, obj.Key())
	}
	if len(keys) < 1 {
		t.Fatalf("there should be at least 1 backup file")
	}
}


================================================
FILE: pkg/vfs/compact.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"context"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

var (
	compactSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "compact_size_histogram_bytes",
		Help:    "Distribution of size of compacted data in bytes.",
		Buckets: prometheus.ExponentialBuckets(1024, 2, 16),
	})
)

func readSlice(store chunk.ChunkStore, s *meta.Slice, page *chunk.Page, off int) error {
	buf := page.Data
	read := 0
	reader := store.NewReader(s.Id, int(s.Size))
	for read < len(buf) {
		p := page.Slice(read, len(buf)-read)
		n, err := reader.ReadAt(context.Background(), p, off+int(s.Off))
		p.Release()
		if n == 0 && err != nil {
			return err
		}
		read += n
		off += n
	}
	return nil
}

func Compact(conf chunk.Config, store chunk.ChunkStore, slices []meta.Slice, id uint64) error {
	for utils.AllocMemory()-store.UsedMemory() > int64(conf.BufferSize)*3/2 {
		time.Sleep(time.Millisecond * 100)
	}
	var size uint32
	for _, s := range slices {
		size += s.Len
	}
	compactSizeHistogram.Observe(float64(size))
	logger.Debugf("compact %d slices (%d bytes) to new slice %d", len(slices), size, id)

	writer := store.NewWriter(id)
	writer.SetWriteback(false)

	var pos int
	for i, s := range slices {
		if s.Id == 0 {
			_, err := writer.WriteAt(make([]byte, int(s.Len)), int64(pos))
			if err != nil {
				writer.Abort()
				return err
			}
			pos += int(s.Len)
			continue
		}
		var read int
		for read < int(s.Len) {
			l := min(conf.BlockSize, int(s.Len)-read)
			p := chunk.NewOffPage(l)
			if err := readSlice(store, &slices[i], p, read); err != nil {
				logger.Debugf("can't compact to slice %d, retry later, read %d: %s", id, i, err)
				p.Release()
				writer.Abort()
				return err
			}
			_, err := writer.WriteAt(p.Data, int64(pos+read))
			p.Release()
			if err != nil {
				logger.Errorf("can't compact to slice %d, retry later, write: %s", id, err)
				writer.Abort()
				return err
			}
			read += l
			if pos+read >= conf.BlockSize {
				if err = writer.FlushTo(pos + read); err != nil {
					panic(err)
				}
			}
		}
		pos += int(s.Len)
	}
	err := writer.Finish(pos)
	if err != nil {
		writer.Abort()
	}
	return err
}


================================================
FILE: pkg/vfs/compact_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"context"
	"testing"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
)

func TestCompact(t *testing.T) {
	cconf := chunk.Config{
		BlockSize:   256 * 1024,
		Compress:    "lz4",
		MaxUpload:   2,
		MaxDownload: 200,
		BufferSize:  30 << 20,
		CacheSize:   10 << 20,
		CacheDir:    "memory",
	}
	blob, _ := object.CreateStorage("mem", "", "", "", "")
	store := chunk.NewCachedStore(blob, cconf, nil)

	// prepare the slices
	var slices []meta.Slice
	var total int
	for i := 0; i < 100; i++ {
		buf := make([]byte, 100+i*100)
		for j := range buf {
			buf[j] = byte(i)
		}
		cid := uint64(i)
		w := store.NewWriter(cid)
		if n, e := w.WriteAt(buf, 0); e != nil {
			t.Fatalf("write chunk %d: %s", cid, e)
		} else {
			total += n
		}
		if e := w.Finish(len(buf)); e != nil {
			t.Fatalf("flush chunk %d: %s", cid, e)
		}
		slices = append(slices, meta.Slice{Id: cid, Size: uint32(len(buf)), Len: uint32(len(buf))})
	}

	// compact
	var cid uint64 = 1000
	err := Compact(cconf, store, slices, cid)
	if err != nil {
		t.Fatalf("compact %d slices : %s", len(slices), err)
	}

	// verify result
	r := store.NewReader(cid, total)
	var off int
	for i := 0; i < 100; i++ {
		buf := make([]byte, 100+i*100)
		page := chunk.NewPage(buf)
		n, err := r.ReadAt(context.Background(), page, off)
		if err != nil {
			t.Fatalf("read chunk %d at %d: %s", cid, off, err)
		} else if n != len(buf) {
			t.Fatalf("short read: %d", n)
		}
		for j := range buf {
			if buf[j] != byte(i) {
				t.Fatalf("invalid byte at %d: %d !=%d", j, buf[j], i)
			}
		}
		off += len(buf)
		defer page.Release()
	}

	// failed
	_ = store.Remove(1, 200)
	err = Compact(cconf, store, slices, cid)
	if err == nil {
		t.Fatalf("compact should fail with read but got nil")
	}

	// TODO: inject write failure
}


================================================
FILE: pkg/vfs/fill.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"fmt"
	"path"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"

	"github.com/juicedata/juicefs/pkg/meta"
)

type _file struct {
	ino  Ino
	size uint64
}

type CacheAction uint8

func (act CacheAction) String() string {
	switch act {
	case WarmupCache:
		return "warmup cache"
	case EvictCache:
		return "evict cache"
	case CheckCache:
		return "check cache"
	}
	return "unknown operation"
}

const (
	WarmupCache CacheAction = iota
	EvictCache
	CheckCache = 2
)

type CacheFiller struct {
	conf  *Config
	meta  meta.Meta
	store chunk.ChunkStore
}

func NewCacheFiller(conf *Config, meta meta.Meta, store chunk.ChunkStore) *CacheFiller {
	return &CacheFiller{
		conf:  conf,
		meta:  meta,
		store: store,
	}
}

type token struct{}

func (c *CacheFiller) cacheFile(ctx meta.Context, action CacheAction, resp *CacheResponse, concurrent chan token, wg *sync.WaitGroup, f _file) {
	concurrent <- token{}
	wg.Add(1)
	go func() {
		defer func() {
			<-concurrent
			wg.Done()
		}()

		if f.ino == 0 {
			logger.Warnf("%s got inode 0", action)
			return
		}

		var handler sliceHandler
		switch action {
		case WarmupCache:
			handler = func(s meta.Slice) error {
				return c.store.FillCache(s.Id, s.Size)
			}

			if c.conf.Meta.OpenCache > 0 {
				if err := c.meta.Open(ctx, f.ino, syscall.O_RDONLY, &meta.Attr{}); err != 0 {
					logger.Errorf("Inode %d could be opened: %s", f.ino, err)
				}
				_ = c.meta.Close(ctx, f.ino)
			}
		case EvictCache:
			handler = func(s meta.Slice) error {
				return c.store.EvictCache(s.Id, s.Size)
			}
		case CheckCache:
			blockHandler := func(exists bool, loc string, size int) {
				if exists {
					resp.Lock()
					resp.Locations[loc] += uint64(size)
					resp.Unlock()
				} else {
					atomic.AddUint64(&resp.MissBytes, uint64(size))
				}
			}
			handler = func(s meta.Slice) error {
				return c.store.CheckCache(s.Id, s.Size, blockHandler)
			}
		}

		iter := newSliceIterator(ctx, c.meta, f.ino, f.size, resp)
		err := iter.Iterate(handler, concurrent)
		if err != nil {
			logger.Errorf("%s error : %s", action, err)
		}

		atomic.AddUint64(&resp.FileCount, 1)
	}()
}

func (c *CacheFiller) Cache(ctx meta.Context, action CacheAction, paths []string, threads int, resp *CacheResponse) {
	if resp == nil {
		resp = &CacheResponse{Locations: make(map[string]uint64)}
	}
	start := time.Now()
	todo := make(chan _file, 20*threads)

	concurrent := make(chan token, threads)
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		for f := range todo {
			if ctx.Canceled() {
				return
			}
			c.cacheFile(ctx, action, resp, concurrent, &wg, f)
		}
	}()

	var inode Ino
	var attr = &Attr{}
	for _, p := range paths {
		if st := c.resolve(ctx, p, &inode, attr); st != 0 {
			logger.Warnf("Failed to resolve path %s: %s", p, st)
			continue
		}
		logger.Debugf("path %s", p)
		if attr.Typ == meta.TypeDirectory {
			c.walkDir(ctx, inode, todo)
		} else if attr.Typ == meta.TypeFile {
			_ = sendFile(ctx, todo, _file{inode, attr.Length})
		}
		if ctx.Canceled() {
			break
		}
	}
	close(todo)
	wg.Wait()

	if ctx.Canceled() {
		logger.Infof("%s cancelled", action)
	}
	logger.Infof("%s %d paths in %s", action, len(paths), time.Since(start))
}

func sendFile(ctx meta.Context, todo chan _file, f _file) error {
	select {
	case todo <- f:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func (c *CacheFiller) resolve(ctx meta.Context, p string, inode *Ino, attr *Attr) syscall.Errno {
	var inodePrefix = "inode:"
	if strings.HasPrefix(p, inodePrefix) {
		i, err := strconv.ParseUint(p[len(inodePrefix):], 10, 64)
		if err == nil {
			*inode = meta.Ino(i)
			return c.meta.GetAttr(ctx, meta.Ino(i), attr)
		}
	}
	p = strings.Trim(p, "/")
	err := c.meta.Resolve(ctx, 1, p, inode, attr)
	if err != syscall.ENOTSUP {
		return err
	}

	// Fallback to the default implementation that calls `meta.Lookup` for each directory along the path.
	// It might be slower for deep directories, but it works for every meta that implements `Lookup`.
	parent := Ino(1)
	ss := strings.Split(p, "/")
	for i, name := range ss {
		if len(name) == 0 {
			continue
		}
		if parent == meta.RootInode && i == len(ss)-1 && IsSpecialName(name) {
			*inode, attr = GetInternalNodeByName(name)
			parent = *inode
			break
		}
		if i > 0 {
			if err = c.meta.Access(ctx, parent, MODE_MASK_R|MODE_MASK_X, attr); err != 0 {
				return err
			}
		}
		if err = c.meta.Lookup(ctx, parent, name, inode, attr, false); err != 0 {
			return err
		}
		if attr.Typ == meta.TypeSymlink {
			var buf []byte
			if err = c.meta.ReadLink(ctx, *inode, &buf); err != 0 {
				return err
			}
			target := string(buf)
			if strings.HasPrefix(target, "/") || strings.Contains(target, "://") {
				return syscall.ENOTSUP
			}
			target = path.Join(strings.Join(ss[:i], "/"), target)
			if err = c.resolve(ctx, target, inode, attr); err != 0 {
				return err
			}
		}
		parent = *inode
	}
	if parent == meta.RootInode {
		*inode = parent
		if err = c.meta.GetAttr(ctx, *inode, attr); err != 0 {
			return err
		}
	}
	return 0
}

func (c *CacheFiller) walkDir(ctx meta.Context, inode Ino, todo chan _file) {
	pending := make([]Ino, 1)
	pending[0] = inode
	for len(pending) > 0 {
		l := len(pending)
		l--
		inode = pending[l]
		pending = pending[:l]
		var entries []*meta.Entry
		r := c.meta.Readdir(ctx, inode, 1, &entries)
		if r == 0 {
			for _, f := range entries {
				name := string(f.Name)
				if name == "." || name == ".." {
					continue
				}
				if f.Attr.Typ == meta.TypeDirectory {
					pending = append(pending, f.Inode)
				} else if f.Attr.Typ != meta.TypeSymlink {
					_ = sendFile(ctx, todo, _file{f.Inode, f.Attr.Length})
				}
				if ctx.Canceled() {
					return
				}
			}
		} else {
			logger.Warnf("readdir %d: %s", inode, r)
		}
	}
}

type sliceIterator struct {
	ctx      meta.Context
	mClient  meta.Meta
	ino      Ino
	chunkCnt uint32
	stat     *CacheResponse

	err            error
	nextChunkIndex uint32
	nextSliceIndex uint64
	slices         []meta.Slice
}

type sliceHandler func(s meta.Slice) error

func (iter *sliceIterator) hasNext() bool {
	if iter.err != nil {
		logger.Error(iter.err)
		iter.err = nil
	}

	if iter.ctx.Canceled() {
		iter.err = iter.ctx.Err()
		return false
	}

	for iter.nextSliceIndex >= uint64(len(iter.slices)) {
		if iter.nextChunkIndex >= iter.chunkCnt {
			return false
		}

		iter.slices = nil
		iter.nextSliceIndex = 0
		if st := iter.mClient.Read(iter.ctx, iter.ino, iter.nextChunkIndex, &iter.slices); st != 0 {
			iter.err = fmt.Errorf("get slices of inode %d index %d error: %d", iter.ino, iter.nextChunkIndex, st)
			logger.Error(iter.err)
			return false
		}
		iter.nextChunkIndex++
	}

	return true
}

func (iter *sliceIterator) next() meta.Slice {
	s := iter.slices[iter.nextSliceIndex]
	iter.nextSliceIndex++
	return s
}

func (iter *sliceIterator) Iterate(handler sliceHandler, concurrent chan token) error {
	if handler == nil {
		return fmt.Errorf("handler not set")
	}
	var wg sync.WaitGroup
	for iter.hasNext() {
		s := iter.next()
		if s.Id == 0 {
			continue
		}
		atomic.AddUint64(&iter.stat.SliceCount, 1)
		atomic.AddUint64(&iter.stat.TotalBytes, uint64(s.Size))

		select {
		case concurrent <- token{}:
			wg.Add(1)
			go func() {
				defer func() {
					<-concurrent
					wg.Done()
				}()
				if err := handler(s); err != nil {
					iter.err = fmt.Errorf("inode %d slice %d : %w", iter.ino, s.Id, err)
				}
			}()
		default:
			if err := handler(s); err != nil {
				iter.err = fmt.Errorf("inode %d slice %d : %w", iter.ino, s.Id, err)
			}
		}
	}
	wg.Wait()
	return iter.err
}

func newSliceIterator(ctx meta.Context, mClient meta.Meta, ino Ino, size uint64, stat *CacheResponse) *sliceIterator {
	return &sliceIterator{
		ctx:     ctx,
		mClient: mClient,
		ino:     ino,
		stat:    stat,

		nextSliceIndex: 0,
		nextChunkIndex: 0,
		chunkCnt:       uint32((size + meta.ChunkSize - 1) / meta.ChunkSize),
	}
}


================================================
FILE: pkg/vfs/fill_test.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"os"
	"testing"

	"github.com/juicedata/juicefs/pkg/meta"
)

func TestFill(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.Background())
	entry, _ := v.Mkdir(ctx, 1, "test", 0777, 022)
	fe, fh, _ := v.Create(ctx, entry.Inode, "file", 0644, 0, uint32(os.O_WRONLY))
	_ = v.Write(ctx, fe.Inode, []byte("hello"), 0, fh)
	_ = v.Flush(ctx, fe.Inode, fh, 0)
	v.Release(ctx, fe.Inode, fh)
	_, _ = v.Symlink(ctx, "test/file", 1, "sym")
	_, _ = v.Symlink(ctx, "/tmp/testfile", 1, "sym2")
	_, _ = v.Symlink(ctx, "testfile", 1, "sym3")

	// normal cases
	v.cacheFiller.Cache(meta.Background(), WarmupCache, []string{"/test/file", "/test", "/sym", "/"}, 2, nil)

	// remove chunk
	var slices []meta.Slice
	_ = v.Meta.Read(meta.Background(), fe.Inode, 0, &slices)
	for _, s := range slices {
		_ = v.Store.Remove(s.Id, int(s.Size))
	}
	// bad cases
	v.cacheFiller.Cache(meta.Background(), WarmupCache, []string{"/test/file", "/sym2", "/sym3", "/.stats", "/not_exists"}, 2, nil)
}


================================================
FILE: pkg/vfs/handle.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"encoding/hex"
	"encoding/json"
	"io"
	"os"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
)

type handle struct {
	sync.Mutex
	inode Ino
	fh    uint64

	// for dir
	dirHandler meta.DirHandler
	readAt     time.Time

	// for file
	flags      uint32
	locks      uint8
	flockOwner uint64 // kernel 3.1- does not pass lock_owner in release()
	ofdOwner   uint64 // OFD lock
	reader     FileReader
	writer     FileWriter
	ops        []Context

	// rwlock
	writing uint32
	readers uint32
	writers uint32
	cond    *utils.Cond

	// internal files
	off     uint64
	data    []byte
	pending []byte
	bctx    meta.Context
}

func (h *handle) Write(buf []byte) (int, error) {
	h.Lock()
	defer h.Unlock()
	h.data = append(h.data, buf...)
	return len(buf), nil
}

func (h *handle) addOp(ctx Context) {
	h.Lock()
	defer h.Unlock()
	h.ops = append(h.ops, ctx)
}

func (h *handle) removeOp(ctx Context) {
	h.Lock()
	defer h.Unlock()
	for i, c := range h.ops {
		if c == ctx {
			h.ops[i] = h.ops[len(h.ops)-1]
			h.ops = h.ops[:len(h.ops)-1]
			break
		}
	}
}

func (h *handle) cancelOp(pid uint32) {
	if pid == 0 {
		return
	}
	h.Lock()
	defer h.Unlock()
	for _, c := range h.ops {
		if c.Pid() == pid || c.Pid() > 0 && c.Duration() > time.Second {
			c.Cancel()
		}
	}
}

func (h *handle) Rlock(ctx Context) bool {
	h.Lock()
	for (h.writing | h.writers) != 0 {
		if h.cond.WaitWithTimeout(time.Second) && ctx.Canceled() {
			h.Unlock()
			logger.Warnf("read lock %d interrupted", h.inode)
			return false
		}
	}
	h.readers++
	h.Unlock()
	h.addOp(ctx)
	return true
}

func (h *handle) Runlock() {
	h.Lock()
	h.readers--
	if h.readers == 0 {
		h.cond.Broadcast()
	}
	h.Unlock()
}

func (h *handle) Wlock(ctx Context) bool {
	h.Lock()
	h.writers++
	for (h.readers | h.writing) != 0 {
		if h.cond.WaitWithTimeout(time.Second) && ctx.Canceled() {
			h.writers--
			h.Unlock()
			logger.Warnf("write lock %d interrupted", h.inode)
			return false
		}
	}
	h.writers--
	h.writing = 1
	h.Unlock()
	h.addOp(ctx)
	return true
}

func (h *handle) Wunlock() {
	h.Lock()
	h.writing = 0
	h.cond.Broadcast()
	h.Unlock()
}

func (h *handle) Close() {
	if h.reader != nil {
		h.reader.Close(meta.Background())
		h.reader = nil
	}
	if h.writer != nil {
		_ = h.writer.Close(meta.Background())
		h.writer = nil
	}
}

func (v *VFS) newHandle(inode Ino, readOnly bool) *handle {
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	var lowBits uint64
	if readOnly {
		lowBits = 1
	}
	for v.handleIno[v.nextfh] > 0 || v.nextfh&1 != lowBits {
		v.nextfh++ // skip recovered fd
	}
	fh := v.nextfh
	h := &handle{inode: inode, fh: fh}
	v.nextfh++
	h.cond = utils.NewCond(h)
	v.handles[inode] = append(v.handles[inode], h)
	return h
}

func (v *VFS) findAllHandles(inode Ino) []*handle {
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	hs := v.handles[inode]
	if len(hs) <= 1 {
		return hs
	}
	// copy hs so it will not be modified by releaseHandle
	hs2 := make([]*handle, len(hs))
	copy(hs2, hs)
	return hs2
}

const O_RECOVERED = 1 << 31 // is recovered fd

func (v *VFS) findHandle(inode Ino, fh uint64) *handle {
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	for _, f := range v.handles[inode] {
		if f.fh == fh {
			return f
		}
	}
	if fh&1 == 1 && inode != controlInode {
		f := &handle{inode: inode, fh: fh, flags: O_RECOVERED}
		f.cond = utils.NewCond(f)
		v.handles[inode] = append(v.handles[inode], f)
		if v.handleIno[fh] == 0 {
			v.handleIno[fh] = inode
		}
		return f
	}
	return nil
}

func (v *VFS) releaseHandle(inode Ino, fh uint64) {
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	hs := v.handles[inode]
	for i, f := range hs {
		if f.fh == fh {
			if hs[i].dirHandler != nil {
				hs[i].dirHandler.Close()
				hs[i].dirHandler = nil
			}
			if i+1 < len(hs) {
				hs[i] = hs[len(hs)-1]
			}
			if len(hs) > 1 {
				v.handles[inode] = hs[:len(hs)-1]
			} else {
				delete(v.handles, inode)
			}
			break
		}
	}
}

func (v *VFS) newFileHandle(inode Ino, length uint64, flags uint32) uint64 {
	h := v.newHandle(inode, (flags&O_ACCMODE) == syscall.O_RDONLY)
	h.Lock()
	defer h.Unlock()
	h.flags = flags
	switch flags & O_ACCMODE {
	case syscall.O_RDONLY:
		h.reader = v.reader.Open(inode, length)
	case syscall.O_WRONLY: // FUSE writeback_cache mode need reader even for WRONLY
		fallthrough
	case syscall.O_RDWR:
		h.reader = v.reader.Open(inode, length)
		h.writer = v.writer.Open(inode, length)
	}
	return h.fh
}

func (v *VFS) releaseFileHandle(ino Ino, fh uint64) {
	h := v.findHandle(ino, fh)
	if h != nil {
		v.releaseHandle(ino, fh)
		h.Lock()
		for (h.writing | h.writers | h.readers) != 0 {
			h.cond.WaitWithTimeout(time.Millisecond * 100)
		}
		h.Unlock()
		h.Close()
	}
}

func (v *VFS) invalidateDirHandle(parent Ino, name string, inode Ino, attr *Attr) {
	v.hanleM.Lock()
	hs := v.handles[parent]
	v.hanleM.Unlock()
	for _, h := range hs {
		h.Lock()
		if h.dirHandler != nil {
			if inode > 0 {
				h.dirHandler.Insert(inode, name, attr)
			} else {
				h.dirHandler.Delete(name)
			}
		}
		h.Unlock()
	}
}

type state struct {
	Handler map[uint64]saveHandle
	NextFh  uint64
}

type saveHandle struct {
	Inode      uint64
	Length     uint64
	Flags      uint32
	UseLocks   uint8
	FlockOwner uint64
	Off        uint64
	Data       string
}

func (v *VFS) dumpAllHandles(path string) (err error) {
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	var vfsState state
	vfsState.Handler = make(map[uint64]saveHandle)
	for ino, hs := range v.handles {
		if ino == controlInode {
			// the job is lost, can't be recovered
			continue
		}
		for _, h := range hs {
			h.Lock()
			if ino == logInode {
				readerLock.RLock()
				reader := readers[h.fh]
				readerLock.RUnlock()
				if reader == nil {
					continue
				}
				reader.Lock()
			OUTER:
				for {
					select {
					case line := <-reader.buffer:
						reader.last = append(reader.last, line...)
					default:
						break OUTER
					}
				}
				h.data = reader.last
				reader.Unlock()
			}
			var length uint64
			if h.writer != nil {
				length = h.writer.GetLength()
				err := h.writer.Flush(meta.Background())
				if err != 0 {
					logger.Errorf("flush writer of %d: %s", ino, err)
				}
			} else if h.reader != nil {
				length = h.reader.GetLength()
			}
			s := saveHandle{
				Inode:      uint64(h.inode),
				Length:     length,
				Flags:      h.flags,
				UseLocks:   h.locks,
				FlockOwner: h.flockOwner,
				Off:        h.off,
				Data:       hex.EncodeToString(h.data),
			}
			h.Unlock()
			vfsState.Handler[h.fh] = s
		}
	}
	vfsState.NextFh = v.nextfh
	d, err := json.Marshal(vfsState)
	if err != nil {
		return err
	}
	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()
	_, err = f.Write(d)
	if err != nil {
		return err
	}
	return
}

func (v *VFS) loadAllHandles(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	d, err := io.ReadAll(f)
	if err != nil {
		return err
	}
	var vfsState state
	err = json.Unmarshal(d, &vfsState)
	if err != nil {
		return err
	}
	v.hanleM.Lock()
	defer v.hanleM.Unlock()
	for fh, s := range vfsState.Handler {
		data, err := hex.DecodeString(s.Data)
		if err != nil {
			logger.Warnf("decode data for inode %d: %s", s.Inode, err)
		}
		h := &handle{
			inode:      Ino(s.Inode),
			fh:         fh,
			flags:      s.Flags,
			locks:      s.UseLocks,
			flockOwner: s.FlockOwner,
			off:        s.Off,
		}
		h.cond = utils.NewCond(h)
		v.handles[h.inode] = append(v.handles[h.inode], h)
		v.handleIno[fh] = h.inode
		if s.Inode == logInode {
			openAccessLog(fh)
			readers[fh].last = data
			continue
		}
		h.data = data
		switch s.Flags & O_ACCMODE {
		case syscall.O_RDONLY:
			h.reader = v.reader.Open(h.inode, s.Length)
		case syscall.O_WRONLY: // FUSE writeback_cache mode need reader even for WRONLY
			fallthrough
		case syscall.O_RDWR:
			h.reader = v.reader.Open(h.inode, s.Length)
			h.writer = v.writer.Open(h.inode, s.Length)
		}
	}
	if len(v.handleIno) > 0 {
		logger.Infof("load %d handles from %s", len(v.handleIno), path)
	}
	v.nextfh = vfsState.NextFh
	// _ = os.Remove(path)
	return nil
}


================================================
FILE: pkg/vfs/helpers.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"fmt"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
)

const (
	MODE_MASK_R = 4
	MODE_MASK_W = 2
	MODE_MASK_X = 1
)

func strerr(errno syscall.Errno) string {
	if errno == 0 {
		return "OK"
	}
	return errno.Error()
}

var typestr = map[uint16]byte{
	syscall.S_IFSOCK: 's',
	syscall.S_IFLNK:  'l',
	syscall.S_IFREG:  '-',
	syscall.S_IFBLK:  'b',
	syscall.S_IFDIR:  'd',
	syscall.S_IFCHR:  'c',
	syscall.S_IFIFO:  'f',
	0:                '?',
}

type smode uint16

func (mode smode) String() string {
	s := []byte("?rwxrwxrwx")
	s[0] = typestr[uint16(mode)&(syscall.S_IFMT&0xffff)]
	if (mode & syscall.S_ISUID) != 0 {
		s[3] = 's'
	}
	if (mode & syscall.S_ISGID) != 0 {
		s[6] = 's'
	}
	if (mode & syscall.S_ISVTX) != 0 {
		s[9] = 't'
	}
	for i := uint16(0); i < 9; i++ {
		if (mode & (1 << i)) == 0 {
			if s[9-i] == 's' || s[9-i] == 't' {
				s[9-i] &= 0xDF
			} else {
				s[9-i] = '-'
			}
		}
	}
	return string(s)
}

// Entry is an alias of meta.Entry, which is used to generate the string
// representation lazily.
type Entry meta.Entry

func (entry *Entry) String() string {
	if entry == nil {
		return ""
	}
	if entry.Attr == nil {
		return fmt.Sprintf(" (%d)", entry.Inode)
	}
	a := entry.Attr
	mode := a.SMode()
	return fmt.Sprintf(" (%d,[%s:0%06o,%d,%d,%d,%d,%d,%d,%d])",
		entry.Inode, smode(mode), mode, a.Nlink, a.Uid, a.Gid,
		a.Atime, a.Mtime, a.Ctime, a.Length)
}

// LogContext is an interface to add duration on meta.Context.
type LogContext interface {
	meta.Context
	Duration() time.Duration
}

type logContext struct {
	meta.Context
	start time.Time
}

func (ctx *logContext) Duration() time.Duration {
	return time.Since(ctx.start)
}

// NewLogContext creates an LogContext starting from now.
func NewLogContext(ctx meta.Context) LogContext {
	return &logContext{ctx, time.Now()}
}


================================================
FILE: pkg/vfs/helpers_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"syscall"
	"testing"

	"github.com/juicedata/juicefs/pkg/meta"
)

type smodeCase struct {
	mode uint16
	str  string
}

var cases = []smodeCase{
	{syscall.S_IFDIR | 00755, "drwxr-xr-x"},
	{syscall.S_IFREG | 01644, "-rw-r--r-T"},
	{syscall.S_IFLNK | 03755, "lrwxr-sr-t"},
	{syscall.S_IFSOCK | 06700, "srws--S---"},
}

func TestSmode(t *testing.T) {
	for _, s := range cases {
		res := smode(s.mode).String()
		if res != s.str {
			t.Fatalf("str of %o: %s != %s", s.mode, res, s.str)
		}
	}
}

func TestEntryString(t *testing.T) {
	var e *Entry
	if e.String() != "" {
		t.Fatalf("empty entry should be ''")
	}
	e = &Entry{Inode: 2, Name: []byte("test")}
	if e.String() != " (2)" {
		t.Fatalf("empty entry should be ` (2)`")
	}

	e.Attr = &meta.Attr{
		Typ:    meta.TypeFile,
		Mode:   01755,
		Nlink:  1,
		Uid:    2,
		Gid:    3,
		Atime:  4,
		Mtime:  5,
		Ctime:  6,
		Length: 7,
	}
	if e.String() != " (2,[-rwxr-xr-t:0101755,1,2,3,4,5,6,7])" {
		t.Fatalf("string of entry is not expected: %s", e.String())
	}
}

func TestError(t *testing.T) {
	if strerr(0) != "OK" {
		t.Fatalf("expect 'OK' but got %q", strerr(0))
	}
	if strerr(syscall.EACCES) != "permission denied" {
		t.Fatalf("expect 'Access denied', but got %q", strerr(syscall.EACCES))
	}
}


================================================
FILE: pkg/vfs/internal.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	io_prometheus_client "github.com/prometheus/client_model/go"
)

const (
	minInternalNode = 0x7FFFFFFF00000000
	logInode        = minInternalNode + 1
	controlInode    = minInternalNode + 2
	StatsInode      = minInternalNode + 3
	ConfigInode     = minInternalNode + 4
	trashInode      = meta.TrashInode
)

var controlMutex sync.Mutex
var controlHandlers = make(map[uint32]uint64)

func (v *VFS) getControlHandle(pid uint32) uint64 {
	controlMutex.Lock()
	defer controlMutex.Unlock()
	fh := controlHandlers[pid]
	if fh == 0 {
		h := v.newHandle(controlInode, false)
		fh = h.fh
		controlHandlers[pid] = fh
	}
	return fh
}

func (v *VFS) releaseControlHandle(pid uint32) {
	controlMutex.Lock()
	defer controlMutex.Unlock()
	fh := controlHandlers[pid]
	if fh != 0 {
		v.releaseHandle(controlInode, fh)
		delete(controlHandlers, pid)
	}
}

type internalNode struct {
	inode Ino
	name  string
	attr  *Attr
}

var internalNodes = []*internalNode{
	{controlInode, ".control", &Attr{Mode: 0666}},
	{logInode, ".accesslog", &Attr{Mode: 0400}},
	{StatsInode, ".stats", &Attr{Mode: 0444}},
	{ConfigInode, ".config", &Attr{Mode: 0400}},
	{trashInode, meta.TrashName, &Attr{Mode: 0555}},
}

func init() {
	uid := uint32(utils.GetCurrentUID())
	gid := uint32(utils.GetCurrentGID())
	now := time.Now().Unix()
	for _, v := range internalNodes {
		if v.inode == trashInode {
			v.attr.Typ = meta.TypeDirectory
			v.attr.Nlink = 2
		} else {
			v.attr.Typ = meta.TypeFile
			v.attr.Nlink = 1
			v.attr.Uid = uid
			v.attr.Gid = gid
		}
		v.attr.Atime = now
		v.attr.Mtime = now
		v.attr.Ctime = now
		v.attr.Full = true
	}
}

func IsSpecialNode(ino Ino) bool {
	return ino >= minInternalNode
}

func IsSpecialName(name string) bool {
	if name[0] != '.' {
		return false
	}
	for _, n := range internalNodes {
		if name == n.name {
			return true
		}
	}
	return false
}

func getInternalNode(ino Ino) *internalNode {
	for _, n := range internalNodes {
		if ino == n.inode {
			return n
		}
	}
	return nil
}

func GetInternalNodeByName(name string) (Ino, *Attr) {
	n := getInternalNodeByName(name)
	if n != nil {
		return n.inode, n.attr
	}
	return 0, nil
}

func getInternalNodeByName(name string) *internalNode {
	if name[0] != '.' {
		return nil
	}
	for _, n := range internalNodes {
		if name == n.name {
			return n
		}
	}
	return nil
}

func CollectMetrics(registry *prometheus.Registry) []byte {
	if registry == nil {
		return []byte("")
	}
	mfs, err := registry.Gather()
	if err != nil {
		logger.Errorf("collect metrics: %s", err)
		return nil
	}
	w := bytes.NewBuffer(nil)
	format := func(v float64) string {
		return strconv.FormatFloat(v, 'f', -1, 64)
	}
	for _, mf := range mfs {
		for _, m := range mf.Metric {
			var name = *mf.Name
			for _, l := range m.Label {
				if *l.Name == "method" || *l.Name == "errno" {
					name += "_" + *l.Value
				}
			}
			switch *mf.Type {
			case io_prometheus_client.MetricType_GAUGE:
				_, _ = fmt.Fprintf(w, "%s %s\n", name, format(*m.Gauge.Value))
			case io_prometheus_client.MetricType_COUNTER:
				_, _ = fmt.Fprintf(w, "%s %s\n", name, format(*m.Counter.Value))
			case io_prometheus_client.MetricType_HISTOGRAM:
				_, _ = fmt.Fprintf(w, "%s_total %d\n", name, *m.Histogram.SampleCount)
				_, _ = fmt.Fprintf(w, "%s_sum %s\n", name, format(*m.Histogram.SampleSum))
			case io_prometheus_client.MetricType_SUMMARY:
			}
		}
	}
	return w.Bytes()
}

func writeProgress(item1, item2 *uint64, out io.Writer, done chan struct{}) {
	wb := utils.NewBuffer(17)
	wb.Put8(meta.CPROGRESS)
	if item2 == nil {
		item2 = new(uint64)
	}
	ticker := time.NewTicker(time.Millisecond * 300)
	for {
		select {
		case <-ticker.C:
			wb.Put64(atomic.LoadUint64(item1))
			wb.Put64(atomic.LoadUint64(item2))
			_, _ = out.Write(wb.Bytes())
			wb.Seek(1)
		case <-done:
			ticker.Stop()
			if *item1 > 0 || *item2 > 0 {
				wb.Put64(atomic.LoadUint64(item1))
				wb.Put64(atomic.LoadUint64(item2))
				_, _ = out.Write(wb.Bytes())
			}
			return
		}
	}
}

type obj struct {
	key            string
	size, off, len uint32
}

func (v *VFS) calcObjects(id uint64, size, offset, length uint32) []*obj {
	if id == 0 {
		return []*obj{{"", size, offset, length}}
	}
	if length == 0 || offset+length > size {
		logger.Warnf("Corrupt slice id %d size %d offset %d length %d", id, size, offset, length)
		return nil
	}
	bsize := uint32(v.Conf.Chunk.BlockSize)
	var prefix string
	if v.Conf.Chunk.HashPrefix {
		prefix = fmt.Sprintf("%s/chunks/%02X/%v/%v", v.Conf.Format.Name, id%256, id/1000/1000, id)
	} else {
		prefix = fmt.Sprintf("%s/chunks/%v/%v/%v", v.Conf.Format.Name, id/1000/1000, id/1000, id)
	}
	first := offset / bsize
	last := (offset + length - 1) / bsize
	objs := make([]*obj, 0, last-first+1)
	for indx := first; indx <= last; indx++ {
		objs = append(objs, &obj{fmt.Sprintf("%s_%d_%d", prefix, indx, bsize), bsize, 0, bsize})
	}
	fo, lo := objs[0], objs[len(objs)-1]
	fo.off = offset - first*bsize
	fo.len = fo.size - fo.off
	if (last+1)*bsize > size {
		lo.size = size - last*bsize
		lo.key = fmt.Sprintf("%s_%d_%d", prefix, last, lo.size)
	}
	lo.len = (offset + length) - last*bsize - lo.off

	return objs
}

type InfoResponse struct {
	Ino     Ino
	Failed  bool
	Reason  string
	Summary meta.Summary
	Paths   []string
	Chunks  []*chunkSlice
	Objects []*chunkObj
	PLocks  []meta.PLockItem
	FLocks  []meta.FLockItem
}

type SummaryReponse struct {
	Errno syscall.Errno
	Tree  meta.TreeSummary
}

type CacheResponse struct {
	sync.Mutex
	FileCount  uint64
	SliceCount uint64
	TotalBytes uint64
	MissBytes  uint64 // for check op
	Locations  map[string]uint64
}

func (resp *CacheResponse) Add(other *CacheResponse) {
	resp.FileCount += other.FileCount
	resp.TotalBytes += other.TotalBytes
	resp.SliceCount += other.SliceCount
	resp.MissBytes += other.MissBytes
	for k, bytes := range other.Locations {
		resp.Locations[k] += bytes
	}
}

type chunkSlice struct {
	ChunkIndex uint64
	meta.Slice
}

type chunkObj struct {
	ChunkIndex     uint64
	Key            string
	Size, Off, Len uint32
}

func (v *VFS) handleInternalMsg(ctx meta.Context, cmd uint32, r *utils.Buffer, out io.Writer) {
	switch cmd {
	case meta.Rmr:
		done := make(chan struct{})
		inode := Ino(r.Get64())
		name := string(r.Get(int(r.Get8())))
		var skipTrash bool
		var numThreads int = meta.RmrDefaultThreads
		if r.HasMore() {
			skipTrash = r.Get8()&1 != 0
		}
		if r.HasMore() {
			numThreads = int(r.Get8())
		}
		var count uint64
		var st syscall.Errno
		go func() {
			logger.Infof("Start to rmr %d/%s, workers=%d, skipTrash=%v", inode, name, numThreads, skipTrash)
			st = v.Meta.Remove(ctx, inode, name, skipTrash, numThreads, &count)
			if st != 0 {
				logger.Errorf("remove %d/%s: %s", inode, name, st)
			}
			close(done)
		}()
		writeProgress(&count, nil, out, done)
		if st == 0 && v.InvalidateEntry != nil {
			if st := v.InvalidateEntry(inode, name); st != 0 {
				logger.Warnf("Invalidate entry %d/%s: %s", inode, name, st)
			}
		}
		_, _ = out.Write([]byte{uint8(st)})
	case meta.Clone:
		done := make(chan struct{})
		srcIno := Ino(r.Get64())
		srcParentIno := Ino(r.Get64())
		dstParentIno := Ino(r.Get64())
		dstName := string(r.Get(int(r.Get8())))
		umask := r.Get16()
		cmode := r.Get8()
		var concurrency uint8 = meta.CLONE_DEFAULT_CONCURRENCY // default for backward compatibility
		if r.HasMore() {
			concurrency = r.Get8()
		}
		var count, total uint64
		var eno syscall.Errno
		go func() {
			logger.Infof("Start to clone %d/%d to %d/%s, cmode=%d, umask=%d, concurrency=%d", srcParentIno, srcIno, dstParentIno, dstName, cmode, umask, concurrency)
			if eno = v.Meta.Clone(ctx, srcParentIno, srcIno, dstParentIno, dstName, cmode, umask, concurrency, &count, &total); eno != 0 {
				logger.Errorf("clone failed srcIno:%d,dstParentIno:%d,dstName:%s,cmode:%d,umask:%d,concurrency:%d,eno:%v", srcIno, dstParentIno, dstName, cmode, umask, concurrency, eno)
			}
			close(done)
		}()

		writeProgress(&count, &total, out, done)
		_, _ = out.Write([]byte{uint8(eno)})

	case meta.LegacyInfo:
		var summary meta.Summary
		inode := Ino(r.Get64())
		var recursive uint8 = 1
		if r.HasMore() {
			recursive = r.Get8()
		}
		var raw bool
		if r.HasMore() {
			raw = r.Get8() != 0
		}
		logger.Infof("Start to get legacy info of %d, recursive=%d", inode, recursive)

		wb := utils.NewBuffer(4)
		r := v.Meta.GetSummary(ctx, inode, &summary, recursive != 0, true)
		if r != 0 {
			msg := r.Error()
			wb.Put32(uint32(len(msg)))
			_, _ = out.Write(append(wb.Bytes(), msg...))
			return
		}
		var w = bytes.NewBuffer(nil)
		fmt.Fprintf(w, "  inode: %d\n", inode)
		fmt.Fprintf(w, "  files: %d\n", summary.Files)
		fmt.Fprintf(w, "   dirs: %d\n", summary.Dirs)
		fmt.Fprintf(w, " length: %s\n", utils.FormatBytes(summary.Length))
		fmt.Fprintf(w, "   size: %s\n", utils.FormatBytes(summary.Size))
		ps := v.Meta.GetPaths(ctx, inode)
		switch len(ps) {
		case 0:
			fmt.Fprintf(w, "   path: %s\n", "unknown")
		case 1:
			fmt.Fprintf(w, "   path: %s\n", ps[0])
		default:
			fmt.Fprintf(w, "  paths:\n")
			for _, p := range ps {
				fmt.Fprintf(w, "\t%s\n", p)
			}
		}
		if summary.Files == 1 && summary.Dirs == 0 {
			if raw {
				fmt.Fprintf(w, " chunks:\n")
			} else {
				fmt.Fprintf(w, "objects:\n")
			}
			for indx := uint64(0); indx*meta.ChunkSize < summary.Length; indx++ {
				var cs []meta.Slice
				_ = v.Meta.Read(ctx, inode, uint32(indx), &cs)
				for _, c := range cs {
					if raw {
						fmt.Fprintf(w, "\t%d:\t%d\t%d\t%d\t%d\n", indx, c.Id, c.Size, c.Off, c.Len)
					} else {
						for _, o := range v.calcObjects(c.Id, c.Size, c.Off, c.Len) {
							fmt.Fprintf(w, "\t%d:\t%s\t%d\t%d\t%d\n", indx, o.key, o.size, o.off, o.len)
						}
					}
				}
			}
		}
		wb.Put32(uint32(w.Len()))
		_, _ = out.Write(append(wb.Bytes(), w.Bytes()...))
	case meta.InfoV2:
		inode := Ino(r.Get64())
		info := &InfoResponse{
			Ino: inode,
		}

		var recursive uint8 = 1
		if r.HasMore() {
			recursive = r.Get8()
		}
		var raw bool
		if r.HasMore() {
			raw = r.Get8() != 0
		}
		var strict bool
		if r.HasMore() {
			strict = r.Get8() != 0
		}

		done := make(chan struct{})
		var r syscall.Errno
		go func() {
			logger.Infof("Start to get info v2 of %d, recursive=%d", inode, recursive)
			r = v.Meta.GetSummary(ctx, inode, &info.Summary, recursive != 0, strict)
			close(done)
		}()
		writeProgress(&info.Summary.Files, &info.Summary.Size, out, done)
		if r != 0 {
			info.Failed = true
			info.Reason = r.Error()
		} else {
			info.Paths = v.Meta.GetPaths(ctx, inode)
			if info.Summary.Files == 1 && info.Summary.Dirs == 0 {
				for indx := uint64(0); indx*meta.ChunkSize < info.Summary.Length; indx++ {
					var cs []meta.Slice
					_ = v.Meta.Read(ctx, inode, uint32(indx), &cs)
					for _, c := range cs {
						if raw {
							info.Chunks = append(info.Chunks, &chunkSlice{indx, c})
						} else {
							for _, o := range v.calcObjects(c.Id, c.Size, c.Off, c.Len) {
								info.Objects = append(info.Objects, &chunkObj{indx, o.key, o.size, o.off, o.len})
							}
						}
					}
				}
			}

			var err error
			if info.PLocks, info.FLocks, err = v.Meta.ListLocks(ctx, inode); err != nil {
				info.Failed = true
				info.Reason = err.Error()
			}
		}
		data, err := json.Marshal(info)
		if err != nil {
			logger.Errorf("marshal info response: %v", err)
			_, _ = out.Write([]byte{byte(syscall.EIO & 0xff)})
			return
		}
		w := utils.NewBuffer(uint32(1 + 4 + len(data)))
		w.Put8(meta.CDATA)
		w.Put32(uint32(len(data)))
		w.Put(data)
		_, _ = out.Write(w.Bytes())
	case meta.OpSummary:
		inode := Ino(r.Get64())
		tree := meta.TreeSummary{
			Inode: inode,
			Path:  "",
			Type:  meta.TypeDirectory,
		}

		var depth uint8 = 3
		if r.HasMore() {
			depth = r.Get8()
		}
		var topN uint8 = 10
		if r.HasMore() {
			topN = r.Get8()
		}
		var strict bool
		if r.HasMore() {
			strict = r.Get8() != 0
		}

		done := make(chan struct{})
		var files, size uint64
		var r syscall.Errno
		go func() {
			logger.Infof("Start to get summary of %d, depth=%d, topN=%d", inode, depth, topN)
			r = v.Meta.GetTreeSummary(ctx, &tree, depth, topN, strict,
				func(count, bytes uint64) {
					atomic.AddUint64(&files, count)
					atomic.AddUint64(&size, bytes)
				})
			close(done)
		}()
		writeProgress(&files, &size, out, done)
		data, err := json.Marshal(&SummaryReponse{r, tree})
		if err != nil {
			logger.Errorf("marshal summary response: %v", err)
			_, _ = out.Write([]byte{byte(syscall.EIO & 0xff)})
			return
		}
		w := utils.NewBuffer(uint32(1 + 4 + len(data)))
		w.Put8(meta.CDATA)
		w.Put32(uint32(len(data)))
		w.Put(data)
		_, _ = out.Write(w.Bytes())
	case meta.CompactPath:
		inode := Ino(r.Get64())
		coCnt := r.Get16()

		done := make(chan struct{})
		var totalChunks, currChunks uint64
		var eno syscall.Errno
		go func() {
			logger.Infof("Start to compact %d with %d workers", inode, coCnt)
			eno = v.Meta.Compact(ctx, inode, int(coCnt), func() {
				atomic.AddUint64(&totalChunks, 1)
			}, func() {
				atomic.AddUint64(&currChunks, 1)
			})
			close(done)
		}()

		writeProgress(&totalChunks, &currChunks, out, done)
		_, _ = out.Write([]byte{uint8(eno)})

	case meta.FillCache:
		paths := strings.Split(string(r.Get(int(r.Get32()))), "\n")
		concurrent := r.Get16()
		background := r.Get8()

		action := WarmupCache
		if r.HasMore() {
			action = CacheAction(r.Get8())
		}

		logger.Infof("Start to %s %d paths with %d workers, background=%d", action, len(paths), concurrent, background)
		stat := &CacheResponse{Locations: make(map[string]uint64)}
		if background == 0 {
			done := make(chan struct{})
			go func() {
				v.cacheFiller.Cache(ctx, action, paths, int(concurrent), stat)
				close(done)
			}()
			writeProgress(&stat.FileCount, &stat.TotalBytes, out, done)
		} else {
			go v.cacheFiller.Cache(meta.NewContext(ctx.Pid(), ctx.Uid(), ctx.Gids()), action, paths, int(concurrent), nil)
		}
		data, err := json.Marshal(stat)
		if err != nil {
			logger.Errorf("marshal response error: %v", err)
			_, _ = out.Write([]byte{byte(syscall.EIO & 0xff)})
			return
		}
		w := utils.NewBuffer(uint32(1 + 4 + len(data)))
		w.Put8(meta.CDATA)
		w.Put32(uint32(len(data)))
		w.Put(data)
		_, _ = out.Write(w.Bytes())
	default:
		logger.Warnf("unknown message type: %d", cmd)
		_, _ = out.Write([]byte{byte(syscall.EINVAL & 0xff)})
	}
}


================================================
FILE: pkg/vfs/reader.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"context"
	"fmt"
	"runtime"
	"sort"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
)

/*
 * state of sliceReader
 *
 *    <-- REFRESH
 *   |      |
 *  NEW -> BUSY  -> READY
 *          |         |
 *        BREAK ---> INVALID
 */
const (
	NEW = iota
	BUSY
	REFRESH
	BREAK
	READY
	INVALID
)

const readSessions = 2

var readBufferUsed atomic.Int64

type sstate uint8

func (m sstate) valid() bool { return m != BREAK && m != INVALID }

var stateNames = []string{"NEW", "BUSY", "REFRESH", "BREAK", "READY", "INVALID"}

func (m sstate) String() string {
	if m <= INVALID {
		return stateNames[m]
	}
	panic("<unknown>")
}

type FileReader interface {
	Read(ctx meta.Context, off uint64, buf []byte) (int, syscall.Errno)
	GetLength() uint64
	Close(ctx meta.Context)
}

type DataReader interface {
	Open(inode Ino, length uint64) FileReader
	Truncate(inode Ino, length uint64)
	Invalidate(inode Ino, off, length uint64)
}

type frange struct {
	off uint64
	len uint64
}

func (r *frange) String() string         { return fmt.Sprintf("[%d,%d,%d)", r.off, r.len, r.end()) }
func (r *frange) end() uint64            { return r.off + r.len }
func (r *frange) contain(p uint64) bool  { return r.off < p && p < r.end() }
func (r *frange) overlap(a *frange) bool { return a.off < r.end() && r.off < a.end() }
func (r *frange) include(a *frange) bool { return r.off <= a.off && a.end() <= r.end() }

// protected by file
type sliceReader struct {
	ctx        context.Context
	cancel     context.CancelFunc
	file       *fileReader
	block      *frange
	state      sstate
	page       *chunk.Page
	indx       uint32
	currentPos uint32
	lastAccess time.Time
	cond       *utils.Cond
	next       *sliceReader
	prev       **sliceReader
	refs       uint16
}

func (s *sliceReader) delay(delay time.Duration) {
	time.AfterFunc(delay, s.run)
}

func (s *sliceReader) done(err syscall.Errno, delay time.Duration) {
	f := s.file
	switch s.state {
	case BUSY:
		s.state = NEW // failed
	case BREAK:
		s.state = INVALID
	case REFRESH:
		s.state = NEW
	}
	if err != 0 {
		if !f.closing {
			logger.Errorf("read file %d: %s", f.inode, err)
		}
		f.err = err
	}
	if f.shouldStop() {
		s.state = INVALID
	}

	switch s.state {
	case NEW:
		s.delay(delay)
	case READY:
		s.cond.Broadcast()
	case INVALID:
		if s.refs == 0 {
			s.delete()
			if f.closing && f.slices == nil {
				f.r.Lock()
				if f.refs == 0 {
					f.delete()
				}
				f.r.Unlock()
			}
		} else {
			s.cond.Broadcast()
		}
	}
	runtime.Goexit()
}

func retry_time(trycnt uint32) time.Duration {
	if trycnt < 30 {
		return time.Millisecond * time.Duration((trycnt-1)*300+1)
	}
	return time.Second * 10
}

func (s *sliceReader) run() {
	f := s.file
	f.Lock()
	defer f.Unlock()
	if s.state != NEW || f.shouldStop() {
		s.done(0, 0)
	}
	s.state = BUSY
	indx := s.indx
	inode := f.inode
	f.Unlock()

	var slices []meta.Slice
	err := f.r.m.Read(meta.Background(), inode, indx, &slices)
	f.Lock()
	length := f.length
	if s.state != BUSY || f.shouldStop() {
		s.done(0, 0)
	}
	if err == syscall.ENOENT {
		s.done(err, 0)
	} else if err != 0 {
		f.tried++
		trycnt := f.tried
		if trycnt > f.r.maxRetries {
			s.done(syscall.EIO, 0)
		} else {
			s.done(0, retry_time(trycnt))
		}
	}

	s.currentPos = 0
	if s.block.off > length {
		s.block.len = 0
		s.state = READY
		s.done(0, 0)
	} else if s.block.end() > length {
		s.block.len = length - s.block.off
	}
	need := s.block.len
	f.Unlock()

	p := s.page.Slice(0, int(need))
	defer p.Release()
	var n int

	ctx := context.WithValue(s.ctx, meta.CtxKey("inode"), inode) // Output inode in log for debugging
	n = f.r.Read(ctx, p, slices, (uint32(s.block.off))%meta.ChunkSize)

	f.Lock()
	if s.state != BUSY || f.shouldStop() {
		s.done(0, 0)
	}
	if n == int(need) {
		s.state = READY
		s.currentPos = uint32(n)
		s.file.tried = 0
		s.lastAccess = time.Now()
		s.done(0, 0)
	} else {
		s.currentPos = 0 // start again from beginning
		err = syscall.EIO
		f.tried++
		_ = f.r.m.InvalidateChunkCache(meta.Background(), inode, indx)
		if f.tried > f.r.maxRetries {
			s.done(err, 0)
		} else {
			s.done(0, retry_time(f.tried))
		}
	}
}

func (s *sliceReader) invalidate() {
	switch s.state {
	case NEW:
	case BUSY:
		s.state = REFRESH
		// TODO cancel ongoing read
	case READY:
		if s.refs > 0 {
			s.state = NEW
			go s.run()
		} else {
			s.state = INVALID
			s.delete() // nobody wants it anymore, so delete it
		}
	}
}

func (s *sliceReader) drop() {
	if s.state <= BREAK {
		if s.refs == 0 {
			s.state = BREAK
			s.cancel()
		}
	} else {
		if s.refs == 0 {
			s.delete() // nobody wants it anymore, so delete it
		} else if s.state == READY {
			s.state = INVALID // somebody still using it, so mark it for removal
		}
	}
}

func (s *sliceReader) delete() {
	*(s.prev) = s.next
	if s.next != nil {
		s.next.prev = s.prev
	} else {
		s.file.last = s.prev
	}
	readBufferUsed.Add(-int64(cap(s.page.Data)))
	s.page.Release()
}

type session struct {
	lastOffset uint64
	total      uint64
	readahead  uint64
	atime      time.Time
}

type fileReader struct {
	// protected by itself
	inode    Ino
	length   uint64
	err      syscall.Errno
	tried    uint32
	sessions [readSessions]session
	slices   *sliceReader
	last     **sliceReader

	sync.Mutex
	closing bool

	// protected by r
	refs uint16
	next *fileReader
	r    *dataReader
}

func (f *fileReader) GetLength() uint64 {
	f.Lock()
	defer f.Unlock()
	return f.length
}

// protected by f
func (f *fileReader) newSlice(block *frange) *sliceReader {
	s := &sliceReader{}
	s.ctx, s.cancel = context.WithCancel(context.Background())
	s.file = f
	s.lastAccess = time.Now()
	s.indx = uint32(block.off / meta.ChunkSize)
	s.block = &frange{block.off, block.len} // random read
	blockend := (block.off/f.r.blockSize + 1) * f.r.blockSize
	if s.block.end() > f.length {
		s.block.len = f.length - s.block.off
	}
	if s.block.end() > blockend {
		s.block.len = blockend - s.block.off
	}
	block.off = s.block.end()
	block.len -= s.block.len
	s.page = chunk.NewOffPage(int(s.block.len))
	s.cond = utils.NewCond(&f.Mutex)
	s.prev = f.last
	*(f.last) = s
	f.last = &(s.next)
	go s.run()
	readBufferUsed.Add(int64(cap(s.page.Data)))
	return s
}

func (f *fileReader) delete() {
	r := f.r
	i := r.files[f.inode]
	if i == f {
		if i.next != nil {
			r.files[f.inode] = i.next
		} else {
			delete(r.files, f.inode)
		}
	} else {
		for i != nil {
			if i.next == f {
				i.next = f.next
				break
			}
			i = i.next
		}
	}
	f.next = nil
}

func (f *fileReader) acquire() {
	f.r.Lock()
	defer f.r.Unlock()
	f.refs++
}

func (f *fileReader) release() {
	f.r.Lock()
	defer f.r.Unlock()
	f.refs--
	if f.refs == 0 && f.slices == nil {
		f.delete()
	}
}

func (f *fileReader) guessSession(block *frange) int {
	idx := -1
	var closestOff uint64
	for i, ses := range f.sessions {
		if ses.lastOffset > closestOff && ses.lastOffset <= block.off && block.off <= ses.lastOffset+ses.readahead+f.r.blockSize {
			idx = i
			closestOff = ses.lastOffset
		}
	}
	if idx == -1 {
		for i, ses := range f.sessions {
			bt := ses.readahead / 8
			if bt < f.r.blockSize {
				bt = f.r.blockSize
			}
			min := ses.lastOffset - bt
			if ses.lastOffset < bt {
				min = 0
			}
			if min <= block.off && block.off < ses.lastOffset && (closestOff == 0 || ses.lastOffset < closestOff) {
				idx = i
				closestOff = ses.lastOffset
			}
		}
	}
	if idx == -1 {
		for i, ses := range f.sessions {
			if ses.total == 0 {
				idx = i
				break
			}
			if idx == -1 || ses.atime.Before(f.sessions[idx].atime) {
				idx = i
			}
		}
		f.sessions[idx].lastOffset = block.off
		f.sessions[idx].total = block.len
		f.sessions[idx].readahead = 0
	} else {
		if block.end() > f.sessions[idx].lastOffset {
			f.sessions[idx].total += block.end() - f.sessions[idx].lastOffset
		}
	}
	f.sessions[idx].atime = time.Now()
	return idx
}

func (f *fileReader) checkReadahead(block *frange) int {
	idx := f.guessSession(block)
	ses := &f.sessions[idx]
	seqdata := ses.total
	readahead := ses.readahead
	used := uint64(readBufferUsed.Load())
	if readahead == 0 && f.r.blockSize <= f.r.readAheadMax && (block.off == 0 || seqdata > block.len) { // begin with read-ahead turned on
		ses.readahead = f.r.blockSize
	} else if readahead < f.r.readAheadMax && seqdata >= readahead && f.r.readAheadTotal > used+readahead*4 {
		ses.readahead *= 2
	} else if readahead >= f.r.blockSize && (f.r.readAheadTotal < used+readahead/2 || seqdata < readahead/4) {
		ses.readahead /= 2
	}
	if ses.readahead >= f.r.blockSize {
		ahead := frange{block.end(), ses.readahead}
		f.readAhead(&ahead)
	}
	if block.end() > ses.lastOffset {
		ses.lastOffset = block.end()
	}
	return idx
}

func (f *fileReader) need(block *frange) bool {
	for _, ses := range f.sessions {
		if ses.total == 0 {
			break
		}
		bt := ses.readahead / 8
		if bt < f.r.blockSize {
			bt = f.r.blockSize
		}
		b := &frange{ses.lastOffset - bt, ses.readahead*2 + f.r.blockSize*2}
		if ses.lastOffset < bt {
			b.off = 0
		}
		if block.overlap(b) {
			return true
		}
	}
	return false
}

// cleanup unused requests
func (f *fileReader) cleanupRequests(block *frange) {
	now := time.Now()
	var cnt int
	f.visit(func(s *sliceReader) bool {
		if !s.state.valid() ||
			!block.overlap(s.block) && (s.lastAccess.Add(time.Second*30).Before(now) || !f.need(s.block)) {
			s.drop()
		} else if !block.overlap(s.block) {
			cnt++
		}
		return true
	})
	f.visit(func(s *sliceReader) bool {
		if !block.overlap(s.block) && cnt > f.r.maxRequests {
			s.drop()
			cnt--
		}
		return cnt > f.r.maxRequests
	})
}

func (f *fileReader) releaseIdleBuffer() {
	f.Lock()
	defer f.Unlock()
	now := time.Now()
	var idle = time.Minute
	used := readBufferUsed.Load()
	if used > int64(f.r.readAheadTotal) {
		idle /= time.Duration(used / int64(f.r.readAheadTotal))
	}
	f.visit(func(s *sliceReader) bool {
		if !s.state.valid() || s.lastAccess.Add(idle).Before(now) || !f.need(s.block) {
			s.drop()
		}
		return true
	})
}

func (f *fileReader) splitRange(block *frange) []uint64 {
	ranges := []uint64{block.off, block.end()}
	contain := func(p uint64) bool {
		for _, i := range ranges {
			if i == p {
				return true
			}
		}
		return false
	}
	f.visit(func(s *sliceReader) bool {
		if s.state.valid() {
			if block.contain(s.block.off) && !contain(s.block.off) {
				ranges = append(ranges, s.block.off)
			}
			if block.contain(s.block.end()) && !contain(s.block.end()) {
				ranges = append(ranges, s.block.end())
			}
		}
		return true
	})
	sort.Slice(ranges, func(i, j int) bool {
		return ranges[i] < ranges[j]
	})
	return ranges
}

// protected by f
func (f *fileReader) readAhead(block *frange) {
	f.visit(func(r *sliceReader) bool {
		if r.state.valid() && r.block.off <= block.off && r.block.end() > block.off {
			if r.state == READY && block.len > f.r.blockSize && r.block.off == block.off && r.block.off%f.r.blockSize == 0 {
				// next block is ready, reduce readahead by a block
				block.len -= f.r.blockSize / 2
			}
			if r.block.end() <= block.end() {
				block.len = block.end() - r.block.end()
			} else {
				block.len = 0
			}
			block.off = r.block.end()
		}
		return true
	})
	if block.len > 0 && block.off < f.length && uint64(readBufferUsed.Load()) < f.r.readAheadTotal {
		if block.len < f.r.blockSize {
			block.len += f.r.blockSize - block.end()%f.r.blockSize // align to end of a block
		}
		f.newSlice(block)
		if block.len > 0 {
			f.readAhead(block)
		}
	}
}

type req struct {
	frange
	s *sliceReader
}

func (f *fileReader) prepareRequests(ranges []uint64) []*req {
	var reqs []*req
	edges := len(ranges)
	for i := 0; i < edges-1; i++ {
		var added bool
		b := frange{ranges[i], ranges[i+1] - ranges[i]}
		f.visit(func(s *sliceReader) bool {
			if !added && s.state.valid() && s.block.include(&b) {
				s.refs++
				s.lastAccess = time.Now()
				reqs = append(reqs, &req{frange{ranges[i] - s.block.off, b.len}, s})
				added = true
				return false
			}
			return true
		})
		if !added {
			for b.len > 0 {
				s := f.newSlice(&b)
				s.refs++
				reqs = append(reqs, &req{frange{0, s.block.len}, s})
			}
		}
	}
	return reqs
}

func (f *fileReader) shouldStop() bool {
	return f.err != 0 || f.closing
}

func (f *fileReader) waitForIO(ctx meta.Context, reqs []*req, buf []byte) (int, syscall.Errno) {
	start := time.Now()
	for _, req := range reqs {
		s := req.s
		for s.state != READY && uint64(s.currentPos) < s.block.len {
			if s.cond.WaitWithTimeout(time.Second) {
				if ctx.Canceled() {
					logger.Warnf("read %d interrupted after %s", f.inode, time.Since(start))
					return 0, syscall.EINTR
				}
			}
			if f.shouldStop() {
				return 0, f.err
			}
		}
	}

	var n int
	for _, req := range reqs {
		s := req.s
		if req.off < s.block.len && s.block.off+req.off < f.length {
			if req.end() > s.block.len {
				logger.Warnf("not enough bytes (%d < %d), restart read", s.block.len, req.end())
				return 0, syscall.EAGAIN
			}
			if s.block.off+req.end() > f.length {
				req.len = f.length - s.block.off - req.off
			}
			n += copy(buf[n:], s.page.Data[req.off:req.end()])
		}
	}
	return n, 0
}

func (f *fileReader) Read(ctx meta.Context, offset uint64, buf []byte) (int, syscall.Errno) {
	if f.r.readBufferUsed() > f.r.bufferSize {
		time.Sleep(time.Millisecond * 10)             // slow down
		for f.r.readBufferUsed() > f.r.bufferSize*2 { // readahead uses 80% of buffer, stop here to avoid OOM
			time.Sleep(time.Millisecond * 100)
		}
	}
	f.Lock()
	defer f.Unlock()
	f.acquire()
	defer f.release()

	if f.shouldStop() {
		return 0, f.err
	}

	size := uint64(len(buf))
	if offset >= f.length || size == 0 {
		return 0, 0
	}
	block := &frange{offset, size}
	if block.end() > f.length {
		block.len = f.length - block.off
	}

	f.cleanupRequests(block)
	var lastBS uint64 = 32 << 10
	if block.off+lastBS > f.length {
		lastblock := frange{f.length - lastBS, lastBS}
		if f.length < lastBS {
			lastblock = frange{0, f.length}
		}
		f.readAhead(&lastblock)
	}
	ranges := f.splitRange(block)
	reqs := f.prepareRequests(ranges)
	defer func() {
		for _, req := range reqs {
			s := req.s
			s.refs--
			if s.refs == 0 && s.state == INVALID {
				s.delete()
			}
		}
	}()
	f.checkReadahead(block)
	return f.waitForIO(ctx, reqs, buf)
}

func (f *fileReader) visit(fn func(s *sliceReader) bool) {
	var next *sliceReader
	for s := f.slices; s != nil; s = next {
		next = s.next
		if !fn(s) {
			break
		}
	}
}

func (f *fileReader) Close(ctx meta.Context) {
	f.Lock()
	f.closing = true
	f.visit(func(s *sliceReader) bool {
		s.drop()
		return true
	})
	f.release()
	f.Unlock()
}

type dataReader struct {
	sync.Mutex
	m              meta.Meta
	store          chunk.ChunkStore
	files          map[Ino]*fileReader
	blockSize      uint64
	bufferSize     int64
	readAheadMax   uint64
	readAheadTotal uint64
	maxRequests    int
	maxRetries     uint32
}

func NewDataReader(conf *Config, m meta.Meta, store chunk.ChunkStore) DataReader {
	var readAheadTotal = 256 << 20
	if conf.Chunk.BufferSize > 0 {
		readAheadTotal = int(conf.Chunk.BufferSize / 10 * 8) // 80% of total buffer
	}
	readAheadMax := min(conf.Chunk.Readahead, readAheadTotal)
	r := &dataReader{
		m:              m,
		store:          store,
		files:          make(map[Ino]*fileReader),
		blockSize:      uint64(conf.Chunk.BlockSize),
		bufferSize:     int64(conf.Chunk.BufferSize),
		readAheadTotal: uint64(readAheadTotal),
		readAheadMax:   uint64(readAheadMax),
		maxRequests:    readAheadMax/conf.Chunk.BlockSize*readSessions + 1,
		maxRetries:     uint32(conf.Meta.Retries),
	}
	go r.checkReadBuffer()
	return r
}

func (r *dataReader) readBufferUsed() int64 {
	used := readBufferUsed.Load()
	return used
}

func (r *dataReader) checkReadBuffer() {
	for {
		r.Lock()
		for _, f := range r.files {
			for f != nil {
				r.Unlock()
				f.releaseIdleBuffer()
				r.Lock()
				f = f.next
			}
		}
		r.Unlock()
		time.Sleep(time.Second)
	}
}

func (r *dataReader) Open(inode Ino, length uint64) FileReader {
	f := &fileReader{
		r:      r,
		inode:  inode,
		length: length,
	}
	f.last = &(f.slices)

	r.Lock()
	f.refs = 1
	f.next = r.files[inode]
	r.files[inode] = f
	r.Unlock()
	return f
}

func (r *dataReader) visit(inode Ino, fn func(*fileReader)) {
	// r could be hold inside f, so Unlock r first to avoid deadlock
	r.Lock()
	var fs []*fileReader
	f := r.files[inode]
	for f != nil {
		fs = append(fs, f)
		f = f.next
	}
	r.Unlock()
	for _, f := range fs {
		f.Lock()
		fn(f)
		f.Unlock()
	}
}

func (r *dataReader) Truncate(inode Ino, length uint64) {
	r.visit(inode, func(f *fileReader) {
		if length < f.length {
			f.visit(func(s *sliceReader) bool {
				if s.block.off+s.block.len > length {
					s.invalidate()
				}
				return true
			})
		}
		f.length = length
	})
}

func (r *dataReader) Invalidate(inode Ino, off, length uint64) {
	b := frange{off, length}
	r.visit(inode, func(f *fileReader) {
		if off+length > f.length {
			f.length = off + length
		}
		f.visit(func(s *sliceReader) bool {
			if b.overlap(s.block) {
				s.invalidate()
			}
			return true
		})
	})
}

func (r *dataReader) readSlice(ctx context.Context, s *meta.Slice, page *chunk.Page, off int) error {
	buf := page.Data
	read := 0
	if s.Id == 0 {
		for read < len(buf) {
			buf[read] = 0
			read++
		}
		return nil
	}

	reader := r.store.NewReader(s.Id, int(s.Size))
	for read < len(buf) {
		p := page.Slice(read, len(buf)-read)
		n, err := reader.ReadAt(ctx, p, off+int(s.Off))
		p.Release()
		if n == 0 && err != nil {
			logger.Warningf("fail to read sliceId %d (off:%d, size:%d, clen: %d, inode: %d): %s",
				s.Id, off+int(s.Off), len(buf)-read, s.Size, ctx.Value(meta.CtxKey("inode")), err)
			return err
		}
		read += n
		off += n
	}
	return nil
}

func (r *dataReader) Read(ctx context.Context, page *chunk.Page, slices []meta.Slice, offset uint32) int {
	if len(slices) > 16 {
		return r.readManySlices(ctx, page, slices, offset)
	}
	read := 0
	var pos uint32
	errs := make(chan error, 10)
	waits := 0
	buf := page.Data
	size := len(buf)
	for i := 0; i < len(slices); i++ {
		if read < size && offset < pos+slices[i].Len {
			toread := min(size-read, int(pos+slices[i].Len-offset))
			go func(s *meta.Slice, p *chunk.Page, off, pos uint32) {
				defer p.Release()
				errs <- r.readSlice(ctx, s, p, int(off))
			}(&slices[i], page.Slice(read, toread), offset-pos, pos)
			read += toread
			offset += uint32(toread)
			waits++
		}
		pos += slices[i].Len
	}
	for read < size {
		buf[read] = 0
		read++
	}
	var err error
	// wait for all goroutine to return, otherwise they may access invalid memory
	for waits > 0 {
		if e := <-errs; e != nil {
			err = e
		}
		waits--
	}
	if err != nil {
		return 0
	}
	return read
}

func (r *dataReader) readManySlices(ctx context.Context, page *chunk.Page, slices []meta.Slice, offset uint32) int {
	read := 0
	var pos uint32
	var err error
	errs := make(chan error, 10)
	waits := 0
	buf := page.Data
	size := len(buf)
	concurrency := make(chan byte, 16)

SLICES:
	for i := 0; i < len(slices); i++ {
		if read < size && offset < pos+slices[i].Len {
			toread := min(size-read, int(pos+slices[i].Len-offset))
		WAIT:
			for {
				select {
				case concurrency <- 1:
					break WAIT
				case e := <-errs:
					waits--
					if e != nil {
						err = e
						break SLICES
					}
				}
			}
			go func(s *meta.Slice, p *chunk.Page, off int, pos uint32) {
				defer p.Release()
				errs <- r.readSlice(ctx, s, p, off)
				<-concurrency
			}(&slices[i], page.Slice(read, toread), int(offset-pos), pos)

			read += toread
			offset += uint32(toread)
			waits++
		}
		pos += slices[i].Len
	}
	// wait for all jobs done, otherwise they may access invalid memory
	for waits > 0 {
		if e := <-errs; e != nil {
			err = e
		}
		waits--
	}
	if err != nil {
		return 0
	}
	for read < size {
		buf[read] = 0
		read++
	}
	return read
}


================================================
FILE: pkg/vfs/vfs.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"runtime"
	"sort"
	"sync"
	"syscall"
	"time"

	"github.com/google/uuid"
	"github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
)

type Ino = meta.Ino
type Attr = meta.Attr
type Context = LogContext

const (
	rootID      = 1
	maxName     = meta.MaxName
	maxSymlink  = meta.MaxSymlink
	maxFileSize = meta.ChunkSize << 31
)

type Port struct {
	PrometheusAgent string `json:",omitempty"`
	DebugAgent      string `json:",omitempty"`
	ConsulAddr      string `json:",omitempty"`
	PyroscopeAddr   string `json:",omitempty"`
}

// FuseOptions contains options for fuse mount, keep the same structure with `fuse.MountOptions`
type FuseOptions struct {
	AllowOther               bool
	Options                  []string
	MaxBackground            int
	MaxWrite                 int
	MaxReadAhead             int
	IgnoreSecurityLabels     bool // ignoring labels should be provided as a fusermount mount option.
	RememberInodes           bool
	FsName                   string
	Name                     string
	SingleThreaded           bool
	DisableXAttrs            bool
	Debug                    bool
	Logger                   *log.Logger `json:"-"`
	EnableLocks              bool
	EnableSymlinkCaching     bool `json:",omitempty"`
	ExplicitDataCacheControl bool
	SyncRead                 bool `json:",omitempty"`
	DirectMount              bool
	DirectMountStrict        bool `json:",omitempty"`
	DirectMountFlags         uintptr
	EnableAcl                bool
	DisableReadDirPlus       bool `json:",omitempty"`
	EnableReadDirPlusAuto    bool
	EnableWriteback          bool
	EnableIoctl              bool `json:",omitempty"`
	DontUmask                bool
	OtherCaps                uint32
	NoAllocForRead           bool
	Timeout                  time.Duration
}

func (o FuseOptions) StripOptions() FuseOptions {
	options := o.Options
	o.Options = make([]string, 0, len(o.Options))
	for _, opt := range options {
		if opt == "nonempty" {
			continue
		}
		o.Options = append(o.Options, opt)
	}

	sort.Strings(o.Options)

	// ignore these options because they won't be send to kernel
	o.IgnoreSecurityLabels,
		o.RememberInodes,
		o.SingleThreaded,
		o.DisableXAttrs,
		o.Debug,
		o.NoAllocForRead = false, false, false, false, false, false

	// ignore there options because they cannot be configured by users
	o.Name = ""
	o.MaxBackground = 0
	o.MaxReadAhead = 0
	o.DirectMount = false
	o.DontUmask = false
	o.Timeout = 0
	return o
}

type SecurityConfig struct {
	EnableCap     bool
	EnableSELinux bool
}

type Config struct {
	Meta                 *meta.Config
	Format               meta.Format
	Chunk                *chunk.Config
	Security             *SecurityConfig
	Port                 *Port
	Version              string
	AttrTimeout          time.Duration
	DirEntryTimeout      time.Duration
	NegEntryTimeout      time.Duration
	EntryTimeout         time.Duration
	ReaddirCache         bool
	BackupMeta           time.Duration
	BackupSkipTrash      bool
	FastResolve          bool   `json:",omitempty"`
	AccessLog            string `json:",omitempty"`
	Subdir               string `json:",omitempty"`
	PrefixInternal       bool
	HideInternal         bool
	RootSquash           *AnonymousAccount `json:",omitempty"`
	AllSquash            *AnonymousAccount `json:",omitempty"`
	NonDefaultPermission bool              `json:",omitempty"`
	UMask                uint16

	Pid       int
	PPid      int
	CommPath  string       `json:",omitempty"`
	StatePath string       `json:",omitempty"`
	FuseOpts  *FuseOptions `json:",omitempty"`

	// the mount point for current volume (to follow symlink)
	Mountpoint string
}

type AnonymousAccount struct {
	Uid uint32
	Gid uint32
}

var (
	readSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "fuse_read_size_bytes",
		Help:    "size of read distributions.",
		Buckets: prometheus.LinearBuckets(4096, 4096, 32),
	})
	writtenSizeHistogram = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "fuse_written_size_bytes",
		Help:    "size of write distributions.",
		Buckets: prometheus.LinearBuckets(4096, 4096, 32),
	})
)

func (v *VFS) Lookup(ctx Context, parent Ino, name string) (entry *meta.Entry, err syscall.Errno) {
	var inode Ino
	var attr = &Attr{}
	if parent == rootID || name == internalNodes[0].name { // 0 is the control file
		n := getInternalNodeByName(name)
		if n != nil {
			entry = &meta.Entry{Inode: n.inode, Attr: n.attr}
			return
		}
	}
	if IsSpecialNode(parent) && name == "." {
		if n := getInternalNode(parent); n != nil {
			entry = &meta.Entry{Inode: n.inode, Attr: n.attr}
			return
		}
	}
	defer func() {
		logit(ctx, "lookup", err, "(%d,%s):%s", parent, name, (*Entry)(entry))
	}()
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}
	err = v.Meta.Lookup(ctx, parent, name, &inode, attr, true)
	if err == 0 {
		entry = &meta.Entry{Inode: inode, Attr: attr}
	}
	return
}

func (v *VFS) GetAttr(ctx Context, ino Ino, opened uint8) (entry *meta.Entry, err syscall.Errno) {
	if IsSpecialNode(ino) && getInternalNode(ino) != nil {
		n := getInternalNode(ino)
		entry = &meta.Entry{Inode: n.inode, Attr: n.attr}
		return
	}
	defer func() { logit(ctx, "getattr", err, "(%d):%s", ino, (*Entry)(entry)) }()
	var attr = &Attr{}
	err = v.Meta.GetAttr(ctx, ino, attr)
	if err == 0 {
		entry = &meta.Entry{Inode: ino, Attr: attr}
	}
	return
}

func get_filetype(mode uint16) uint8 {
	switch mode & (syscall.S_IFMT & 0xffff) {
	case syscall.S_IFIFO:
		return meta.TypeFIFO
	case syscall.S_IFSOCK:
		return meta.TypeSocket
	case syscall.S_IFLNK:
		return meta.TypeSymlink
	case syscall.S_IFREG:
		return meta.TypeFile
	case syscall.S_IFBLK:
		return meta.TypeBlockDev
	case syscall.S_IFDIR:
		return meta.TypeDirectory
	case syscall.S_IFCHR:
		return meta.TypeCharDev
	}
	return meta.TypeFile
}

func (v *VFS) Mknod(ctx Context, parent Ino, name string, mode uint16, cumask uint16, rdev uint32) (entry *meta.Entry, err syscall.Errno) {
	defer func() {
		logit(ctx, "mknod", err, "(%d,%s,%s:0%04o,0x%08X):%s", parent, name, smode(mode), mode, rdev, (*Entry)(entry))
	}()
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EEXIST
		return
	}
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}
	_type := get_filetype(mode)
	if _type == 0 {
		err = syscall.EPERM
		return
	}

	var inode Ino
	var attr = &Attr{}
	err = v.Meta.Mknod(ctx, parent, name, _type, mode&07777, cumask, rdev, "", &inode, attr)
	if err == 0 {
		entry = &meta.Entry{Inode: inode, Attr: attr}
		v.invalidateDirHandle(parent, name, inode, attr)
	}
	return
}

func (v *VFS) Unlink(ctx Context, parent Ino, name string) (err syscall.Errno) {
	return v.doUnlink(ctx, parent, name, false)
}

func (v *VFS) doUnlink(ctx Context, parent Ino, name string, skipTrash bool) (err syscall.Errno) {
	defer func() { logit(ctx, "unlink", err, "(%d,%s)", parent, name) }()
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EPERM
		return
	}
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}
	err = v.Meta.Unlink(ctx, parent, name, skipTrash)
	if err == 0 {
		v.invalidateDirHandle(parent, name, 0, nil)
	}
	return
}

func (v *VFS) Mkdir(ctx Context, parent Ino, name string, mode uint16, cumask uint16) (entry *meta.Entry, err syscall.Errno) {
	defer func() {
		logit(ctx, "mkdir", err, "(%d,%s,%s:0%04o):%s", parent, name, smode(mode), mode, (*Entry)(entry))
	}()
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EEXIST
		return
	}
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}

	var inode Ino
	var attr = &Attr{}
	err = v.Meta.Mkdir(ctx, parent, name, mode, cumask, 0, &inode, attr)
	if err == 0 {
		entry = &meta.Entry{Inode: inode, Attr: attr}
		v.invalidateDirHandle(parent, name, inode, attr)
	}
	return
}

func (v *VFS) Rmdir(ctx Context, parent Ino, name string) (err syscall.Errno) {
	defer func() { logit(ctx, "rmdir", err, "(%d,%s)", parent, name) }()
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}
	err = v.Meta.Rmdir(ctx, parent, name)
	if err == 0 {
		v.invalidateDirHandle(parent, name, 0, nil)
	}
	return
}

func (v *VFS) Symlink(ctx Context, path string, parent Ino, name string) (entry *meta.Entry, err syscall.Errno) {
	defer func() {
		logit(ctx, "symlink", err, "(%d,%s,%s):%s", parent, name, path, (*Entry)(entry))
	}()
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EEXIST
		return
	}
	if len(name) > maxName || len(path) >= maxSymlink {
		err = syscall.ENAMETOOLONG
		return
	}

	var inode Ino
	var attr = &Attr{}
	err = v.Meta.Symlink(ctx, parent, name, path, &inode, attr)
	if err == 0 {
		entry = &meta.Entry{Inode: inode, Attr: attr}
		v.invalidateDirHandle(parent, name, inode, attr)
	}
	return
}

func (v *VFS) Readlink(ctx Context, ino Ino) (path []byte, err syscall.Errno) {
	defer func() { logit(ctx, "readlink", err, "(%d): (%s)", ino, string(path)) }()
	err = v.Meta.ReadLink(ctx, ino, &path)
	return
}

func (v *VFS) Rename(ctx Context, parent Ino, name string, newparent Ino, newname string, flags uint32) (err syscall.Errno) {
	defer func() {
		logit(ctx, "rename", err, "(%d,%s,%d,%s,%d)", parent, name, newparent, newname, flags)
	}()
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EPERM
		return
	}
	if newparent == rootID && IsSpecialName(newname) {
		err = syscall.EPERM
		return
	}
	if len(name) > maxName || len(newname) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}

	var inode Ino
	var attr = &Attr{}
	err = v.Meta.Rename(ctx, parent, name, newparent, newname, flags, &inode, attr)
	if err == 0 {
		v.invalidateDirHandle(parent, name, 0, nil)
		v.invalidateDirHandle(newparent, newname, 0, nil)
		v.invalidateDirHandle(newparent, newname, inode, attr)
	}
	return
}

func (v *VFS) Link(ctx Context, ino Ino, newparent Ino, newname string) (entry *meta.Entry, err syscall.Errno) {
	defer func() {
		logit(ctx, "link", err, "(%d,%d,%s):%s", ino, newparent, newname, (*Entry)(entry))
	}()
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	if newparent == rootID && IsSpecialName(newname) {
		err = syscall.EPERM
		return
	}
	if len(newname) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}

	var attr = &Attr{}
	err = v.Meta.Link(ctx, ino, newparent, newname, attr)
	if err == 0 {
		entry = &meta.Entry{Inode: ino, Attr: attr}
		v.invalidateDirHandle(newparent, newname, ino, attr)
	}
	return
}

func (v *VFS) Opendir(ctx Context, ino Ino, flags uint32) (fh uint64, err syscall.Errno) {
	defer func() { logit(ctx, "opendir", err, "(%d) [fh:%d]", ino, fh) }()
	if ctx.CheckPermission() {
		var mmask uint8 = 0
		switch flags & (syscall.O_RDONLY | syscall.O_WRONLY | syscall.O_RDWR) {
		case syscall.O_RDONLY:
			mmask = MODE_MASK_R
		case syscall.O_WRONLY:
			mmask = MODE_MASK_W
		case syscall.O_RDWR:
			mmask = MODE_MASK_R | MODE_MASK_W
		}
		if err = v.Meta.Access(ctx, ino, mmask, nil); err != 0 {
			return
		}
	}
	fh = v.newHandle(ino, true).fh
	return
}

func (v *VFS) UpdateLength(inode Ino, attr *meta.Attr) {
	if attr.Full && attr.Typ == meta.TypeFile {
		length := v.writer.GetLength(inode)
		if length > attr.Length {
			attr.Length = length
		}
		v.reader.Truncate(inode, attr.Length)
	}
}

func (v *VFS) Readdir(ctx Context, ino Ino, size uint32, off int, fh uint64, plus bool) (entries []*meta.Entry, readAt time.Time, err syscall.Errno) {
	defer func() { logit(ctx, "readdir", err, "(%d,%d,%d,%t): (%d)", ino, size, off, plus, len(entries)) }()
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	h.Lock()
	defer h.Unlock()

	if h.dirHandler == nil || off == 0 {
		if h.dirHandler != nil {
			h.dirHandler.Close()
			h.dirHandler = nil
		}
		var initEntries []*meta.Entry
		if ino == rootID && !v.Conf.HideInternal {
			for _, node := range internalNodes[1:] {
				initEntries = append(initEntries, &meta.Entry{
					Inode: node.inode,
					Name:  []byte(node.name),
					Attr:  node.attr,
				})
			}
		}
		h.readAt = time.Now()
		if h.dirHandler, err = v.Meta.NewDirHandler(ctx, ino, plus, initEntries); err != 0 {
			if plus && err == syscall.EACCES {
				h.dirHandler, err = v.Meta.NewDirHandler(ctx, ino, false, initEntries)
			}
			if err != 0 {
				return
			}
		}
	}
	if entries, err = h.dirHandler.List(ctx, off); err != 0 {
		return
	}
	readAt = h.readAt
	logger.Debugf("readdir: [%d:%d] %d entries, offset=%d", ino, fh, len(entries), off)
	return
}

func (v *VFS) UpdateReaddirOffset(ctx Context, ino Ino, fh uint64, off int) {
	h := v.findHandle(ino, fh)
	if h == nil {
		return
	}
	h.Lock()
	defer h.Unlock()
	if h.dirHandler != nil {
		h.dirHandler.Read(off)
	}
}

func (v *VFS) Releasedir(ctx Context, ino Ino, fh uint64) int {
	defer logit(ctx, "releasedir", 0, "(%d)", ino)
	h := v.findHandle(ino, fh)
	if h == nil {
		return 0
	}
	v.ReleaseHandler(ino, fh)
	return 0
}

const O_TMPFILE = 020000000

func (v *VFS) Create(ctx Context, parent Ino, name string, mode uint16, cumask uint16, flags uint32) (entry *meta.Entry, fh uint64, err syscall.Errno) {
	defer func() {
		logit(ctx, "create", err, "(%d,%s,%s:0%04o):%s [fh:%d]", parent, name, smode(mode), mode, (*Entry)(entry), fh)
	}()
	// O_TMPFILE support
	doUnlink := runtime.GOOS == "linux" && flags&O_TMPFILE != 0
	if doUnlink {
		name = fmt.Sprintf("tmpfile_%s", uuid.New().String())
	}
	if parent == rootID && IsSpecialName(name) {
		err = syscall.EEXIST
		return
	}
	if len(name) > maxName {
		err = syscall.ENAMETOOLONG
		return
	}

	var inode Ino
	var attr = &Attr{}
	if runtime.GOOS == "windows" {
		attr.Flags = meta.FlagWindowsArchive
	}
	err = v.Meta.Create(ctx, parent, name, mode&07777, cumask, flags, &inode, attr)
	if runtime.GOOS == "darwin" && err == syscall.ENOENT {
		err = syscall.EACCES
	}
	if err == 0 {
		v.UpdateLength(inode, attr)
		fh = v.newFileHandle(inode, attr.Length, flags)
		entry = &meta.Entry{Inode: inode, Attr: attr}
		v.invalidateDirHandle(parent, name, inode, attr)

		if doUnlink {
			if flags&syscall.O_EXCL != 0 {
				logger.Warnf("The O_EXCL is currently not supported for use with O_TMPFILE")
			}
			err = v.doUnlink(ctx, parent, name, true)
		}
	}
	return
}

func (v *VFS) Open(ctx Context, ino Ino, flags uint32) (entry *meta.Entry, fh uint64, err syscall.Errno) {
	defer func() {
		if entry != nil {
			logit(ctx, "open", err, "(%d,%#x) [fh:%d]", ino, flags, fh)
		} else {
			logit(ctx, "open", err, "(%d,%#x)", ino, flags)
		}
	}()
	var attr = &Attr{}
	if IsSpecialNode(ino) {
		if ino != controlInode && (flags&O_ACCMODE) != syscall.O_RDONLY {
			err = syscall.EACCES
			return
		}
		h := v.newHandle(ino, true)
		fh = h.fh
		n := getInternalNode(ino)
		if n == nil {
			return
		}
		entry = &meta.Entry{Inode: ino, Attr: n.attr}
		switch ino {
		case logInode:
			openAccessLog(fh)
		case StatsInode:
			h.data = CollectMetrics(v.registry)
		case ConfigInode:
			v.Conf.Format = v.Meta.GetFormat()
			if v.UpdateFormat != nil {
				v.UpdateFormat(&v.Conf.Format)
			}
			v.Conf.Format.RemoveSecret()
			h.data, _ = json.MarshalIndent(v.Conf, "", " ")
			entry.Attr.Length = uint64(len(h.data))
		}
		return
	}

	err = v.Meta.Open(ctx, ino, flags, attr)
	if err == 0 {
		v.UpdateLength(ino, attr)
		fh = v.newFileHandle(ino, attr.Length, flags)
		entry = &meta.Entry{Inode: ino, Attr: attr}
	}
	return
}

func (v *VFS) Truncate(ctx Context, ino Ino, size int64, fh uint64, attr *Attr) (err syscall.Errno) {
	// defer func() { logit(ctx, "truncate (%d,%d): %s", ino, size, strerr(err)) }()
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	if size < 0 {
		err = syscall.EINVAL
		return
	}
	if size >= maxFileSize {
		err = syscall.EFBIG
		return
	}
	hs := v.findAllHandles(ino)
	sort.Slice(hs, func(i, j int) bool { return hs[i].fh < hs[j].fh })
	for _, h := range hs {
		if !h.Wlock(ctx) {
			err = syscall.EINTR
			return
		}
		defer func(h *handle) { h.Wunlock() }(h)
	}
	_ = v.writer.Flush(ctx, ino)
	if fh == 0 {
		err = v.Meta.Truncate(ctx, ino, 0, uint64(size), attr, false)
	} else {
		h := v.findHandle(ino, fh)
		if h == nil {
			err = syscall.EBADF
			return
		}
		if h.writer == nil {
			err = syscall.EACCES
			return
		}
		// flags = 1 means the file is opened, so we don't need to check if it's in the trash
		err = v.Meta.Truncate(ctx, ino, 1, uint64(size), attr, true)
	}
	if err == 0 {
		v.writer.Truncate(ino, uint64(size))
		v.reader.Truncate(ino, uint64(size))
		v.invalidateAttr(ino)
	}
	return err
}

func (v *VFS) ReleaseHandler(ino Ino, fh uint64) {
	v.releaseFileHandle(ino, fh)
}

func (v *VFS) Release(ctx Context, ino Ino, fh uint64) {
	var err syscall.Errno
	defer func() { logit(ctx, "release", err, "(%d,%d)", ino, fh) }()
	if IsSpecialNode(ino) {
		if ino == logInode {
			closeAccessLog(fh)
		}
		v.releaseHandle(ino, fh)
		return
	}
	if fh > 0 {
		f := v.findHandle(ino, fh)
		if f != nil {
			f.Lock()
			for (f.writing | f.writers | f.readers) != 0 {
				if f.cond.WaitWithTimeout(time.Second) && ctx.Canceled() {
					f.Unlock()
					logger.Warnf("write lock %d interrupted", f.inode)
					err = syscall.EINTR
					return
				}
			}
			locks := f.locks
			fowner := f.flockOwner
			powner := f.ofdOwner
			f.Unlock()
			if f.writer != nil {
				_ = f.writer.Flush(ctx)
				v.invalidateAttr(ino)
			}
			if locks&1 != 0 {
				_ = v.Meta.Flock(ctx, ino, fowner^fh, F_UNLCK, false)
			}
			if locks&2 != 0 && powner != 0 {
				_ = v.Meta.Setlk(ctx, ino, powner, false, F_UNLCK, 0, 0x7FFFFFFFFFFFFFFF, 0)
			}
		}
		_ = v.Meta.Close(ctx, ino)
		go v.releaseFileHandle(ino, fh) // after writes it waits for data sync, so do it after everything
	}
}

func hasReadPerm(flag uint32) bool {
	return (flag & O_ACCMODE) != syscall.O_WRONLY
}

func (v *VFS) Read(ctx Context, ino Ino, buf []byte, off uint64, fh uint64) (n int, err syscall.Errno) {
	size := uint32(len(buf))
	if IsSpecialNode(ino) {
		if ino == controlInode && runtime.GOOS == "darwin" {
			fh = v.getControlHandle(ctx.Pid())
		}
		h := v.findHandle(ino, fh)
		if h == nil {
			err = syscall.EBADF
			return
		}
		if len(h.data) == 0 {
			switch ino {
			case StatsInode:
				h.data = CollectMetrics(v.registry)
			case ConfigInode:
				v.Conf.Format = v.Meta.GetFormat()
				if v.UpdateFormat != nil {
					v.UpdateFormat(&v.Conf.Format)
				}
				v.Conf.Format.RemoveSecret()
				h.data, _ = json.MarshalIndent(v.Conf, "", " ")
			}
		}

		if ino == logInode {
			if h.flags&O_RECOVERED != 0 {
				openAccessLog(fh)
			}
			n = readAccessLog(fh, buf)
		} else {
			defer func() { logit(ctx, "read", err, "(%d,%d,%d,%d): %d", ino, size, off, fh, n) }()
			h.Lock()
			defer h.Unlock()
			if off < h.off {
				logger.Errorf("read dropped data from %s: %d < %d", ino, off, h.off)
				err = syscall.EIO
				return
			}
			if int(off-h.off) < len(h.data) {
				n = copy(buf, h.data[off-h.off:])
			}
			if len(h.data) > 2<<20 && off-h.off > 1<<20 {
				// drop first part to avoid OOM
				h.off += 1 << 20
				h.data = h.data[1<<20:]
			}
		}
		return
	}

	defer func() {
		readSizeHistogram.Observe(float64(n))
		logit(ctx, "read", err, "(%d,%d,%d,%d): (%d)", ino, size, off, fh, n)
	}()
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	if h.flags&O_RECOVERED != 0 {
		// recovered
		var attr Attr
		err = v.Meta.Open(ctx, ino, syscall.O_RDONLY, &attr)
		if err != 0 {
			v.releaseHandle(ino, fh)
			err = syscall.EBADF
			return
		}
		h.Lock()
		v.UpdateLength(ino, &attr)
		h.flags = syscall.O_RDONLY
		h.reader = v.reader.Open(h.inode, attr.Length)
		h.Unlock()
	}

	if off >= maxFileSize || off+uint64(size) >= maxFileSize {
		err = syscall.EFBIG
		return
	}
	if h.reader == nil {
		err = syscall.EBADF
		return
	}

	// there could be read operation for write-only if kernel writeback is enabled
	if v.Conf.FuseOpts != nil && !v.Conf.FuseOpts.EnableWriteback && !hasReadPerm(h.flags) {
		err = syscall.EBADF
		return
	}
	if !h.Rlock(ctx) {
		err = syscall.EINTR
		return
	}
	defer h.Runlock()

	_ = v.writer.Flush(ctx, ino)
	n, err = h.reader.Read(ctx, off, buf)
	for err == syscall.EAGAIN {
		n, err = h.reader.Read(ctx, off, buf)
	}
	if err == syscall.ENOENT {
		err = syscall.EBADF
	}
	h.removeOp(ctx)
	return
}

func (v *VFS) Write(ctx Context, ino Ino, buf []byte, off, fh uint64) (err syscall.Errno) {
	size := uint64(len(buf))
	if ino == controlInode && runtime.GOOS == "darwin" {
		fh = v.getControlHandle(ctx.Pid())
	}
	defer func() { logit(ctx, "write", err, "(%d,%d,%d,%d)", ino, size, off, fh) }()
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	if off >= maxFileSize || off+size >= maxFileSize {
		err = syscall.EFBIG
		return
	}

	if ino == controlInode {
		h.Lock()
		defer h.Unlock()
		h.pending = append(h.pending, buf...)
		rb := utils.ReadBuffer(h.pending)
		cmd := rb.Get32()
		size := int(rb.Get32())
		if rb.Left() < size {
			logger.Debugf("message not complete: %d %d > %d", cmd, size, rb.Left())
			return
		}
		h.data = append(h.data, h.pending...)
		h.pending = h.pending[:0]
		if rb.Left() == size {
			h.bctx = meta.NewContext(ctx.Pid(), ctx.Uid(), ctx.Gids())
			go v.handleInternalMsg(h.bctx, cmd, rb, h)
		} else {
			logger.Warnf("broken message: %d %d < %d", cmd, size, rb.Left())
			h.data = append(h.data, uint8(syscall.EIO&0xff))
		}
		return
	}

	if h.writer == nil {
		err = syscall.EBADF
		return
	}

	if !h.Wlock(ctx) {
		err = syscall.EINTR
		return
	}
	defer h.Wunlock()

	err = h.writer.Write(ctx, off, buf)
	if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL {
		err = syscall.EBADF
	}
	h.removeOp(ctx)

	if err == 0 {
		writtenSizeHistogram.Observe(float64(len(buf)))
		v.reader.Invalidate(ino, off, size)
		v.invalidateAttr(ino)
	}
	return
}

func (v *VFS) Fallocate(ctx Context, ino Ino, mode uint8, off, size int64, fh uint64) (err syscall.Errno) {
	defer func() { logit(ctx, "fallocate", err, "(%d,%d,%d,%d)", ino, mode, off, size) }()
	if off < 0 || size <= 0 {
		err = syscall.EINVAL
		return
	}
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	if off >= maxFileSize || off+size >= maxFileSize {
		err = syscall.EFBIG
		return
	}
	if h.writer == nil {
		err = syscall.EBADF
		return
	}
	if !h.Wlock(ctx) {
		err = syscall.EINTR
		return
	}
	defer h.Wunlock()
	defer h.removeOp(ctx)

	err = v.writer.Flush(ctx, ino)
	if err != 0 {
		return
	}
	var length uint64
	err = v.Meta.Fallocate(ctx, ino, mode, uint64(off), uint64(size), &length)
	if err == 0 {
		v.writer.Truncate(ino, length)
		s := size
		if off+size > int64(length) {
			s = int64(length) - off
		}
		if s > 0 {
			v.reader.Invalidate(ino, uint64(off), uint64(s))
		}
		v.invalidateAttr(ino)
	}
	return
}

func (v *VFS) CopyFileRange(ctx Context, nodeIn Ino, fhIn, offIn uint64, nodeOut Ino, fhOut, offOut, size uint64, flags uint32) (copied uint64, err syscall.Errno) {
	defer func() {
		logit(ctx, "copy_file_range", err, "(%d,%d,%d,%d,%d,%d)", nodeIn, offIn, nodeOut, offOut, size, flags)
	}()
	if IsSpecialNode(nodeIn) {
		err = syscall.ENOTSUP
		return
	}
	if IsSpecialNode(nodeOut) {
		err = syscall.EPERM
		return
	}
	hi := v.findHandle(nodeIn, fhIn)
	if fhIn == 0 || hi == nil || hi.inode != nodeIn {
		err = syscall.EBADF
		return
	}
	ho := v.findHandle(nodeOut, fhOut)
	if fhOut == 0 || ho == nil || ho.inode != nodeOut {
		err = syscall.EBADF
		return
	}
	if hi.reader == nil {
		err = syscall.EBADF
		return
	}
	if ho.writer == nil {
		err = syscall.EACCES
		return
	}
	if offIn >= maxFileSize || offIn+size >= maxFileSize || offOut >= maxFileSize || offOut+size >= maxFileSize {
		err = syscall.EFBIG
		return
	}
	if flags != 0 {
		err = syscall.EINVAL
		return
	}
	if nodeIn == nodeOut && (offIn <= offOut && offOut < offIn+size || offOut <= offIn && offIn < offOut+size) {
		err = syscall.EINVAL // overlap
		return
	}

	if !ho.Wlock(ctx) {
		err = syscall.EINTR
		return
	}
	defer ho.Wunlock()
	defer ho.removeOp(ctx)
	if nodeIn != nodeOut {
		if !hi.Rlock(ctx) {
			err = syscall.EINTR
			return
		}
		defer hi.Runlock()
		defer hi.removeOp(ctx)
	}

	err = v.writer.Flush(ctx, nodeIn)
	if err != 0 {
		return
	}
	err = v.writer.Flush(ctx, nodeOut)
	if err != 0 {
		return
	}
	var length uint64
	err = v.Meta.CopyFileRange(ctx, nodeIn, offIn, nodeOut, offOut, size, flags, &copied, &length)
	if err == 0 {
		v.writer.Truncate(nodeOut, length)
		v.reader.Invalidate(nodeOut, offOut, size)
		v.invalidateAttr(nodeOut)
	}
	return
}

func (v *VFS) Flush(ctx Context, ino Ino, fh uint64, lockOwner uint64) (err syscall.Errno) {
	if ino == controlInode && runtime.GOOS == "darwin" {
		fh = v.getControlHandle(ctx.Pid())
		defer v.releaseControlHandle(ctx.Pid())
	}
	defer func() { logit(ctx, "flush", err, "(%d,%d,%016X)", ino, fh, lockOwner) }()
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	if IsSpecialNode(ino) {
		if ino == controlInode && h.bctx != nil {
			h.bctx.Cancel()
		}
		return
	}

	if h.writer != nil {
		for !h.Wlock(ctx) {
			h.cancelOp(ctx.Pid())
		}

		err = h.writer.Flush(ctx)
		if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL {
			err = syscall.EBADF
		}
		h.removeOp(ctx)
		h.Wunlock()
	} else if h.reader != nil {
		h.cancelOp(ctx.Pid())
	}

	h.Lock()
	locks := h.locks
	if lockOwner == h.ofdOwner {
		h.ofdOwner = 0
	}
	h.Unlock()
	if locks&2 != 0 {
		_ = v.Meta.Setlk(ctx, ino, lockOwner, false, F_UNLCK, 0, 0x7FFFFFFFFFFFFFFF, 0)
	}
	return
}

func (v *VFS) Fsync(ctx Context, ino Ino, datasync int, fh uint64) (err syscall.Errno) {
	defer func() { logit(ctx, "fsync", err, "(%d,%d)", ino, datasync) }()
	if IsSpecialNode(ino) {
		return
	}
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	if h.writer != nil {
		if !h.Wlock(ctx) {
			return syscall.EINTR
		}
		defer h.Wunlock()
		defer h.removeOp(ctx)

		err = h.writer.Flush(ctx)
		if err == syscall.ENOENT || err == syscall.EPERM || err == syscall.EINVAL {
			err = syscall.EBADF
		}
	}
	return
}

const (
	xattrMaxName = 255
	xattrMaxSize = 65536
)

var macSupportFlags = meta.XattrCreateOrReplace | meta.XattrCreate | meta.XattrReplace

const (
	_SECURITY_CAPABILITY  = "security.capability"
	_SECURITY_SELINUX     = "security.selinux"
	_SECURITY_ACL         = "system.posix_acl_access"
	_SECURITY_ACL_DEFAULT = "system.posix_acl_default"
)

func isXattrEnabled(conf *Config, name string) bool {
	switch name {
	case _SECURITY_CAPABILITY:
		return conf.Security != nil && conf.Security.EnableCap
	case _SECURITY_SELINUX:
		return conf.Security != nil && conf.Security.EnableSELinux
	case _SECURITY_ACL, _SECURITY_ACL_DEFAULT:
		return conf.Format.EnableACL
	}
	return true
}

func (v *VFS) SetXattr(ctx Context, ino Ino, name string, value []byte, flags uint32) (err syscall.Errno) {
	defer func() { logit(ctx, "setxattr", err, "(%d,%s,%d,%d)", ino, name, len(value), flags) }()
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	if len(value) > xattrMaxSize {
		if runtime.GOOS == "darwin" {
			err = syscall.E2BIG
		} else {
			err = syscall.ERANGE
		}
		return
	}
	if len(name) > xattrMaxName {
		if runtime.GOOS == "darwin" {
			err = syscall.EPERM
		} else {
			err = syscall.ERANGE
		}
		return
	}
	if len(name) == 0 {
		err = syscall.EINVAL
		return
	}

	if !isXattrEnabled(v.Conf, name) {
		err = syscall.ENOTSUP
		return
	}

	if typ, ok := aclTypes[name]; ok {
		var rule *acl.Rule
		rule, err = decodeACL(value)
		if err != 0 {
			return
		}
		err = v.Meta.SetFacl(ctx, ino, typ, rule)
		v.invalidateAttr(ino)
	} else {
		// only retain supported flags
		if runtime.GOOS == "darwin" {
			flags &= uint32(macSupportFlags)
		}
		err = v.Meta.SetXattr(ctx, ino, name, value, flags)
	}
	return
}

func (v *VFS) GetXattr(ctx Context, ino Ino, name string, size uint32) (value []byte, err syscall.Errno) {
	if !isXattrEnabled(v.Conf, name) {
		err = syscall.ENODATA
		return
	}

	defer func() { logit(ctx, "getxattr", err, "(%d,%s,%d): (%d)", ino, name, size, len(value)) }()
	if IsSpecialNode(ino) {
		err = meta.ENOATTR
		return
	}
	if len(name) > xattrMaxName {
		if runtime.GOOS == "darwin" {
			err = syscall.EPERM
		} else {
			err = syscall.ERANGE
		}
		return
	}
	if len(name) == 0 {
		err = syscall.EINVAL
		return
	}

	if typ, ok := aclTypes[name]; ok {
		rule := &acl.Rule{}
		if err = v.Meta.GetFacl(ctx, ino, typ, rule); err != 0 {
			return nil, err
		}
		value = encodeACL(rule)
	} else {
		err = v.Meta.GetXattr(ctx, ino, name, &value)
	}
	if size > 0 && len(value) > int(size) {
		err = syscall.ERANGE
	}
	return
}

func (v *VFS) ListXattr(ctx Context, ino Ino, size int) (data []byte, err syscall.Errno) {
	defer func() { logit(ctx, "listxattr", err, "(%d,%d): (%d)", ino, size, len(data)) }()
	if IsSpecialNode(ino) {
		err = meta.ENOATTR
		return
	}
	err = v.Meta.ListXattr(ctx, ino, &data)
	if size > 0 && len(data) > size {
		err = syscall.ERANGE
	}
	return
}

func (v *VFS) RemoveXattr(ctx Context, ino Ino, name string) (err syscall.Errno) {
	defer func() { logit(ctx, "removexattr", err, "(%d,%s)", ino, name) }()
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	if len(name) > xattrMaxName {
		if runtime.GOOS == "darwin" {
			err = syscall.EPERM
		} else {
			err = syscall.ERANGE
		}
		return
	}
	if len(name) == 0 {
		err = syscall.EINVAL
		return
	}

	if !isXattrEnabled(v.Conf, name) {
		err = syscall.ENOTSUP
		return
	}

	if typ, ok := aclTypes[name]; ok {
		err = v.Meta.SetFacl(ctx, ino, typ, acl.EmptyRule())
	} else {
		err = v.Meta.RemoveXattr(ctx, ino, name)
	}

	return
}

var logger = utils.GetLogger("juicefs")

type VFS struct {
	Conf            *Config
	Meta            meta.Meta
	Store           chunk.ChunkStore
	InvalidateEntry func(parent meta.Ino, name string) syscall.Errno
	UpdateFormat    func(*meta.Format)
	reader          DataReader
	writer          DataWriter
	cacheFiller     *CacheFiller

	handles   map[Ino][]*handle
	handleIno map[uint64]Ino
	hanleM    sync.Mutex
	nextfh    uint64

	modM       sync.Mutex
	modifiedAt map[Ino]time.Time

	registry *prometheus.Registry
}

func NewVFS(conf *Config, m meta.Meta, store chunk.ChunkStore, registerer prometheus.Registerer, registry *prometheus.Registry) *VFS {
	reader := NewDataReader(conf, m, store)
	writer := NewDataWriter(conf, m, store, reader)

	v := &VFS{
		Conf:        conf,
		Meta:        m,
		Store:       store,
		reader:      reader,
		writer:      writer,
		cacheFiller: NewCacheFiller(conf, m, store),
		handles:     make(map[Ino][]*handle),
		handleIno:   make(map[uint64]Ino),
		modifiedAt:  make(map[meta.Ino]time.Time),
		nextfh:      1,
		registry:    registry,
	}

	n := getInternalNode(ConfigInode)
	v.Conf.Format.RemoveSecret()
	data, _ := json.MarshalIndent(v.Conf, "", " ")
	n.attr.Length = uint64(len(data))
	if conf.Meta.Subdir != "" { // don't show trash directory
		internalNodes = internalNodes[:len(internalNodes)-1]
	}
	if conf.PrefixInternal {
		for _, n := range internalNodes {
			n.name = ".jfs" + n.name
		}
		meta.TrashName = ".jfs" + meta.TrashName
	}

	statePath := os.Getenv("_FUSE_STATE_PATH")
	if statePath == "" {
		statePath = fmt.Sprintf("/tmp/state%d.json", os.Getppid())
	}
	if err := v.loadAllHandles(statePath); err != nil && !os.IsNotExist(err) {
		logger.Errorf("load state from %s: %s", statePath, err)
	}
	_ = os.Rename(statePath, statePath+".bak")

	go v.cleanupModified()
	initVFSMetrics(v, writer, reader, registerer)
	return v
}

func (v *VFS) invalidateAttr(ino Ino) {
	v.modM.Lock()
	v.modifiedAt[ino] = time.Now()
	v.modM.Unlock()
}

func (v *VFS) ModifiedSince(ino Ino, start time.Time) bool {
	v.modM.Lock()
	t, ok := v.modifiedAt[ino]
	v.modM.Unlock()
	return ok && t.After(start)
}

func (v *VFS) cleanupModified() {
	for {
		v.modM.Lock()
		expire := time.Now().Add(time.Second * -30)
		var cnt, deleted int
		for i, t := range v.modifiedAt {
			if t.Before(expire) {
				delete(v.modifiedAt, i)
				deleted++
			}
			cnt++
			if cnt > 1000 {
				break
			}
		}
		v.modM.Unlock()
		time.Sleep(time.Millisecond * time.Duration(1000*(cnt+1-deleted*2)/(cnt+1)))
	}
}

func (v *VFS) FlushAll(path string) (err error) {
	now := time.Now()
	defer func() {
		logger.Infof("flush buffered data in %s: %v", time.Since(now), err)
	}()
	err = v.writer.FlushAll()
	if err != nil {
		return err
	}
	if path == "" {
		return nil
	}
	return v.dumpAllHandles(path)
}

func initVFSMetrics(v *VFS, writer DataWriter, reader DataReader, registerer prometheus.Registerer) {
	if registerer == nil {
		return
	}
	handlersGause := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "fuse_open_handlers",
		Help: "number of open files and directories.",
	}, func() float64 {
		v.hanleM.Lock()
		defer v.hanleM.Unlock()
		return float64(len(v.handles))
	})
	_ = registerer.Register(handlersGause)
	InitMemoryBufferMetrics(writer, reader, registerer)
}

func InitMemoryBufferMetrics(writer DataWriter, reader DataReader, registerer prometheus.Registerer) {
	usedBufferSize := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "used_buffer_size_bytes",
		Help: "size of currently used buffer.",
	}, func() float64 {
		if dw, ok := writer.(*dataWriter); ok {
			return float64(dw.usedBufferSize())
		}
		return 0.0
	})
	storeCacheSize := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "store_cache_size_bytes",
		Help: "size of store cache.",
	}, func() float64 {
		if dw, ok := writer.(*dataWriter); ok {
			return float64(dw.store.UsedMemory())
		}
		return 0.0
	})
	readBufferMetric := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name: "used_read_buffer_size_bytes",
		Help: "size of currently used buffer for read",
	}, func() float64 {
		if dr, ok := reader.(*dataReader); ok {
			return float64(dr.readBufferUsed())
		}
		return 0.0
	})
	_ = registerer.Register(usedBufferSize)
	_ = registerer.Register(storeCacheSize)
	_ = registerer.Register(readBufferMetric)
}

func InitMetrics(registerer prometheus.Registerer) {
	if registerer == nil {
		return
	}
	registerer.MustRegister(readSizeHistogram)
	registerer.MustRegister(writtenSizeHistogram)
	registerer.MustRegister(opsDurationsHistogram)
	registerer.MustRegister(opsTotal)
	registerer.MustRegister(opsDurations)
	registerer.MustRegister(opsIOErrors)
	registerer.MustRegister(compactSizeHistogram)
}

// Linux ACL format:
//
//	version:8 (2)
//	flags:8 (0)
//	filler:16
//	N * [ tag:16 perm:16 id:32 ]
//	tag:
//	  01 - user
//	  02 - named user
//	  04 - group
//	  08 - named group
//	  10 - mask
//	  20 - other

func encodeACL(n *acl.Rule) []byte {
	length := 4 + 24 + uint32(len(n.NamedUsers)+len(n.NamedGroups))*8
	if n.Mask != 0xFFFF {
		length += 8
	}
	buff := make([]byte, length)
	w := utils.NewNativeBuffer(buff)
	w.Put8(acl.Version) // version
	w.Put8(0)           // flag
	w.Put16(0)          // filler
	wRule := func(tag, perm uint16, id uint32) {
		w.Put16(tag)
		w.Put16(perm)
		w.Put32(id)
	}
	wRule(1, n.Owner, 0xFFFFFFFF)
	for _, rule := range n.NamedUsers {
		wRule(2, rule.Perm, rule.Id)
	}
	wRule(4, n.Group, 0xFFFFFFFF)
	for _, rule := range n.NamedGroups {
		wRule(8, rule.Perm, rule.Id)
	}
	if n.Mask != 0xFFFF {
		wRule(0x10, n.Mask, 0xFFFFFFFF)
	}
	wRule(0x20, n.Other, 0xFFFFFFFF)
	return buff
}

func decodeACL(buff []byte) (*acl.Rule, syscall.Errno) {
	length := len(buff)
	if length < 4 || ((length % 8) != 4) || buff[0] != acl.Version {
		return nil, syscall.EINVAL
	}

	n := acl.EmptyRule()
	r := utils.NewNativeBuffer(buff[4:])
	for r.HasMore() {
		tag := r.Get16()
		perm := r.Get16()
		id := r.Get32()
		switch tag {
		case 1:
			if n.Owner != 0xFFFF {
				return nil, syscall.EINVAL
			}
			n.Owner = perm
		case 2:
			n.NamedUsers = append(n.NamedUsers, acl.Entry{Id: id, Perm: perm})
		case 4:
			if n.Group != 0xFFFF {
				return nil, syscall.EINVAL
			}
			n.Group = perm
		case 8:
			n.NamedGroups = append(n.NamedGroups, acl.Entry{Id: id, Perm: perm})
		case 0x10:
			if n.Mask != 0xFFFF {
				return nil, syscall.EINVAL
			}
			n.Mask = perm
		case 0x20:
			if n.Other != 0xFFFF {
				return nil, syscall.EINVAL
			}
			n.Other = perm
		}
	}
	if n.Mask == 0xFFFF && len(n.NamedUsers)+len(n.NamedGroups) > 0 {
		return nil, syscall.EINVAL
	}
	return n, 0
}

var aclTypes = map[string]uint8{
	_SECURITY_ACL:         acl.TypeAccess,
	_SECURITY_ACL_DEFAULT: acl.TypeDefault,
}


================================================
FILE: pkg/vfs/vfs_test.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"encoding/binary"
	"encoding/json"
	"fmt"
	"log"
	"math/rand"
	"reflect"
	"slices"
	"strings"
	"syscall"
	"testing"
	"time"

	"github.com/google/uuid"
	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/require"
	"golang.org/x/sys/unix"
)

// nolint:errcheck

func createTestVFS(applyMetaConfOption func(metaConfig *meta.Config), metaUri string) (*VFS, object.ObjectStorage) {
	mp := "/jfs"
	metaConf := meta.DefaultConf()
	metaConf.MountPoint = mp
	if applyMetaConfOption != nil {
		applyMetaConfOption(metaConf)
	}
	if metaUri == "" {
		metaUri = "memkv://"
	}
	m := meta.NewClient(metaUri, metaConf)
	format := &meta.Format{
		Name:        "test",
		UUID:        uuid.New().String(),
		Storage:     "mem",
		BlockSize:   4096,
		Compression: "lz4",
		DirStats:    true,
	}
	err := m.Init(format, true)
	if err != nil {
		log.Fatalf("setting: %s", err)
	}
	conf := &Config{
		Meta:    metaConf,
		Format:  *format,
		Version: "Juicefs",
		Chunk: &chunk.Config{
			BlockSize:   format.BlockSize * 1024,
			Compress:    format.Compression,
			MaxUpload:   2,
			MaxDownload: 200,
			BufferSize:  30 << 20,
			CacheSize:   10 << 20,
			CacheDir:    "memory",
		},
		FuseOpts: &FuseOptions{},
	}
	blob, _ := object.CreateStorage("mem", "", "", "", "")
	registry := prometheus.NewRegistry() // replace default so only JuiceFS metrics are exposed
	registerer := prometheus.WrapRegistererWithPrefix("juicefs_",
		prometheus.WrapRegistererWith(prometheus.Labels{"mp": mp, "vol_name": format.Name}, registry))
	store := chunk.NewCachedStore(blob, *conf.Chunk, registry)
	return NewVFS(conf, m, store, registerer, registry), blob
}

func TestVFSBasic(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.NewContext(10, 1, []uint32{2, 3}))

	if st, e := v.StatFS(ctx, 1); e != 0 {
		t.Fatalf("statfs 1: %s", e)
	} else if st.Total-st.Avail != 0 {
		t.Fatalf("used: %d", st.Total-st.Avail)
	}

	// dirs
	de, e := v.Mkdir(ctx, 1, "d1", 0755, 0)
	if e != 0 {
		t.Fatalf("mkdir d1: %s", e)
	}
	if _, e := v.Mkdir(ctx, de.Inode, "d2", 0755, 0); e != 0 {
		t.Fatalf("mkdir d1/d2: %s", e)
	}
	if e := v.Rmdir(ctx, 1, "d1"); e != syscall.ENOTEMPTY {
		t.Fatalf("rmdir not empty: %s", e)
	}
	if e := v.Rmdir(ctx, de.Inode, "d2"); e != 0 {
		t.Fatalf("rmdir d1/d2: %s", e)
	}

	// files
	fe, e := v.Mknod(ctx, de.Inode, "f1", 0644|syscall.S_IFREG, 0, 0)
	if e != 0 {
		t.Fatalf("mknod d1/f1: %s", e)
	}
	if e := v.Access(ctx, fe.Inode, unix.X_OK); e != syscall.EACCES {
		t.Fatalf("access d1/f1: %s", e)
	}
	if _, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrMtimeNow|meta.SetAttrAtimeNow, 0, 0, 0, 0, 0, 0, 0, 0, 0); e != 0 {
		t.Fatalf("setattr d1/f2 mtimeNow: %s", e)
	}
	if fe2, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrMode|meta.SetAttrUID|meta.SetAttrGID|meta.SetAttrAtime|meta.SetAttrMtime|meta.SetAttrSize, 0, 0755, 1, 3, 1234, 1234, 5678, 5678, 1024); e != 0 {
		t.Fatalf("setattr d1/f1: %s %d %d", e, fe2.Attr.Gid, fe2.Attr.Length)
	} else if fe2.Attr.Mode != 0755 || fe2.Attr.Uid != 1 || fe2.Attr.Gid != 3 || fe2.Attr.Atime != 1234 || fe2.Attr.Atimensec != 5678 || fe2.Attr.Mtime != 1234 || fe2.Attr.Mtimensec != 5678 || fe2.Attr.Length != 1024 {
		t.Fatalf("setattr d1/f1: %+v", fe2.Attr)
	}
	if e := v.Access(ctx, fe.Inode, unix.X_OK); e != 0 {
		t.Fatalf("access d1/f1: %s", e)
	}
	if _, e := v.Link(ctx, fe.Inode, 1, "f2"); e != 0 {
		t.Fatalf("link f2->f1: %s", e)
	}
	if fe, e := v.GetAttr(ctx, fe.Inode, 0); e != 0 || fe.Attr.Nlink != 2 {
		t.Fatalf("getattr d1/f2: %s %d", e, fe.Attr.Nlink)
	}
	if e := v.Unlink(ctx, de.Inode, "f1"); e != 0 {
		t.Fatalf("unlink d1/f1: %s", e)
	}
	if fe, e := v.Lookup(ctx, 1, "f2"); e != 0 || fe.Attr.Nlink != 1 {
		t.Fatalf("lookup f2: %s", e)
	}
	if e := v.Rename(ctx, 1, "f2", 1, "f3", 0); e != 0 {
		t.Fatalf("rename f2 -> f3: %s", e)
	}
	if fe, fh, e := v.Open(ctx, fe.Inode, syscall.O_RDONLY); e != 0 {
		t.Fatalf("open f3: %s", e)
	} else if e := v.Flush(ctx, fe.Inode, fh, 0); e != 0 {
		t.Fatalf("close f3: %s", e)
	} else {
		v.Release(ctx, fe.Inode, fh)
	}

	// symlink
	if fe, e := v.Symlink(ctx, "f2", 1, "sym"); e != 0 {
		t.Fatalf("symlink sym -> f2: %s", e)
	} else if target, e := v.Readlink(ctx, fe.Inode); e != 0 || string(target) != "f2" {
		t.Fatalf("readlink sym: %s %s", e, string(target))
	}

	// edge cases
	longName := strings.Repeat("a", 256)
	if _, e = v.Lookup(ctx, 1, longName); e != syscall.ENAMETOOLONG {
		t.Fatalf("lookup long name")
	}
	if _, _, e = v.Create(ctx, 1, longName, 0, 0, 0); e != syscall.ENAMETOOLONG {
		t.Fatalf("create long name")
	}
	if _, e = v.Mknod(ctx, 1, longName, 0, 0, 0); e != syscall.ENAMETOOLONG {
		t.Fatalf("mknod long name")
	}
	if _, e = v.Mkdir(ctx, 1, longName, 0, 0); e != syscall.ENAMETOOLONG {
		t.Fatalf("mkdir long name")
	}
	if _, e = v.Link(ctx, 2, 1, longName); e != syscall.ENAMETOOLONG {
		t.Fatalf("link long name")
	}
	if e = v.Unlink(ctx, 1, longName); e != syscall.ENAMETOOLONG {
		t.Fatalf("unlink long name")
	}
	if e = v.Rmdir(ctx, 1, longName); e != syscall.ENAMETOOLONG {
		t.Fatalf("rmdir long name")
	}
	if _, e = v.Symlink(ctx, "", 1, longName); e != syscall.ENAMETOOLONG {
		t.Fatalf("symlink long name")
	}
	if e = v.Rename(ctx, 1, "a", 1, longName, 0); e != syscall.ENAMETOOLONG {
		t.Fatalf("rename long name")
	}
	if e = v.Rename(ctx, 1, longName, 1, "a", 0); e != syscall.ENAMETOOLONG {
		t.Fatalf("rename long name")
	}

}

func TestVFSIO(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.Background())
	fe, fh, e := v.Create(ctx, 1, "file", 0755, 0, syscall.O_RDWR)
	if e != 0 {
		t.Fatalf("create file: %s", e)
	}
	if e = v.Fallocate(ctx, fe.Inode, 0, 0, 64<<10, fh); e != 0 {
		t.Fatalf("fallocate : %s", e)
	}
	if e = v.Write(ctx, fe.Inode, []byte("hello"), 0, fh); e != 0 {
		t.Fatalf("write file: %s", e)
	}
	if e = v.Fsync(ctx, fe.Inode, 1, fh); e != 0 {
		t.Fatalf("fsync file: %s", e)
	}
	if e = v.Write(ctx, fe.Inode, []byte("hello"), 100<<20, fh); e != 0 {
		t.Fatalf("write file: %s", e)
	}
	var attr meta.Attr
	if e = v.Truncate(ctx, fe.Inode, (100<<20)+2, fh, &attr); e != 0 {
		t.Fatalf("truncate file: %s", e)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 10, 0); e != 0 || n != 10 {
		t.Fatalf("copyfilerange: %s %d", e, n)
	}
	var buf = make([]byte, 128<<10)
	if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 {
		t.Fatalf("read file: %s", e)
	} else if n != len(buf) {
		t.Fatalf("short read file: %d != %d", n, len(buf))
	} else if string(buf[:5]) != "hello" {
		t.Fatalf("unexpected data: %q", string(buf[:5]))
	}
	if n, e := v.Read(ctx, fe.Inode, buf[:6], 10<<20, fh); e != 0 || n != 6 || string(buf[:n]) != "hello\x00" {
		t.Fatalf("read file end: %s %d %s", e, n, string(buf[:n]))
	}
	if n, e := v.Read(ctx, fe.Inode, buf, 100<<20, fh); e != 0 || n != 2 || string(buf[:n]) != "he" {
		t.Fatalf("read file end: %s %d %s", e, n, string(buf[:n]))
	}
	if e = v.Flush(ctx, fe.Inode, fh, 0); e != 0 {
		t.Fatalf("flush file: %s", e)
	}

	// edge cases
	_, fh2, _ := v.Open(ctx, fe.Inode, syscall.O_RDONLY)
	_, fh3, _ := v.Open(ctx, fe.Inode, syscall.O_WRONLY)
	wHandle := v.findHandle(fe.Inode, fh3)
	if wHandle == nil {
		t.Fatalf("failed to find O_WRONLY handle")
	}
	wHandle.reader = nil
	// read
	if _, e = v.Read(ctx, fe.Inode, nil, 0, 0); e != syscall.EBADF {
		t.Fatalf("read bad fd: %s", e)
	}
	if _, e = v.Read(ctx, fe.Inode, make([]byte, 1024), 0, fh3); e != syscall.EBADF {
		t.Fatalf("read write-only fd: %s", e)
	}
	if _, e = v.Read(ctx, fe.Inode, nil, 1<<60, fh2); e != syscall.EFBIG {
		t.Fatalf("read off too big: %s", e)
	}
	// write
	if e = v.Write(ctx, fe.Inode, nil, 0, 0); e != syscall.EBADF {
		t.Fatalf("write bad fd: %s", e)
	}
	if e = v.Write(ctx, fe.Inode, nil, 1<<60, fh2); e != syscall.EFBIG {
		t.Fatalf("write off too big: %s", e)
	}
	if e = v.Write(ctx, fe.Inode, make([]byte, 1024), 0, fh2); e != syscall.EBADF {
		t.Fatalf("write read-only fd: %s", e)
	}
	// truncate
	if e = v.Truncate(ctx, fe.Inode, -1, 0, &meta.Attr{}); e != syscall.EINVAL {
		t.Fatalf("truncate invalid off,length: %s", e)
	}
	if e = v.Truncate(ctx, fe.Inode, 1<<60, 0, &meta.Attr{}); e != syscall.EFBIG {
		t.Fatalf("truncate too large: %s", e)
	}
	// fallocate
	if e = v.Fallocate(ctx, fe.Inode, 0, -1, -1, fh); e != syscall.EINVAL {
		t.Fatalf("fallocate invalid off,length: %s", e)
	}
	if e = v.Fallocate(ctx, StatsInode, 0, 0, 1, fh); e != syscall.EPERM {
		t.Fatalf("fallocate invalid off,length: %s", e)
	}
	if e = v.Fallocate(ctx, fe.Inode, 0, 0, 100, 0); e != syscall.EBADF {
		t.Fatalf("fallocate invalid off,length: %s", e)
	}
	if e = v.Fallocate(ctx, fe.Inode, 0, 1<<60, 1<<60, fh); e != syscall.EFBIG {
		t.Fatalf("fallocate invalid off,length: %s", e)
	}
	if e = v.Fallocate(ctx, fe.Inode, 0, 1<<10, 1<<20, fh2); e != syscall.EBADF {
		t.Fatalf("fallocate read-only fd: %s", e)
	}

	// copy file range
	if n, e := v.CopyFileRange(ctx, StatsInode, fh, 0, fe.Inode, fh, 10<<20, 10, 0); e != syscall.ENOTSUP {
		t.Fatalf("copyfilerange internal file: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, StatsInode, fh, 10<<20, 10, 0); e != syscall.EPERM {
		t.Fatalf("copyfilerange internal file: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, 0, 0, fe.Inode, fh, 10<<20, 10, 0); e != syscall.EBADF {
		t.Fatalf("copyfilerange invalid fh: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, 0, 10<<20, 10, 0); e != syscall.EBADF {
		t.Fatalf("copyfilerange invalid fh: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 10, 1); e != syscall.EINVAL {
		t.Fatalf("copyfilerange invalid flag: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 10<<20, 1<<50, 0); e != syscall.EINVAL {
		t.Fatalf("copyfilerange overlap: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh, 1<<63, 1<<63, 0); e != syscall.EFBIG {
		t.Fatalf("copyfilerange too big file: %s %d", e, n)
	}
	if n, e := v.CopyFileRange(ctx, fe.Inode, fh, 0, fe.Inode, fh2, 1<<20, 1<<10, 0); e != syscall.EACCES {
		t.Fatalf("copyfilerange too big file: %s %d", e, n)
	}

	// sequntial write/read
	for i := uint64(0); i < 1001; i++ {
		if e := v.Write(ctx, fe.Inode, make([]byte, 128<<10), i*(128<<10), fh); e != 0 {
			t.Fatalf("write big file: %s", e)
		}
	}
	buf = make([]byte, 128<<10)
	for i := uint64(0); i < 1000; i++ {
		if n, e := v.Read(ctx, fe.Inode, buf, i*(128<<10), fh); e != 0 || n != (128<<10) {
			t.Fatalf("read big file: %s", e)
		} else {
			for j := 0; j < 128<<10; j++ {
				if buf[j] != 0 {
					t.Fatalf("read big file: %d %d", j, buf[j])
				}
			}
		}
	}
	// many small write
	buf = make([]byte, 5<<10)
	for j := range buf {
		buf[j] = 1
	}
	for i := int64(32 - 1); i >= 0; i-- {
		if e := v.Write(ctx, fe.Inode, buf, uint64(i)*(4<<10), fh); e != 0 {
			t.Fatalf("write big file: %s", e)
		}
	}
	time.Sleep(time.Millisecond * 1500) // wait for it to be flushed
	buf = make([]byte, 128<<10)
	if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 || n != (128<<10) {
		t.Fatalf("read big file: %s", e)
	} else {
		for j := range buf {
			if buf[j] != 1 {
				t.Fatalf("read big file: %d %d", j, buf[j])
			}
		}
	}

	v.Release(ctx, fe.Inode, fh)
}

func TestVFSXattrs(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.Background())
	fe, e := v.Mkdir(ctx, 1, "xattrs", 0755, 0)
	if e != 0 {
		t.Fatalf("mkdir xattrs: %s", e)
	}
	// normal cases
	if _, e := v.GetXattr(ctx, fe.Inode, "test", 0); e != meta.ENOATTR {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if e := v.SetXattr(ctx, fe.Inode, "test", []byte("value"), 0); e != 0 {
		t.Fatalf("setxattr test: %s", e)
	}
	if e = v.SetXattr(ctx, fe.Inode, "test", []byte("v1"), meta.XattrCreate); e == 0 {
		t.Fatalf("setxattr test (create): %s", e)
	}
	if v, e := v.ListXattr(ctx, fe.Inode, 100); e != 0 || string(v) != "test\x00" {
		t.Fatalf("listxattr: %s %q", e, string(v))
	}
	if v, e := v.GetXattr(ctx, fe.Inode, "test", 5); e != 0 || string(v) != "value" {
		t.Fatalf("getxattr test: %s %v", e, v)
	}
	if e = v.SetXattr(ctx, fe.Inode, "test", []byte("v2"), meta.XattrReplace); e != 0 {
		t.Fatalf("setxattr test (replace): %s", e)
	}
	if v, e := v.GetXattr(ctx, fe.Inode, "test", 5); e != 0 || string(v) != "v2" {
		t.Fatalf("getxattr test: %s %v", e, v)
	}
	if _, e := v.GetXattr(ctx, fe.Inode, "test", 1); e != syscall.ERANGE {
		t.Fatalf("getxattr large value: %s", e)
	}
	if v, e := v.ListXattr(ctx, fe.Inode, 1); e != syscall.ERANGE {
		t.Fatalf("listxattr: %s %q", e, string(v))
	}
	if e := v.RemoveXattr(ctx, fe.Inode, "test"); e != 0 {
		t.Fatalf("removexattr test: %s", e)
	}
	if _, e := v.GetXattr(ctx, fe.Inode, "test", 0); e != meta.ENOATTR {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if v, e := v.ListXattr(ctx, fe.Inode, 100); e != 0 || string(v) != "" {
		t.Fatalf("listxattr: %s %q", e, string(v))
	}
	// edge case
	if e = v.SetXattr(ctx, fe.Inode, "", []byte("v2"), 0); e != syscall.EINVAL {
		t.Fatalf("setxattr long key: %s", e)
	}
	if e = v.SetXattr(ctx, fe.Inode, strings.Repeat("test", 100), []byte("v2"), 0); e != syscall.EPERM && e != syscall.ERANGE {
		t.Fatalf("setxattr long key: %s", e)
	}
	if e = v.SetXattr(ctx, fe.Inode, "test", make([]byte, 1<<20), 0); e != syscall.E2BIG && e != syscall.ERANGE {
		t.Fatalf("setxattr long key: %s", e)
	}
	if e = v.SetXattr(ctx, fe.Inode, "system.posix_acl_access", []byte("v2"), 0); e != syscall.ENOTSUP {
		t.Fatalf("setxattr long key: %s", e)
	}
	if e = v.SetXattr(ctx, ConfigInode, "test", []byte("v2"), 0); e != syscall.EPERM {
		t.Fatalf("setxattr long key: %s", e)
	}
	if _, e := v.GetXattr(ctx, fe.Inode, "", 0); e != syscall.EINVAL {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if _, e := v.GetXattr(ctx, fe.Inode, strings.Repeat("test", 100), 0); e == 0 {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if _, e := v.GetXattr(ctx, ConfigInode, "test", 0); e != meta.ENOATTR {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if _, e := v.GetXattr(ctx, fe.Inode, "system.posix_acl_access", 0); e != syscall.ENODATA {
		t.Fatalf("getxattr not existed: %s", e)
	}
	if v, e := v.ListXattr(ctx, ConfigInode, 0); e != meta.ENOATTR {
		t.Fatalf("listxattr: %s %q", e, string(v))
	}
	if e := v.RemoveXattr(ctx, fe.Inode, strings.Repeat("test", 100)); e != syscall.EPERM && e != syscall.ERANGE {
		t.Fatalf("removexattr test: %s", e)
	}
	if e := v.RemoveXattr(ctx, fe.Inode, ""); e != syscall.EINVAL {
		t.Fatalf("removexattr test: %s", e)
	}
	if e := v.RemoveXattr(ctx, fe.Inode, "system.posix_acl_access"); e != syscall.ENOTSUP {
		t.Fatalf("removexattr test: %s", e)
	}
	if e := v.RemoveXattr(ctx, ConfigInode, "test"); e != syscall.EPERM {
		t.Fatalf("removexattr test: %s", e)
	}
}

type accessCase struct {
	uid  uint32
	gid  uint32
	mode uint16
	r    syscall.Errno
}

func TestAccessMode(t *testing.T) {
	var attr = meta.Attr{
		Uid:  1,
		Gid:  2,
		Mode: 0751,
	}

	cases := []accessCase{
		{0, 0, MODE_MASK_R | MODE_MASK_W | MODE_MASK_X, 0},
		{1, 3, MODE_MASK_R | MODE_MASK_W | MODE_MASK_X, 0},
		{2, 2, MODE_MASK_R | MODE_MASK_X, 0},
		{2, 2, MODE_MASK_W, syscall.EACCES},
		{3, 4, MODE_MASK_X, 0},
		{3, 4, MODE_MASK_R, syscall.EACCES},
		{3, 4, MODE_MASK_W, syscall.EACCES},
	}
	for _, c := range cases {
		if e := accessTest(&attr, c.mode, c.uid, c.gid); e != c.r {
			t.Fatalf("expect %s on case %+v, but got %s", c.r, c, e)
		}
	}
}

func assertEqual(t *testing.T, a interface{}, b interface{}) {
	if reflect.DeepEqual(a, b) {
		return
	}
	message := fmt.Sprintf("%v != %v", a, b)
	t.Fatal(message)
}

func TestSetattrStr(t *testing.T) {
	assertEqual(t, setattrStr(0, 0, 0, 0, 0, 0, 0), "")
	assertEqual(t, setattrStr(meta.SetAttrMode, 01755, 0, 0, 0, 0, 0), "mode=?rwxr-xr-t:01755")
	assertEqual(t, setattrStr(meta.SetAttrUID, 0, 1, 0, 0, 0, 0), "uid=1")
	assertEqual(t, setattrStr(meta.SetAttrGID, 0, 1, 2, 0, 0, 0), "gid=2")
	assertEqual(t, setattrStr(meta.SetAttrAtime, 0, 0, 0, -2, -1, 0), "atime=NOW")
	assertEqual(t, setattrStr(meta.SetAttrAtime, 0, 0, 0, 123, 123, 0), "atime=123")
	assertEqual(t, setattrStr(meta.SetAttrAtimeNow, 0, 0, 0, 0, 0, 0), "atime=NOW")
	assertEqual(t, setattrStr(meta.SetAttrMtime, 0, 0, 0, 0, -1, 0), "mtime=NOW")
	assertEqual(t, setattrStr(meta.SetAttrMtime, 0, 0, 0, 0, 123, 0), "mtime=123")
	assertEqual(t, setattrStr(meta.SetAttrMtimeNow, 0, 0, 0, 0, 0, 0), "mtime=NOW")
	assertEqual(t, setattrStr(meta.SetAttrSize, 0, 0, 0, 0, 0, 123), "size=123")
	assertEqual(t, setattrStr(meta.SetAttrUID|meta.SetAttrGID, 0, 1, 2, 0, 0, 0), "uid=1,gid=2")
}

func TestVFSLocks(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.Background())
	fe, fh, e := v.Create(ctx, 1, "flock", 0644, 0, syscall.O_RDWR)
	if e != 0 {
		t.Fatalf("create flock: %s", e)
	}
	// flock
	if e = v.Flock(ctx, fe.Inode, fh, 123, 100, true); e != syscall.EINVAL {
		t.Fatalf("flock wr: %s", e)
	}
	if e = v.Flock(ctx, fe.Inode, fh, 123, syscall.F_WRLCK, true); e != 0 {
		t.Fatalf("flock wr: %s", e)
	}
	if e := v.Flock(ctx, fe.Inode, fh, 456, syscall.F_RDLCK, false); e != syscall.EAGAIN {
		t.Fatalf("flock rd: should block")
	}

	done := make(chan bool)
	go func() {
		_ = v.Flock(ctx, fe.Inode, fh, 456, syscall.F_RDLCK, true)
		done <- true
	}()
	if e := v.Flock(ctx, fe.Inode, fh, 123, syscall.F_UNLCK, true); e != 0 {
		t.Fatalf("flock unlock: %s", e)
	}
	select {
	case <-done:
	case <-time.NewTimer(time.Millisecond * 100).C:
		t.Fatalf("flock timeout on rdlock")
	}
	if e := v.Flock(ctx, fe.Inode, fh, 456, syscall.F_UNLCK, true); e != 0 {
		t.Fatalf("flock unlock rd: %s", e)
	}

	// posix lock
	if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 100, 100, 1, true); e != syscall.EINVAL {
		t.Fatalf("setlk: %s", e)
	}
	if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 100, syscall.F_WRLCK, 1, true); e != 0 {
		t.Fatalf("setlk: %s", e)
	}
	var start, len uint64 = 10, 1000
	var typ, pid uint32 = syscall.LOCK_UN, 10
	if e = v.Getlk(ctx, fe.Inode, fh, 2, &start, &len, &typ, &pid); e != syscall.EINVAL {
		t.Fatalf("getlk: %s", e)
	}
	typ = syscall.F_RDLCK
	if e = v.Getlk(ctx, fe.Inode, fh, 2, &start, &len, &typ, &pid); e != 0 {
		t.Fatalf("getlk: %s", e)
	} else if start != 0 || len != 100 || typ != syscall.F_WRLCK || pid != 1 {
		t.Fatalf("getlk result: %d %d %d %d", start, len, typ, pid)
	}
	if e = v.Setlk(ctx, fe.Inode, fh, 2, 10, 100, syscall.F_RDLCK, 10, false); e != syscall.EAGAIN {
		t.Fatalf("setlk rd: %s", e)
	}
	go func() {
		_ = v.Setlk(ctx, fe.Inode, fh, 2, 10, 100, syscall.F_RDLCK, 10, false)
		done <- true
	}()
	if e = v.Setlk(ctx, fe.Inode, fh, 1, 10, 100, syscall.F_UNLCK, 1, true); e != 0 {
		t.Fatalf("setlk unlock: %s", e)
	}
	select {
	case <-done:
	case <-time.NewTimer(time.Millisecond * 100).C:
		t.Fatalf("setlk timeout on rdlock")
	}
	if e = v.Setlk(ctx, fe.Inode, fh, 2, 0, 20, syscall.F_RDLCK, 10, false); e != syscall.EAGAIN {
		t.Fatalf("setlk rd: %s", e)
	}
	if e = v.Setlk(ctx, fe.Inode, fh, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != 0 {
		t.Fatalf("setlk unlock: %s", e)
	}
	if e = v.Flush(ctx, fe.Inode, fh, 0); e != 0 {
		t.Fatalf("flush: %s", e)
	}
	v.Release(ctx, fe.Inode, fh)
	// invalid fd
	if e = v.Flock(ctx, fe.Inode, 10, 123, syscall.F_WRLCK, true); e != syscall.EBADF {
		t.Fatalf("flock wr: %s", e)
	}
	if e = v.Setlk(ctx, fe.Inode, 10, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != syscall.EBADF {
		t.Fatalf("setlk unlock: %s", e)
	}
	if e = v.Getlk(ctx, fe.Inode, 10, 2, &start, &len, &typ, &pid); e != syscall.EBADF {
		t.Fatalf("getlk: %s", e)
	}
	// internal file
	fe, _ = v.Lookup(ctx, 1, ".stats")
	if e = v.Flock(ctx, fe.Inode, 10, 123, syscall.F_WRLCK, true); e != syscall.EPERM {
		t.Fatalf("flock wr: %s", e)
	}
	if e = v.Setlk(ctx, fe.Inode, 10, 1, 0, 1000, syscall.F_UNLCK, 1, true); e != syscall.EPERM {
		t.Fatalf("setlk unlock: %s", e)
	}
	if e = v.Getlk(ctx, fe.Inode, 10, 2, &start, &len, &typ, &pid); e != syscall.EPERM {
		t.Fatalf("getlk: %s", e)
	}
}

func TestInternalFile(t *testing.T) {
	v, _ := createTestVFS(nil, "")
	ctx := NewLogContext(meta.Background())
	// list internal files
	fh, _ := v.Opendir(ctx, 1, 0)
	entries, _, e := v.Readdir(ctx, 1, 1024, 0, fh, true)
	if e != 0 {
		t.Fatalf("readdir 1: %s", e)
	}
	internalFiles := make(map[string]bool)
	for _, e := range entries {
		if IsSpecialName(string(e.Name)) && e.Attr.Typ == meta.TypeFile {
			internalFiles[string(e.Name)] = true
		}
	}
	if len(internalFiles) != 3 {
		t.Fatalf("there should be 3 internal files but got %d", len(internalFiles))
	}
	v.Releasedir(ctx, 1, fh)

	// .config
	ctx2 := NewLogContext(meta.NewContext(10, 111, []uint32{222}))
	fe, e := v.Lookup(ctx2, 1, ".config")
	if e != 0 {
		t.Fatalf("lookup .config: %s", e)
	}
	if e := v.Access(ctx2, fe.Inode, unix.R_OK); e != syscall.EACCES { // other user can't access .config
		t.Fatalf("access .config: %s", e)
	}
	if _, e := v.GetAttr(ctx, fe.Inode, 0); e != 0 {
		t.Fatalf("getattr .config: %s", e)
	}
	// ignore setattr on internal files
	if fe2, e := v.SetAttr(ctx, fe.Inode, meta.SetAttrUID, 0, 0, ctx2.Uid(), 0, 0, 0, 0, 0, 0); e != 0 || fe2.Attr.Uid != fe.Attr.Uid {
		t.Fatalf("can't setattr on internal files")
	}
	if e = v.Unlink(ctx, 1, ".config"); e != syscall.EPERM {
		t.Fatalf("should not unlink internal file")
	}
	if _, _, e = v.Open(ctx, fe.Inode, syscall.O_WRONLY); e != syscall.EACCES {
		t.Fatalf("write .config: %s", e)
	}
	_, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY)
	if e != 0 {
		t.Fatalf("open .config: %s", e)
	}
	buf := make([]byte, 10240)
	if _, e := v.Read(ctx, fe.Inode, buf, 0, 0); e != syscall.EBADF {
		t.Fatalf("read .config: %s", e)
	}
	if n, e := v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 {
		t.Fatalf("read .config: %s", e)
	} else if !strings.Contains(string(buf[:n]), v.Conf.Format.UUID) {
		t.Fatalf("invalid config: %q", string(buf[:n]))
	}

	// .stats
	fe, e = v.Lookup(ctx, 1, ".stats")
	if e != 0 {
		t.Fatalf("lookup .stats: %s", e)
	}
	if e := v.Access(ctx, fe.Inode, unix.W_OK); e != 0 { // root can do everything
		t.Fatalf("access .stats: %s", e)
	}
	fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY)
	if e != 0 {
		t.Fatalf("open .stats: %s", e)
	}
	defer v.Release(ctx, fe.Inode, fh)
	defer v.Flush(ctx, fe.Inode, fh, 0)
	buf = make([]byte, 128<<10)
	n, e := v.Read(ctx, fe.Inode, buf[:4<<10], 0, fh)
	if e != 0 {
		t.Fatalf("read .stats: %s", e)
	}
	if n == 4<<10 {
		if n2, e := v.Read(ctx, fe.Inode, buf[n:], uint64(n), fh); e != 0 {
			t.Fatalf("read .stats 2: %s", e)
		} else {
			n += n2
		}
	}
	if !strings.Contains(string(buf[:n]), "fuse_open_handlers") {
		t.Fatalf(".stats should contains `memory`, but got %s", string(buf[:n]))
	}
	if e = v.Truncate(ctx, fe.Inode, 0, 1, &meta.Attr{}); e != syscall.EPERM {
		t.Fatalf("truncate .config: %s", e)
	}

	// accesslog
	fe, e = v.Lookup(ctx, 1, ".accesslog")
	if e != 0 {
		t.Fatalf("lookup .accesslog: %s", e)
	}
	fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDONLY)
	if e != 0 {
		t.Fatalf("open .accesslog: %s", e)
	}
	if n, e = v.Read(ctx, fe.Inode, buf, 0, fh); e != 0 {
		t.Fatalf("read .accesslog: %s", e)
	} else if !strings.Contains(string(buf[:n]), "open (9223372032559808513") {
		t.Fatalf("invalid access log: %q", string(buf[:n]))
	}
	_ = v.Flush(ctx, fe.Inode, fh, 0)
	v.Release(ctx, fe.Inode, fh)

	// control messages
	fe, e = v.Lookup(ctx, 1, ".control")
	if e != 0 {
		t.Fatalf("lookup .control: %s", e)
	}
	fe, fh, e = v.Open(ctx, fe.Inode, syscall.O_RDWR)
	if e != 0 {
		t.Fatalf("open .stats: %s", e)
	}
	readControl := func(resp []byte, off *uint64) (int, syscall.Errno) {
		for {
			if n, errno := v.Read(ctx, fe.Inode, resp, *off, fh); n == 0 {
				time.Sleep(time.Millisecond * 200)
			} else if n%17 == 0 {
				*off += uint64(n)
				continue
			} else if n%17 == 1 {
				*off += uint64(n / 17 * 17)
				resp[0] = resp[n-1]
				return 1, errno
			} else {
				return n, errno
			}
		}
	}

	readData := func(resp []byte, fileOff *uint64) ([]byte, syscall.Errno) {
		var off uint64
		for {
			n, errno := v.Read(ctx, fe.Inode, resp, *fileOff, fh)
			if errno != 0 {
				return nil, errno
			}
			if n == 0 {
				time.Sleep(time.Millisecond * 200)
				continue
			}
			*fileOff += uint64(n)
			for {
				if n == 1 {
					return nil, syscall.Errno(resp[off])
				} else if off+17 <= uint64(n) && resp[off] == meta.CPROGRESS {
					off += 17
				} else if off+5 < uint64(n) && resp[off] == meta.CDATA {
					size := binary.BigEndian.Uint32(resp[off+1 : off+5])
					if off+5+uint64(size) > uint64(n) {
						logger.Errorf("Bad response off %d n %d: %v", off, n, resp)
						return nil, syscall.EIO
					}
					return resp[off+5 : off+5+uint64(size)], 0
				} else {
					logger.Errorf("Bad response off %d n %d: %v", off, n, resp)
					return nil, syscall.EIO
				}
			}
		}
	}

	// rmr
	buf = make([]byte, 4+4+8+1+4)
	w := utils.FromBuffer(buf)
	w.Put32(meta.Rmr)
	w.Put32(13)
	w.Put64(1)
	w.Put8(4)
	w.Put([]byte("file"))
	if e := v.Write(ctx, fe.Inode, w.Bytes(), 0, fh); e != 0 {
		t.Fatalf("write info: %s", e)
	}
	var off uint64 = uint64(len(buf))
	resp := make([]byte, 1024*10)
	if n, e := readControl(resp, &off); e != 0 || n != 1 {
		t.Fatalf("read result: %s %d", e, n)
	} else if resp[0] != byte(syscall.ENOENT) {
		t.Fatalf("rmr result: %s", string(buf[:n]))
	} else {
		off += uint64(n)
	}
	// legacy info
	buf = make([]byte, 4+4+8)
	w = utils.FromBuffer(buf)
	w.Put32(meta.LegacyInfo)
	w.Put32(8)
	w.Put64(1)
	if e := v.Write(ctx, fe.Inode, w.Bytes(), off, fh); e != 0 {
		t.Fatalf("write legacy info: %s", e)
	}
	off += uint64(len(buf))
	buf = make([]byte, 1024*10)
	if n, e = readControl(buf, &off); e != 0 {
		t.Fatalf("read result: %s %d", e, n)
	} else if !strings.Contains(string(buf[:n]), "dirs:") {
		t.Fatalf("legacy info result: %s", string(buf[:n]))
	} else {
		off += uint64(n)
	}
	// info v2
	buf = make([]byte, 4+4+8)
	w = utils.FromBuffer(buf)
	w.Put32(meta.InfoV2)
	w.Put32(8)
	w.Put64(1)
	if e := v.Write(ctx, fe.Inode, w.Bytes(), off, fh); e != 0 {
		t.Fatalf("write info v2: %s", e)
	}
	off += uint64(len(buf))
	buf = make([]byte, 1024*10)
	data, e := readData(buf, &off)
	if e != 0 {
		t.Fatalf("read progress bar: %s %d", e, n)
	}

	var infoResp InfoResponse
	if e := json.Unmarshal(data, &infoResp); e != nil {
		t.Fatalf("unmarshal info v2: %s", e)
	}
	if infoResp.Failed && infoResp.Reason != "" {
		t.Fatalf("info v2 result: %s", infoResp.Reason)
	}

	// fill
	buf = make([]byte, 4+4+8+1+1+2+1)
	w = utils.FromBuffer(buf)
	w.Put32(meta.FillCache)
	w.Put32(13)
	w.Put64(1)
	w.Put8(1)
	w.Put([]byte("/"))
	w.Put16(2)
	w.Put8(0)
	if e := v.Write(ctx, fe.Inode, w.Bytes()[:10], 0, fh); e != 0 {
		t.Fatalf("write fill 1: %s", e)
	}
	if e := v.Write(ctx, fe.Inode, w.Bytes()[10:], 0, fh); e != 0 {
		t.Fatalf("write fill 2: %s", e)
	}
	off += uint64(len(buf))
	resp = make([]byte, 1024*10)

	data, _ = json.Marshal(CacheResponse{Locations: make(map[string]uint64)})
	expectSize := 1 + 4 + len(data)
	if n, e = readControl(resp, &off); e != 0 || n != expectSize {
		t.Fatalf("read result: %s %d %d", e, n, expectSize)
	}

	off += uint64(n)

	// invalid msg
	buf = make([]byte, 4+4+2)
	w = utils.FromBuffer(buf)
	w.Put32(meta.Rmr)
	w.Put32(0)
	if e := v.Write(ctx, fe.Inode, buf, off, fh); e != 0 {
		t.Fatalf("write info: %s", e)
	}
	off += uint64(len(buf))
	resp = make([]byte, 1024)
	if n, e := v.Read(ctx, fe.Inode, resp, off, fh); e != 0 || n != 1 {
		t.Fatalf("read result: %s %d", e, n)
	} else if resp[0] != uint8(syscall.EIO) {
		t.Fatalf("result: %s", string(resp[:n]))
	}
}

func TestReaddirCache(t *testing.T) {
	engines := map[string]string{
		"kv":    "memkv://",
		"db":    "sqlite3://:memory:",
		"redis": "redis://127.0.0.1:6379/2",
	}
	for typ, metaUri := range engines {
		testReaddirCache(t, metaUri, typ, 20)
		testReaddirCache(t, metaUri, typ, 4096)
	}
}

func testReaddirCache(t *testing.T, metaUri string, typ string, batchNum int) {
	v, _ := createTestVFS(nil, metaUri)
	ctx := NewLogContext(meta.Background())

	old := meta.DirBatchNum
	meta.DirBatchNum[typ] = batchNum
	defer func() {
		meta.DirBatchNum = old
	}()

	entry, st := v.Mkdir(ctx, 1, "testdir", 0777, 022)
	if st != 0 {
		t.Fatalf("mkdir testdir: %s", st)
	}
	parent := entry.Inode
	for i := 0; i <= 100; i++ {
		_, _ = v.Mkdir(ctx, parent, fmt.Sprintf("d%03d", i), 0777, 022)
	}

	defer func() {
		for i := 0; i <= 120; i++ {
			_ = v.Rmdir(ctx, parent, fmt.Sprintf("d%03d", i))
		}
		_ = v.Rmdir(ctx, 1, "testdir")
	}()

	fh, _ := v.Opendir(ctx, parent, 0)
	defer v.Releasedir(ctx, parent, fh)
	initNum, num := 2, 20
	var files = make(map[string]bool)
	// read first 20
	entries, _, _ := v.Readdir(ctx, parent, 20, initNum, fh, true)
	for _, e := range entries[:num] {
		files[string(e.Name)] = true
	}

	off := num + initNum
	{
		entries, _, _ = v.Readdir(ctx, parent, 20, off, fh, true) // read next 20
		v.UpdateReaddirOffset(ctx, parent, fh, off+1)             // but readdir buffer is too full to return all entries
		name := fmt.Sprintf("d%03d", off+2)
		_ = v.Rmdir(ctx, parent, name)
		entries, _, _ = v.Readdir(ctx, parent, 20, off, fh, true) // should only get 19 entries
		for _, e := range entries {
			if string(e.Name) == name {
				t.Fatalf("dir %s should be deleted", name)
			}
		}
	}
	v.UpdateReaddirOffset(ctx, parent, fh, off)
	for i := 0; i < 100; i += 10 {
		name := fmt.Sprintf("d%03d", i)
		_ = v.Rmdir(ctx, parent, name)
		delete(files, name)
	}
	for i := 100; i < 110; i++ {
		_, _ = v.Mkdir(ctx, parent, fmt.Sprintf("d%03d", i), 0777, 022)
		_ = v.Rename(ctx, parent, fmt.Sprintf("d%03d", i), parent, fmt.Sprintf("d%03d", i+10), 0)
		delete(files, fmt.Sprintf("d%03d", i))
	}
	for {
		entries, _, _ := v.Readdir(ctx, parent, 20, off, fh, true)
		if len(entries) == 0 {
			break
		}
		if len(entries) > 20 {
			entries = entries[:20]
		}
		for _, e := range entries {
			if e.Inode > 0 {
				files[string(e.Name)] = true
			} else {
				t.Logf("invalid entry %s", e.Name)
			}
		}
		off += len(entries)
		v.UpdateReaddirOffset(ctx, parent, fh, off)
	}
	for i := 0; i < 100; i += 10 {
		name := fmt.Sprintf("d%03d", i)
		if _, ok := files[name]; ok {
			t.Fatalf("dir %s should be deleted", name)
		}
	}
	for i := 100; i < 110; i++ {
		name := fmt.Sprintf("d%03d", i)
		if _, ok := files[name]; ok {
			t.Fatalf("dir %s should be deleted", name)
		}
	}
	for i := 110; i < 120; i++ {
		name := fmt.Sprintf("d%03d", i)
		if _, ok := files[name]; !ok {
			t.Fatalf("dir %s should be added", name)
		}
	}
}

func TestVFSReadDirSort(t *testing.T) {
	for _, metaUri := range []string{"", "sqlite3://", "redis://127.0.0.1:6379/2"} {
		testVFSReadDirSort(t, metaUri)
	}
}

func testVFSReadDirSort(t *testing.T, metaUri string) {
	v, _ := createTestVFS(func(metaConfig *meta.Config) {
		metaConfig.SortDir = true
	}, metaUri)
	ctx := NewLogContext(meta.Background())
	entry, st := v.Mkdir(ctx, 1, "testdir", 0777, 022)
	if st != 0 {
		t.Fatalf("mkdir testdir: %s", st)
	}
	parent := entry.Inode
	for i := 0; i < 100; i++ {
		_, _ = v.Mkdir(ctx, parent, fmt.Sprintf("d%d", i), 0777, 022)
	}
	defer func() {
		for i := 0; i < 100; i++ {
			_ = v.Rmdir(ctx, parent, fmt.Sprintf("d%d", i))
		}
		_ = v.Rmdir(ctx, 1, "testdir")
	}()
	fh, _ := v.Opendir(ctx, parent, 0)
	entries1, _, _ := v.Readdir(ctx, parent, 60, 10, fh, true)
	sorted := slices.IsSortedFunc(entries1, func(i, j *meta.Entry) int {
		return strings.Compare(string(i.Name), string(j.Name))
	})
	if !sorted {
		t.Fatalf("read dir result should sorted")
	}
	v.Releasedir(ctx, parent, fh)

	fh2, _ := v.Opendir(ctx, parent, 0)
	entries2, _, _ := v.Readdir(ctx, parent, 60, 10, fh, true)
	for i := 0; i < len(entries1); i++ {
		if string(entries1[i].Name) != string(entries2[i].Name) {
			t.Fatalf("read dir result should be same")
		}
	}
	v.Releasedir(ctx, parent, fh2)
}

func testReaddirBatch(t *testing.T, metaUri string, typ string, batchNum int) {
	n, extra := 5, 40

	v, _ := createTestVFS(nil, metaUri)
	ctx := NewLogContext(meta.Background())

	old := meta.DirBatchNum
	meta.DirBatchNum[typ] = batchNum
	defer func() {
		meta.DirBatchNum = old
	}()

	entry, st := v.Mkdir(ctx, 1, "testdir", 0777, 022)
	if st != 0 {
		t.Fatalf("mkdir testdir: %s", st)
	}

	parent := entry.Inode
	for i := 0; i < n*batchNum+extra; i++ {
		_, _ = v.Mkdir(ctx, parent, fmt.Sprintf("d%d", i), 0777, 022)
	}
	defer func() {
		for i := 0; i < n*batchNum+extra; i++ {
			_ = v.Rmdir(ctx, parent, fmt.Sprintf("d%d", i))
		}
		v.Rmdir(ctx, 1, "testdir")
	}()

	fh, _ := v.Opendir(ctx, parent, 0)
	defer v.Releasedir(ctx, parent, fh)
	entries1, _, _ := v.Readdir(ctx, parent, 0, 0, fh, true)
	require.NotNil(t, entries1)
	require.Equal(t, 2+batchNum, len(entries1)) // init entries: "." and ".."

	entries2, _, _ := v.Readdir(ctx, parent, 0, 2, fh, true)
	require.NotNil(t, entries2)
	require.Equal(t, batchNum, len(entries2))

	entries3, _, _ := v.Readdir(ctx, parent, 0, 2+batchNum, fh, true)
	require.NotNil(t, entries3)
	require.Equal(t, batchNum, len(entries3))

	// reach the end
	entries4, _, _ := v.Readdir(ctx, parent, 0, n*batchNum+extra+2, fh, true)
	require.NotNil(t, entries4)
	require.Equal(t, 0, len(entries4))

	// skip-style readdir
	entries5, _, _ := v.Readdir(ctx, parent, 0, n*batchNum+2, fh, true)
	require.NotNil(t, entries5)
	require.Equal(t, extra, len(entries5))

	entries6, _, _ := v.Readdir(ctx, parent, 0, 2, fh, true)
	require.Equal(t, len(entries2), len(entries6))
	for i := 0; i < len(entries2); i++ {
		require.Equal(t, entries2[i].Inode, entries6[i].Inode)
	}

	// dir seak
	entries7, _, _ := v.Readdir(ctx, parent, 0, n*batchNum+2-20, fh, true)
	require.True(t, reflect.DeepEqual(entries5, entries7[20:]))
}

func TestReadDirBatch(t *testing.T) {
	engines := map[string]string{
		"kv":    "memkv://",
		"db":    "sqlite3://:memory:",
		"redis": "redis://127.0.0.1:6379/2",
	}
	for typ, metaUri := range engines {
		testReaddirBatch(t, metaUri, typ, 100)
		// testReaddirBatch(t, metaUri, typ, 4096)
	}
}

func TestReaddir(t *testing.T) {
	engines := map[string]string{
		"kv":    "memkv://",
		"db":    "sqlite3://:memory:",
		"redis": "redis://127.0.0.1:6379/2",
	}
	for typ, metaUri := range engines {
		batchNum := meta.DirBatchNum[typ]
		extra := rand.Intn(batchNum)
		testReaddir(t, metaUri, 20, 0)
		testReaddir(t, metaUri, 20, 5)
		testReaddir(t, metaUri, 2*batchNum, 0)
		testReaddir(t, metaUri, 2*batchNum, extra)
	}
}

func testReaddir(t *testing.T, metaUri string, dirNum int, offset int) {
	v, _ := createTestVFS(nil, metaUri)
	ctx := NewLogContext(meta.Background())

	entry, st := v.Mkdir(ctx, 1, "testdir", 0777, 022)
	if st != 0 {
		t.Fatalf("mkdir testdir: %s", st)
	}

	parent := entry.Inode
	for i := 0; i < dirNum; i++ {
		_, _ = v.Mkdir(ctx, parent, fmt.Sprintf("d%d", i), 0777, 022)
	}
	defer func() {
		for i := 0; i < dirNum; i++ {
			_ = v.Rmdir(ctx, parent, fmt.Sprintf("d%d", i))
		}
		v.Rmdir(ctx, 1, "testdir")
	}()

	fh, _ := v.Opendir(ctx, parent, 0)
	defer v.Releasedir(ctx, parent, fh)

	readAll := func(ctx Context, parent Ino, fh uint64, off int) []*meta.Entry {
		var entries []*meta.Entry
		for {
			ents, _, st := v.Readdir(ctx, parent, 0, off, fh, true)
			require.Equal(t, st, syscall.Errno(0))
			if len(ents) == 0 {
				break
			}
			off += len(ents)
			entries = append(entries, ents...)
		}
		return entries
	}

	entriesOne := readAll(ctx, parent, fh, offset)
	entriesTwo := readAll(ctx, parent, fh, offset)
	require.True(t, reflect.DeepEqual(entriesOne, entriesTwo))
}


================================================
FILE: pkg/vfs/vfs_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"fmt"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"

	"golang.org/x/sys/unix"
)

const O_ACCMODE = syscall.O_ACCMODE
const F_UNLCK = syscall.F_UNLCK

type Statfs struct {
	Total  uint64
	Avail  uint64
	Files  uint64
	Favail uint64
}

func (v *VFS) StatFS(ctx Context, ino Ino) (st *Statfs, err syscall.Errno) {
	var totalspace, availspace, iused, iavail uint64
	_ = v.Meta.StatFS(ctx, ino, &totalspace, &availspace, &iused, &iavail)
	st = new(Statfs)
	st.Total = totalspace
	st.Avail = availspace
	st.Files = iused + iavail
	st.Favail = iavail
	logit(ctx, "statfs", err, "(%d): (%d,%d,%d,%d)", ino, totalspace-availspace, availspace, iused, iavail)
	return
}

func accessTest(attr *Attr, mmode uint16, uid uint32, gid uint32) syscall.Errno {
	if uid == 0 {
		return 0
	}
	mode := attr.Mode
	var effected uint16
	if uid == attr.Uid {
		effected = (mode >> 6) & 7
	} else {
		effected = mode & 7
		if gid == attr.Gid {
			effected = (mode >> 3) & 7
		}
	}
	if mmode&effected != mmode {
		return syscall.EACCES
	}
	return 0
}

func (v *VFS) Access(ctx Context, ino Ino, mask int) (err syscall.Errno) {
	defer func() { logit(ctx, "access", err, "(%d,0x%X)", ino, mask) }()
	var mmask uint16
	if mask&unix.R_OK != 0 {
		mmask |= MODE_MASK_R
	}
	if mask&unix.W_OK != 0 {
		mmask |= MODE_MASK_W
	}
	if mask&unix.X_OK != 0 {
		mmask |= MODE_MASK_X
	}
	if IsSpecialNode(ino) {
		node := getInternalNode(ino)
		if node != nil {
			err = accessTest(node.attr, mmask, ctx.Uid(), ctx.Gid())
			return
		}
	}

	err = v.Meta.Access(ctx, ino, uint8(mmask), nil)
	return
}

func setattrStr(set int, mode, uid, gid uint32, atime, mtime int64, size uint64) string {
	var sb strings.Builder
	if set&meta.SetAttrMode != 0 {
		sb.WriteString(fmt.Sprintf("mode=%s:0%04o,", smode(uint16(mode)), mode&07777))
	}
	if set&meta.SetAttrUID != 0 {
		sb.WriteString(fmt.Sprintf("uid=%d,", uid))
	}
	if set&meta.SetAttrGID != 0 {
		sb.WriteString(fmt.Sprintf("gid=%d,", gid))
	}

	var atimeStr string
	if set&meta.SetAttrAtimeNow != 0 || (set&meta.SetAttrAtime) != 0 && atime < 0 {
		atimeStr = "NOW"
	} else if set&meta.SetAttrAtime != 0 {
		atimeStr = strconv.FormatInt(atime, 10)
	}
	if atimeStr != "" {
		sb.WriteString("atime=" + atimeStr + ",")
	}

	var mtimeStr string
	if set&meta.SetAttrMtimeNow != 0 || (set&meta.SetAttrMtime) != 0 && mtime < 0 {
		mtimeStr = "NOW"
	} else if set&meta.SetAttrMtime != 0 {
		mtimeStr = strconv.FormatInt(mtime, 10)
	}
	if mtimeStr != "" {
		sb.WriteString("mtime=" + mtimeStr + ",")
	}

	if set&meta.SetAttrSize != 0 {
		sizeStr := strconv.FormatUint(size, 10)
		sb.WriteString("size=" + sizeStr + ",")
	}
	r := sb.String()
	if len(r) > 1 {
		r = r[:len(r)-1] // drop last ,
	}
	return r
}

func (v *VFS) SetAttr(ctx Context, ino Ino, set int, fh uint64, mode, uid, gid uint32, atime, mtime int64, atimensec, mtimensec uint32, size uint64) (entry *meta.Entry, err syscall.Errno) {
	str := setattrStr(set, mode, uid, gid, atime, mtime, size)
	defer func() {
		logit(ctx, "setattr", err, "(%d[%d],0x%X,[%s]):%s", ino, fh, set, str, (*Entry)(entry))
	}()
	if IsSpecialNode(ino) {
		n := getInternalNode(ino)
		if n != nil {
			entry = &meta.Entry{Inode: ino, Attr: n.attr}
		} else {
			err = syscall.EPERM
		}
		return
	}
	var attr = &Attr{}
	if set&meta.SetAttrSize != 0 {
		err = v.Truncate(ctx, ino, int64(size), fh, attr)
		if err != 0 {
			return
		}
		if (set &^ (meta.SetAttrSize | meta.SetAttrCtime | meta.SetAttrCtimeNow)) == 0 {
			v.UpdateLength(ino, attr)
			entry = &meta.Entry{Inode: ino, Attr: attr}
			return
		}
	}
	if set&meta.SetAttrMode != 0 {
		attr.Mode = uint16(mode & 07777)
	}
	if set&meta.SetAttrUID != 0 {
		attr.Uid = uid
	}
	if set&meta.SetAttrGID != 0 {
		attr.Gid = gid
	}
	if set&meta.SetAttrAtime != 0 {
		attr.Atime = atime
		attr.Atimensec = atimensec
	}
	if set&meta.SetAttrMtime != 0 {
		attr.Mtime = mtime
		attr.Mtimensec = mtimensec
	}
	if set&meta.SetAttrMtime != 0 || set&meta.SetAttrMtimeNow != 0 {
		if ctx.CheckPermission() {
			if err = v.Meta.CheckSetAttr(ctx, ino, uint16(set), *attr); err != 0 {
				return
			}
		}
		if set&meta.SetAttrMtime != 0 {
			v.writer.UpdateMtime(ino, time.Unix(mtime, int64(mtimensec)))
		}
		if set&meta.SetAttrMtimeNow != 0 {
			v.writer.UpdateMtime(ino, time.Now())
		}
	}

	err = v.Meta.SetAttr(ctx, ino, uint16(set), 0, attr)
	if err == 0 {
		v.UpdateLength(ino, attr)
		entry = &meta.Entry{Inode: ino, Attr: attr}
	}
	return
}

type lockType uint32

func (l lockType) String() string {
	switch l {
	case syscall.F_UNLCK:
		return "U"
	case syscall.F_RDLCK:
		return "R"
	case syscall.F_WRLCK:
		return "W"
	default:
		return "X"
	}
}

func (v *VFS) Getlk(ctx Context, ino Ino, fh uint64, owner uint64, start, len *uint64, typ *uint32, pid *uint32) (err syscall.Errno) {
	defer func() {
		logit(ctx, "getlk", err, "(%d,%d,%016X): (%d,%d,%s,%d)", ino, fh, owner, *start, *len, lockType(*typ), *pid)
	}()
	if lockType(*typ).String() == "X" {
		return syscall.EINVAL
	}
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	if v.findHandle(ino, fh) == nil {
		err = syscall.EBADF
		return
	}
	err = v.Meta.Getlk(ctx, ino, owner, typ, start, len, pid)
	return
}

func (v *VFS) Setlk(ctx Context, ino Ino, fh uint64, owner uint64, start, end uint64, typ uint32, pid uint32, block bool) (err syscall.Errno) {
	defer func() {
		logit(ctx, "setlk", err, "(%d,%d,%016X,%d,%d,%s,%t,%d)", ino, fh, owner, start, end, lockType(typ), block, pid)
	}()
	if lockType(typ).String() == "X" {
		return syscall.EINVAL
	}
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	h.addOp(ctx)
	defer h.removeOp(ctx)

	err = v.Meta.Setlk(ctx, ino, owner, block, typ, start, end, pid)
	if err == 0 {
		h.Lock()
		if typ != syscall.F_UNLCK {
			h.locks |= 2
			if h.ofdOwner == 0 {
				h.ofdOwner = owner
			}
		}
		h.Unlock()
	}
	return
}

func (v *VFS) Flock(ctx Context, ino Ino, fh uint64, owner uint64, typ uint32, block bool) (err syscall.Errno) {
	var name string
	defer func() { logit(ctx, "flock", err, "(%d,%d,%016X,%s,%t)", ino, fh, owner, name, block) }()
	switch typ {
	case syscall.F_RDLCK:
		name = "LOCKSH"
	case syscall.F_WRLCK:
		name = "LOCKEX"
	case syscall.F_UNLCK:
		name = "UNLOCK"
	default:
		err = syscall.EINVAL
		return
	}

	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}
	h := v.findHandle(ino, fh)
	if h == nil {
		err = syscall.EBADF
		return
	}
	h.addOp(ctx)
	defer h.removeOp(ctx)
	err = v.Meta.Flock(ctx, ino, owner^fh, typ, block)
	if err == 0 {
		h.Lock()
		if typ == syscall.F_UNLCK {
			h.locks &= 2
		} else {
			h.locks |= 1
			h.flockOwner = owner
		}
		h.Unlock()
	}
	return
}

func (v *VFS) Ioctl(ctx Context, ino Ino, cmd uint32, arg uint64, bufIn, bufOut []byte) (err syscall.Errno) {
	const (
		FS_IOC_GETFLAGS    = 0x80086601
		FS_IOC_SETFLAGS    = 0x40086602
		FS_IOC_GETFLAGS_32 = 0x80046601
		FS_IOC_SETFLAGS_32 = 0x40046602
		FS_IOC_FSGETXATTR  = 0x801C581F
	)
	const (
		FS_SECRM_FL        = 0x00000001
		FS_IMMUTABLE_FL    = 0x00000010
		FS_APPEND_FL       = 0x00000020
		FS_XFLAG_IMMUTABLE = 0x00000008
		FS_XFLAG_APPEND    = 0x00000010
	)
	defer func() { logit(ctx, "ioctl", err, "(%d,0x%X,0x%X,%v,%v)", ino, cmd, arg, bufIn, bufOut) }()
	switch cmd {
	default:
		return syscall.ENOTTY
	case FS_IOC_SETFLAGS, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS_32, FS_IOC_GETFLAGS_32, FS_IOC_FSGETXATTR:
	}
	if IsSpecialNode(ino) {
		return syscall.EPERM
	}
	var attr = &Attr{}
	if cmd>>30 == 1 { // set
		var iflag uint64
		if len(bufIn) == 8 {
			iflag = utils.NativeEndian.Uint64(bufIn)
		} else if len(bufIn) == 4 {
			iflag = uint64(utils.NativeEndian.Uint32(bufIn))
		} else {
			return syscall.EINVAL
		}
		if ctx.CheckPermission() && ctx.Uid() != 0 && iflag&(FS_SECRM_FL|FS_IMMUTABLE_FL|FS_APPEND_FL) != 0 {
			return syscall.EPERM
		}
		if (iflag & FS_SECRM_FL) != 0 {
			attr.Flags |= meta.FlagSkipTrash
		}
		if (iflag & FS_IMMUTABLE_FL) != 0 {
			attr.Flags |= meta.FlagImmutable
		}
		if (iflag & FS_APPEND_FL) != 0 {
			attr.Flags |= meta.FlagAppend
		}
		if iflag &= ^uint64(FS_SECRM_FL | FS_IMMUTABLE_FL | FS_APPEND_FL); iflag != 0 {
			return syscall.ENOTSUP
		}
		return v.Meta.SetAttr(ctx, ino, meta.SetAttrFlag, 0, attr)
	} else {
		if err = v.Meta.GetAttr(ctx, ino, attr); err != 0 {
			return
		}
		var iflag uint64
		if cmd>>8&0xFF == 'f' { // FS_IOC_GETFLAGS
			if (attr.Flags & meta.FlagSkipTrash) != 0 {
				iflag |= FS_SECRM_FL
			}
			if (attr.Flags & meta.FlagImmutable) != 0 {
				iflag |= FS_IMMUTABLE_FL
			}
			if (attr.Flags & meta.FlagAppend) != 0 {
				iflag |= FS_APPEND_FL
			}
			if len(bufOut) == 8 {
				utils.NativeEndian.PutUint64(bufOut, iflag)
			} else if len(bufOut) == 4 {
				utils.NativeEndian.PutUint32(bufOut, uint32(iflag))
			} else {
				return syscall.EINVAL
			}
		} else { // 'X', FS_IOC_FSGETXATTR
			if (attr.Flags & meta.FlagImmutable) != 0 {
				iflag |= FS_XFLAG_IMMUTABLE
			}
			if (attr.Flags & meta.FlagAppend) != 0 {
				iflag |= FS_XFLAG_APPEND
			}
			if len(bufOut) == 28 {
				utils.NativeEndian.PutUint32(bufOut[:4], uint32(iflag))
				for i := range bufOut[4:] {
					bufOut[4+i] = 0
				}
			} else {
				return syscall.EINVAL
			}
		}
		return
	}
}


================================================
FILE: pkg/vfs/vfs_windows.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"syscall"

	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/winfsp/cgofuse/fuse"
)

const O_ACCMODE = uint32(fuse.O_ACCMODE)
const F_UNLCK = 0x01

func (v *VFS) ChFlags(ctx Context, ino Ino, flags uint8) (err syscall.Errno) {
	defer func() {
		logit(ctx, "chflags", err, "(%d):%d", ino, flags)
	}()
	if IsSpecialNode(ino) {
		err = syscall.EPERM
		return
	}

	err = syscall.EINVAL
	var attr = &Attr{Flags: flags}

	if ctx.CheckPermission() {
		if err = v.Meta.CheckSetAttr(ctx, ino, meta.SetAttrFlag, *attr); err != 0 {
			return
		}
	}

	err = v.Meta.SetAttr(ctx, ino, meta.SetAttrFlag, 0, attr)
	return
}


================================================
FILE: pkg/vfs/writer.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package vfs

import (
	"math/rand"
	"runtime"
	"sync"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
)

const (
	flushDuration = time.Second * 5
)

type FileWriter interface {
	Write(ctx meta.Context, offset uint64, data []byte) syscall.Errno
	Flush(ctx meta.Context) syscall.Errno
	Close(ctx meta.Context) syscall.Errno
	GetLength() uint64
	Truncate(length uint64)
}

type DataWriter interface {
	Open(inode Ino, fleng uint64) FileWriter
	Flush(ctx meta.Context, inode Ino) syscall.Errno
	GetLength(inode Ino) uint64
	Truncate(inode Ino, length uint64)
	UpdateMtime(inode Ino, mtime time.Time)
	FlushAll() error
}

type sliceWriter struct {
	id      uint64
	chunk   *chunkWriter
	off     uint32
	length  uint32
	soff    uint32
	slen    uint32
	writer  chunk.Writer
	freezed bool
	done    bool
	err     syscall.Errno
	notify  *utils.Cond
	started time.Time
	lastMod time.Time
}

func (s *sliceWriter) prepareID(ctx meta.Context, retry bool) {
	f := s.chunk.file
	f.Lock()
	for s.id == 0 {
		var id uint64
		f.Unlock()
		st := f.w.m.NewSlice(ctx, &id)
		f.Lock()
		if st != 0 && st != syscall.EIO {
			s.err = st
			break
		}
		if !retry || st == 0 {
			if s.id == 0 {
				s.id = id
			}
			break
		}
		f.Unlock()
		logger.Debugf("meta is not available: %s", st)
		time.Sleep(time.Millisecond * 100)
		f.Lock()
	}
	if s.writer != nil && s.writer.ID() == 0 {
		s.writer.SetID(s.id)
	}
	f.Unlock()
}

func (s *sliceWriter) markDone() {
	f := s.chunk.file
	f.Lock()
	s.done = true
	s.notify.Signal()
	f.Unlock()
}

// freezed, no more data
func (s *sliceWriter) flushData() {
	defer s.markDone()
	if s.slen == 0 {
		return
	}
	s.prepareID(meta.Background(), true)
	if s.err != 0 {
		logger.Infof("flush inode: %v chunk: %d err: %s", s.chunk.file.inode, s.id, s.err)
		s.writer.Abort()
		return
	}
	s.length = s.slen
	if err := s.writer.Finish(int(s.length)); err != nil {
		logger.Errorf("upload inode: %v chunk: %v (length: %v) fail: %s", s.chunk.file.inode, s.id, s.length, err)

		s.writer.Abort()
		s.err = syscall.EIO
	}
}

// protected by s.chunk.file
func (s *sliceWriter) write(ctx meta.Context, off uint32, data []uint8) syscall.Errno {
	f := s.chunk.file
	_, err := s.writer.WriteAt(data, int64(off))
	if err != nil {
		logger.Warnf("write inode: %v chunk: %d off: %d %s", s.chunk.file.inode, s.id, off, err)
		return syscall.EIO
	}
	if off+uint32(len(data)) > s.slen {
		s.slen = off + uint32(len(data))
	}
	s.lastMod = time.Now()
	if s.slen == meta.ChunkSize {
		s.freezed = true
		go s.flushData()
	} else if int(s.slen) >= f.w.blockSize {
		if s.id > 0 {
			err := s.writer.FlushTo(int(s.slen))
			if err != nil {
				logger.Warnf("write inode: %v chunk: %d off: %d %s", s.chunk.file.inode, s.id, off, err)
				return syscall.EIO
			}
		}
	}
	return 0
}

type chunkWriter struct {
	indx   uint32
	file   *fileWriter
	slices []*sliceWriter
}

// protected by file
func (c *chunkWriter) findWritableSlice(pos uint32, size uint32) *sliceWriter {
	blockSize := uint32(c.file.w.blockSize)
	for i := range c.slices {
		s := c.slices[len(c.slices)-1-i]
		if !s.freezed {
			flushoff := s.slen / blockSize * blockSize
			if pos >= s.off+flushoff && pos <= s.off+s.slen {
				return s
			} else if i > 3 {
				s.freezed = true
				go s.flushData()
			}
		}
		if pos < s.off+s.slen && s.off < pos+size {
			// overlaped
			// TODO: write into multiple slices
			return nil
		}
	}
	return nil
}

func (c *chunkWriter) commitThread() {
	f := c.file
	defer f.w.free(f)
	f.Lock()

	// the slices should be committed in the order that are created
	for len(c.slices) > 0 {
		s := c.slices[0]
		for !s.done {
			if s.notify.WaitWithTimeout(time.Millisecond*100) && !s.freezed && time.Since(s.started) > flushDuration*2 {
				s.freezed = true
				go s.flushData()
			}
		}
		err := s.err
		f.Unlock()

		if err == 0 {
			var ss = meta.Slice{Id: s.id, Size: s.length, Off: s.soff, Len: s.slen}
			err = f.w.m.Write(meta.Background(), f.inode, c.indx, s.off, ss, s.lastMod)
			f.w.reader.Invalidate(f.inode, uint64(c.indx)*meta.ChunkSize+uint64(s.off), uint64(ss.Len))
		}

		f.Lock()
		if err != 0 {
			if err == syscall.ENOENT || err == syscall.ENOSPC || err == syscall.EDQUOT {
				go func(id uint64, length int) {
					_ = f.w.store.Remove(id, length)
				}(s.id, int(s.length))
			} else {
				logger.Warnf("write inode:%d error: %s", f.inode, err)
				err = syscall.EIO
			}
			f.err = err
			logger.Errorf("write inode:%d indx:%d %s", f.inode, c.indx, err)
		}
		c.slices = c.slices[1:]
	}
	f.freeChunk(c)
	f.Unlock()
}

type fileWriter struct {
	sync.Mutex
	w *dataWriter

	inode        Ino
	length       uint64
	err          syscall.Errno
	flushwaiting uint16
	writewaiting uint16
	refs         uint16
	chunks       map[uint32]*chunkWriter

	flushcond *utils.Cond // wait for chunks==nil (flush)
	writecond *utils.Cond // wait for flushwaiting==0 (write)
}

// protected by file
func (f *fileWriter) findChunk(i uint32) *chunkWriter {
	c := f.chunks[i]
	if c == nil {
		c = &chunkWriter{indx: i, file: f}
		f.chunks[i] = c
	}
	return c
}

// protected by file
func (f *fileWriter) freeChunk(c *chunkWriter) {
	delete(f.chunks, c.indx)
	if len(f.chunks) == 0 && f.flushwaiting > 0 {
		f.flushcond.Broadcast()
	}
}

// protected by file
func (f *fileWriter) writeChunk(ctx meta.Context, indx uint32, off uint32, data []byte) syscall.Errno {
	c := f.findChunk(indx)
	s := c.findWritableSlice(off, uint32(len(data)))
	if s == nil {
		s = &sliceWriter{
			chunk:   c,
			off:     off,
			writer:  f.w.store.NewWriter(0),
			notify:  utils.NewCond(&f.Mutex),
			started: time.Now(),
		}
		go s.prepareID(meta.Background(), false)
		c.slices = append(c.slices, s)
		if len(c.slices) == 1 {
			f.w.Lock()
			f.refs++
			f.w.Unlock()
			go c.commitThread()
		}
	}
	return s.write(ctx, off-s.off, data)
}

func (f *fileWriter) totalSlices() int {
	var cnt int
	f.Lock()
	for _, c := range f.chunks {
		cnt += len(c.slices)
	}
	f.Unlock()
	return cnt
}

func (w *dataWriter) usedBufferSize() int64 {
	return utils.AllocMemory() - w.store.UsedMemory()
}

func (f *fileWriter) Write(ctx meta.Context, off uint64, data []byte) syscall.Errno {
	for {
		if f.totalSlices() < 1000 {
			break
		}
		time.Sleep(time.Millisecond)
	}
	if f.w.usedBufferSize() > f.w.bufferSize {
		// slow down
		time.Sleep(time.Millisecond * 10)
		for f.w.usedBufferSize() > f.w.bufferSize*2 {
			time.Sleep(time.Millisecond * 100)
		}
	}

	s := time.Now()
	f.Lock()
	defer f.Unlock()
	size := uint64(len(data))
	f.writewaiting++
	for f.flushwaiting > 0 {
		if f.writecond.WaitWithTimeout(time.Second) && ctx.Canceled() {
			f.writewaiting--
			logger.Warnf("write %d interrupted after %d", f.inode, time.Since(s))
			return syscall.EINTR
		}
	}
	f.writewaiting--

	indx := uint32(off / meta.ChunkSize)
	pos := uint32(off % meta.ChunkSize)
	for len(data) > 0 {
		n := uint32(len(data))
		if pos+n > meta.ChunkSize {
			n = meta.ChunkSize - pos
		}
		if st := f.writeChunk(ctx, indx, pos, data[:n]); st != 0 {
			return st
		}
		data = data[n:]
		indx++
		pos = (pos + n) % meta.ChunkSize
	}
	if off+size > f.length {
		f.length = off + size
	}
	return f.err
}

func (f *fileWriter) updateMtime(t time.Time) {
	f.Lock()
	defer f.Unlock()
	for _, c := range f.chunks {
		for _, s := range c.slices {
			s.lastMod = t
		}
	}
}

func (f *fileWriter) flush(ctx meta.Context, writeback bool) syscall.Errno {
	s := time.Now()
	f.Lock()
	defer f.Unlock()
	f.flushwaiting++

	var err syscall.Errno
	var wait = time.Second * time.Duration((f.w.maxRetries+2)*(f.w.maxRetries+2)/2)
	if wait < time.Minute*5 {
		wait = time.Minute * 5
	}
	var deadline = time.Now().Add(wait)
	for len(f.chunks) > 0 && err == 0 {
		for _, c := range f.chunks {
			for _, s := range c.slices {
				if !s.freezed {
					s.freezed = true
					go s.flushData()
				}
			}
		}
		if f.flushcond.WaitWithTimeout(time.Second*3) && ctx.Canceled() && time.Since(s) > f.w.conf.Chunk.PutTimeout*2 {
			logger.Warnf("flush %d interrupted after %d", f.inode, time.Since(s))
			err = syscall.EINTR
			break
		}
		if time.Now().After(deadline) {
			logger.Errorf("flush %d timeout after waited %s", f.inode, wait)
			for _, c := range f.chunks {
				for _, s := range c.slices {
					logger.Errorf("pending slice %d-%d: %+v", f.inode, c.indx, *s)
				}
			}
			buf := make([]byte, 1<<20)
			n := runtime.Stack(buf, true)
			logger.Warnf("All goroutines (%d):\n%s", runtime.NumGoroutine(), buf[:n])
			err = syscall.EIO
			break
		}
	}
	f.flushwaiting--
	if f.flushwaiting == 0 && f.writewaiting > 0 {
		f.writecond.Broadcast()
	}
	if err == 0 {
		err = f.err
	}
	return err
}

func (f *fileWriter) Flush(ctx meta.Context) syscall.Errno {
	return f.flush(ctx, false)
}

func (f *fileWriter) Close(ctx meta.Context) syscall.Errno {
	defer f.w.free(f)
	return f.Flush(ctx)
}

func (f *fileWriter) GetLength() uint64 {
	f.Lock()
	defer f.Unlock()
	return f.length
}

func (f *fileWriter) Truncate(length uint64) {
	f.Lock()
	defer f.Unlock()
	// TODO: truncate write buffer if length < f.length
	f.length = length
}

type dataWriter struct {
	sync.Mutex
	m          meta.Meta
	store      chunk.ChunkStore
	conf       *Config
	reader     DataReader
	blockSize  int
	bufferSize int64
	files      map[Ino]*fileWriter
	maxRetries uint32
}

func NewDataWriter(conf *Config, m meta.Meta, store chunk.ChunkStore, reader DataReader) DataWriter {
	w := &dataWriter{
		m:          m,
		store:      store,
		reader:     reader,
		conf:       conf,
		blockSize:  conf.Chunk.BlockSize,
		bufferSize: int64(conf.Chunk.BufferSize),
		files:      make(map[Ino]*fileWriter),
		maxRetries: uint32(conf.Meta.Retries),
	}
	go w.flushAll()
	return w
}

func (w *dataWriter) flushAll() {
	for {
		w.Lock()
		now := time.Now()
		for _, f := range w.files {
			f.refs++
			w.Unlock()
			tooMany := f.totalSlices() > 800
			f.Lock()

			lastBit := uint32(rand.Int() % 2) // choose half of chunks randomly
			for i, c := range f.chunks {
				hs := len(c.slices) / 2
				for j, s := range c.slices {
					if !s.freezed && (now.Sub(s.started) > flushDuration || now.Sub(s.lastMod) > time.Second && now.Sub(s.started) > time.Second ||
						tooMany && i%2 == lastBit && j <= hs) {
						s.freezed = true
						go s.flushData()
					}
				}
			}
			f.Unlock()
			w.free(f)
			w.Lock()
		}
		w.Unlock()
		time.Sleep(time.Millisecond * 100)
	}
}

func (w *dataWriter) Open(inode Ino, len uint64) FileWriter {
	w.Lock()
	defer w.Unlock()
	f, ok := w.files[inode]
	if !ok {
		f = &fileWriter{
			w:      w,
			inode:  inode,
			length: len,
			chunks: make(map[uint32]*chunkWriter),
		}
		f.flushcond = utils.NewCond(f)
		f.writecond = utils.NewCond(f)
		w.files[inode] = f
	}
	f.refs++
	return f
}

func (w *dataWriter) find(inode Ino) *fileWriter {
	w.Lock()
	defer w.Unlock()
	return w.files[inode]
}

func (w *dataWriter) free(f *fileWriter) {
	w.Lock()
	defer w.Unlock()
	f.refs--
	if f.refs == 0 {
		delete(w.files, f.inode)
	}
}

func (w *dataWriter) Flush(ctx meta.Context, inode Ino) syscall.Errno {
	f := w.find(inode)
	if f != nil {
		return f.Flush(ctx)
	}
	return 0
}

func (w *dataWriter) GetLength(inode Ino) uint64 {
	f := w.find(inode)
	if f != nil {
		return f.GetLength()
	}
	return 0
}

func (w *dataWriter) Truncate(inode Ino, len uint64) {
	f := w.find(inode)
	if f != nil {
		f.Truncate(len)
	}
}

func (w *dataWriter) UpdateMtime(inode Ino, mtime time.Time) {
	f := w.find(inode)
	if f != nil {
		f.updateMtime(mtime)
	}
}

func (w *dataWriter) FlushAll() error {
	var err error
	w.Lock()
	for inode, ind := range w.files {
		ind.refs++
		w.Unlock()
		eno := ind.Flush(meta.Background())
		w.free(ind)
		if eno != 0 {
			logger.Errorf("flush %s: %s", inode, eno)
			return eno
		}
		logger.Debugf("Flush %d", inode)
		w.Lock()
	}
	w.Unlock()
	return err
}


================================================
FILE: pkg/win/ldap.go
================================================
//go:build windows
// +build windows

/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package win

import (
	"fmt"
	"strings"
	"unsafe"

	"golang.org/x/sys/windows"
)

var (
	modWldap32           = windows.NewLazySystemDLL("wldap32.dll")
	procLdapInitW        = modWldap32.NewProc("ldap_initW")
	procLdapSetOptionW   = modWldap32.NewProc("ldap_set_optionW")
	procLdapBindSW       = modWldap32.NewProc("ldap_bind_sW")
	procLdapUnbind       = modWldap32.NewProc("ldap_unbind")
	procLdapSearchSW     = modWldap32.NewProc("ldap_search_sW")
	procLdapFirstEntryW  = modWldap32.NewProc("ldap_first_entryW")
	procLdapGetValuesW   = modWldap32.NewProc("ldap_get_valuesW")
	procLdapCountValuesW = modWldap32.NewProc("ldap_count_valuesW")
	procLdapValueFreeW   = modWldap32.NewProc("ldap_value_freeW")
	procLdapMsgFreeW     = modWldap32.NewProc("ldap_msgfreeW")
)

// from winldap.h
const (
	LDAP_PORT           = 389
	LDAP_SUCCESS        = 0
	LDAP_OPT_SIGN       = 0x95
	LDAP_OPT_ENCRYPT    = 0x96
	LDAP_OPT_ON         = 1
	LDAP_SCOPE_BASE     = 0x00
	LDAP_SCOPE_ONELEVEL = 0x01
	LDAP_AUTH_NEGOTIATE = 0x0486 // LDAP_AUTH_OTHERKIND (0x86) | 0x0400
)

func LdapConnect(host string) (uintptr, error) {
	hostPtr, err := windows.UTF16PtrFromString(host)
	if err != nil {
		return 0, err
	}
	handle, _, _ := procLdapInitW.Call(
		uintptr(unsafe.Pointer(hostPtr)),
		uintptr(LDAP_PORT),
	)
	if handle == 0 {
		return 0, fmt.Errorf("ldap_initW failed")
	}
	procLdapSetOptionW.Call(handle, uintptr(LDAP_OPT_SIGN), uintptr(LDAP_OPT_ON))
	procLdapSetOptionW.Call(handle, uintptr(LDAP_OPT_ENCRYPT), uintptr(LDAP_OPT_ON))

	r1, _, _ := procLdapBindSW.Call(handle, 0, 0, uintptr(LDAP_AUTH_NEGOTIATE))
	if int32(r1) != LDAP_SUCCESS {
		procLdapUnbind.Call(handle)
		return 0, fmt.Errorf("ldap_bind_sW failed: %d", r1)
	}
	return handle, nil
}

func LdapClose(handle uintptr) {
	procLdapUnbind.Call(handle)
}

func LdapGetValue(
	handle uintptr,
	base string,
	scope uint32,
	filter string,
	attribute string,
) (string, error) {
	var basePtr *uint16
	if base != "" {
		p, err := windows.UTF16PtrFromString(base)
		if err != nil {
			return "", err
		}
		basePtr = p
	}
	filterPtr, err := windows.UTF16PtrFromString(filter)
	if err != nil {
		return "", err
	}
	attrPtr, err := windows.UTF16PtrFromString(attribute)
	if err != nil {
		return "", err
	}
	attrs := []uintptr{uintptr(unsafe.Pointer(attrPtr)), 0}

	var msg uintptr
	r1, _, _ := procLdapSearchSW.Call(
		handle,
		uintptr(unsafe.Pointer(basePtr)),
		uintptr(scope),
		uintptr(unsafe.Pointer(filterPtr)),
		uintptr(unsafe.Pointer(&attrs[0])),
		0,
		uintptr(unsafe.Pointer(&msg)),
	)
	if int32(r1) != LDAP_SUCCESS {
		return "", fmt.Errorf("ldap_search_sW failed: %d", r1)
	}
	defer procLdapMsgFreeW.Call(msg)

	entry, _, _ := procLdapFirstEntryW.Call(handle, msg)
	if entry == 0 {
		return "", fmt.Errorf("no entries found")
	}
	vals, _, _ := procLdapGetValuesW.Call(handle, entry, uintptr(unsafe.Pointer(attrPtr)))
	if vals == 0 {
		return "", fmt.Errorf("no attribute values")
	}
	defer procLdapValueFreeW.Call(vals)
	cnt, _, _ := procLdapCountValuesW.Call(vals)
	if cnt == 0 {
		return "", fmt.Errorf("no attribute values")
	}
	firstPtr := *(*uintptr)(unsafe.Pointer(vals))
	value := windows.UTF16PtrToString((*uint16)(unsafe.Pointer(firstPtr)))
	return value, nil
}

func LdapGetDefaultNamingContext(handle uintptr) (string, error) {
	return LdapGetValue(handle, "", LDAP_SCOPE_BASE, "(objectClass=*)", "defaultNamingContext")
}

func LdapGetTrustPosixOffset(
	handle uintptr,
	context string,
	domain string,
) (string, error) {
	isFlat := !strings.Contains(domain, ".")
	base := fmt.Sprintf("CN=System,%s", context)
	var filter string
	if isFlat {
		filter = fmt.Sprintf("(&(objectClass=trustedDomain)(flatName=%s))", domain)
	} else {
		filter = fmt.Sprintf("(&(objectClass=trustedDomain)(name=%s))", domain)
	}
	return LdapGetValue(handle, base, LDAP_SCOPE_ONELEVEL, filter, "trustPosixOffset")
}


================================================
FILE: pkg/win/sid.go
================================================
//go:build windows
// +build windows

/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package win

import (
	"fmt"
	"runtime"
	"strconv"
	"syscall"
	"unsafe"

	"golang.org/x/sys/windows"
)

var (
	modadvapi32                   = windows.NewLazySystemDLL("advapi32.dll")
	procLsaOpenPolicy             = modadvapi32.NewProc("LsaOpenPolicy")
	procLsaQueryInformationPolicy = modadvapi32.NewProc("LsaQueryInformationPolicy")
	procLsaFreeMemory             = modadvapi32.NewProc("LsaFreeMemory")
	procLsaClose                  = modadvapi32.NewProc("LsaClose")

	netapi32 = windows.NewLazySystemDLL("netapi32.dll")

	//https://learn.microsoft.com/en-us/windows/win32/api/dsgetdc/nf-dsgetdc-dsenumeratedomaintrustsw
	procDsEnumerateDomainTrustsW = netapi32.NewProc("DsEnumerateDomainTrustsW")
	procNetApiBufferFree         = netapi32.NewProc("NetApiBufferFree")
)

var trustedDomains []trustedDomain

type LSA_OBJECT_ATTRIBUTES struct {
	Length                   uint32
	RootDirectory            windows.Handle
	ObjectName               uintptr
	Attributes               uint32
	SecurityDescriptor       uintptr
	SecurityQualityOfService uintptr
}

var primaryDomainSid *windows.SID = nil
var accountDomainSid *windows.SID = nil

const (
	PolicyAccountDomainInformation = 5
	PolicyDnsDomainInformation     = 12
)

const (
	POLICY_VIEW_LOCAL_INFORMATION   = 0x00000001
	POLICY_VIEW_AUDIT_INFORMATION   = 0x00000002
	POLICY_GET_PRIVATE_INFORMATION  = 0x00000004
	POLICY_TRUST_ADMIN              = 0x00000008
	POLICY_CREATE_ACCOUNT           = 0x00000010
	POLICY_CREATE_SECRET            = 0x00000020
	POLICY_CREATE_PRIVILEGE         = 0x00000040
	POLICY_SET_DEFAULT_QUOTA_LIMITS = 0x00000080
	POLICY_SET_AUDIT_REQUIREMENTS   = 0x00000100
	POLICY_AUDIT_LOG_ADMIN          = 0x00000200
	POLICY_SERVER_ADMIN             = 0x00000400
	POLICY_LOOKUP_NAMES             = 0x00000800
	POLICY_NOTIFICATION             = 0x00001000
)

const (
	AdministratorUIDFromFUSE = 197108 // This is calcuated from the SID of Administrator user on Windows. //0x30000 + 500
	AdminstratorsGIDFromFUSE = 544    //  S-1-5-32-544
	SystemUIDFromFUSE        = 18     //  S-1-5-32-18
)

type UNICODE_STRING struct {
	Length        uint16
	MaximumLength uint16
	Buffer        *uint16
}

// https://learn.microsoft.com/en-us/windows/win32/api/lsalookup/ns-lsalookup-policy_account_domain_info
type POLICY_ACCOUNT_DOMAIN_INFO struct {
	DomainName UNICODE_STRING
	DomainSid  *windows.SID
}

type GUID struct {
	Data1 uint32
	Data2 uint16
	Data3 uint16
	Data4 [8]byte
}

// https://learn.microsoft.com/en-us/windows/win32/api/lsalookup/ns-lsalookup-policy_dns_domain_info
type POLICY_DNS_DOMAIN_INFO struct {
	Name          UNICODE_STRING
	DnsDomainName UNICODE_STRING
	DnsForestName UNICODE_STRING
	DomainGuid    GUID
	Sid           *windows.SID
}

const (
	DS_DOMAIN_DIRECT_INBOUND  = 0x0001
	DS_DOMAIN_DIRECT_OUTBOUND = 0x0002
	DS_DOMAIN_IN_FOREST       = 0x0008
)

type DS_DOMAIN_TRUSTSW struct {
	NetbiosDomainName *uint16      // LPWSTR
	DnsDomainName     *uint16      // LPWSTR
	Flags             uint32       // ULONG
	ParentIndex       uint32       // ULONG
	TrustType         uint32       // ULONG
	TrustAttributes   uint32       // ULONG
	DomainSid         *windows.SID // PSID
	DomainGuid        windows.GUID // GUID
}

type trustedDomain struct {
	DomainSid         *windows.SID
	NetbiosDomainName *uint16
	DnsDomainName     *uint16
	TrustPosixOffset  uint32
}

func IsRelativeSid(sid1 *windows.SID, sid2 *windows.SID) bool {
	if sid1 == nil || sid2 == nil {
		return sid1 == sid2
	}

	// Check if the SIDs have the same revision, we have to do it by ourself
	// since windows.SID does not expose the revision field directly.
	rev1 := *(*uint8)(unsafe.Pointer(sid1))
	rev2 := *(*uint8)(unsafe.Pointer(sid2))
	if rev1 != rev2 {
		return false
	}

	auth1 := sid1.IdentifierAuthority()
	auth2 := sid2.IdentifierAuthority()
	for i := 0; i < len(auth1.Value); i++ {
		if auth1.Value[i] != auth2.Value[i] {
			return false
		}
	}

	cnt1 := sid1.SubAuthorityCount()
	cnt2 := sid2.SubAuthorityCount()
	if cnt1+1 != cnt2 {
		return false
	}

	for i := uint8(0); i < cnt1; i++ {
		if sid1.SubAuthority(uint32(i)) != sid2.SubAuthority(uint32(i)) {
			return false
		}
	}

	return true
}

// initializeTrustPosixOffsets queries LDAP and sets TrustPosixOffset for each trusted domain.
func initializeTrustPosixOffsets() error {
	handle, err := LdapConnect("") // empty string means default server
	if err != nil {
		return fmt.Errorf("LdapConnect failed: %w", err)
	}
	defer LdapClose(handle)

	defaultNC, err := LdapGetDefaultNamingContext(handle)
	if err != nil {
		return fmt.Errorf("LdapGetDefaultNamingContext failed: %w", err)
	}

	// For each trusted domain, get trustPosixOffset
	for i := range trustedDomains {
		domain := windows.UTF16PtrToString(trustedDomains[i].DnsDomainName)
		offsetStr, err := LdapGetTrustPosixOffset(handle, defaultNC, domain)
		if err == nil {
			if val, err := strconv.ParseUint(offsetStr, 10, 32); err == nil {
				trustedDomains[i].TrustPosixOffset = uint32(val)
			}
		}
	}

	// If trustPosixOffset looks wrong, fix it up using Cygwin magic value 0xfe500000
	for i := range trustedDomains {
		if trustedDomains[i].TrustPosixOffset < 0x100000 {
			trustedDomains[i].TrustPosixOffset = 0xfe500000
		}
	}

	return nil
}

func init() {
	if runtime.GOOS != "windows" {
		return
	}

	var objAttr LSA_OBJECT_ATTRIBUTES
	objAttr.Length = uint32(unsafe.Sizeof(objAttr))

	var policyHandle windows.Handle
	r1, _, _ := procLsaOpenPolicy.Call(
		0,
		uintptr(unsafe.Pointer(&objAttr)),
		uintptr(POLICY_VIEW_LOCAL_INFORMATION),
		uintptr(unsafe.Pointer(&policyHandle)),
	)
	if windows.NTStatus(r1) != windows.STATUS_SUCCESS {
		return
	}
	defer procLsaClose.Call(uintptr(policyHandle))

	// Get the account domain SID
	var acctInfoPtr uintptr
	r1, _, _ = procLsaQueryInformationPolicy.Call(
		uintptr(policyHandle),
		uintptr(PolicyAccountDomainInformation),
		uintptr(unsafe.Pointer(&acctInfoPtr)),
	)
	if windows.NTStatus(r1) == windows.STATUS_SUCCESS && acctInfoPtr != 0 {
		defer procLsaFreeMemory.Call(acctInfoPtr)
		info := (*POLICY_ACCOUNT_DOMAIN_INFO)(unsafe.Pointer(acctInfoPtr))
		if info.DomainSid != nil {
			if sidCopy, err := info.DomainSid.Copy(); err == nil {
				accountDomainSid = sidCopy
			}
		}
	}

	// Get the primary domain SID
	var primInfoPtr uintptr
	r1, _, _ = procLsaQueryInformationPolicy.Call(
		uintptr(policyHandle),
		uintptr(PolicyDnsDomainInformation),
		uintptr(unsafe.Pointer(&primInfoPtr)),
	)
	if windows.NTStatus(r1) == windows.STATUS_SUCCESS && primInfoPtr != 0 {
		defer procLsaFreeMemory.Call(primInfoPtr)
		info2 := (*POLICY_DNS_DOMAIN_INFO)(unsafe.Pointer(primInfoPtr))
		if info2.Sid != nil {
			if sidCopy, err := info2.Sid.Copy(); err == nil {
				primaryDomainSid = sidCopy
			}
		}
	}

	// QUERY trusted domains
	var domainsPtr uintptr
	var domainCount uint32
	r1, _, _ = procDsEnumerateDomainTrustsW.Call(
		0,
		uintptr(DS_DOMAIN_DIRECT_INBOUND|DS_DOMAIN_DIRECT_OUTBOUND|DS_DOMAIN_IN_FOREST),
		uintptr(unsafe.Pointer(&domainsPtr)),
		uintptr(unsafe.Pointer(&domainCount)),
	)
	if r1 != 0 || domainsPtr == 0 {
		return
	}
	defer procNetApiBufferFree.Call(domainsPtr)

	entrySize := unsafe.Sizeof(DS_DOMAIN_TRUSTSW{})
	base := domainsPtr
	realCount := 0
	for i := 0; i < int(domainCount); i++ {
		dom := (*DS_DOMAIN_TRUSTSW)(unsafe.Pointer(base + uintptr(i)*entrySize))
		if dom.DomainSid == nil ||
			(dom.NetbiosDomainName == nil && dom.DnsDomainName == nil) ||
			windows.EqualSid(dom.DomainSid, primaryDomainSid) {
			continue
		}
		realCount++
	}

	trustedDomains = make([]trustedDomain, 0, realCount)
	for i := 0; i < int(domainCount); i++ {
		dom := (*DS_DOMAIN_TRUSTSW)(unsafe.Pointer(base + uintptr(i)*entrySize))
		if dom.DomainSid == nil ||
			(dom.NetbiosDomainName == nil && dom.DnsDomainName == nil) ||
			windows.EqualSid(dom.DomainSid, primaryDomainSid) {
			continue
		}

		sidCopy, err := dom.DomainSid.Copy()
		if err != nil {
			continue
		}

		trustedDomains = append(trustedDomains, trustedDomain{
			DomainSid:         sidCopy,
			NetbiosDomainName: dom.NetbiosDomainName,
			DnsDomainName:     dom.DnsDomainName,
			TrustPosixOffset:  0,
		})
	}

	if len(trustedDomains) != 0 {
		initializeTrustPosixOffsets()
	}
}

func ConvertSidStrToUid(sidStr string) (int, error) {
	sid, err := windows.StringToSid(sidStr)
	if err != nil {
		return -1, err
	}
	ret := convertSidToUid(sid)
	if ret < 0 {
		return -1, fmt.Errorf("invalid uid %d for sid %s", ret, sidStr)
	}
	return ret, nil
}

func convertSidToUid(sid *windows.SID) int {
	if sid == nil || !sid.IsValid() {
		return -1
	}

	subAuthCount := sid.SubAuthorityCount()
	if subAuthCount == 0 {
		return -1
	}

	// SID FORMAT: https://learn.microsoft.com/en-us/windows-server/identity/ad-ds/manage/understand-security-identifiers
	// S-VERSION-IDENTIFIER_AUTHORITY-SUBAUTHORITY1-SUBAUTHORITY2-...-SUBAUTHORITYn(RID)
	// SUBAUTHORITY1-SUBAUTHORITY2 also known as Domain Identifier

	rid := sid.SubAuthority(uint32(subAuthCount - 1))
	subAuth0 := sid.SubAuthority(0)
	auth := sid.IdentifierAuthority()

	ret := -1

	if auth == windows.SECURITY_NT_AUTHORITY {
		// windows.SECURITY_NT_AUTHORITY: 5
		if subAuthCount == 1 {
			// well-known SIDs
			ret = int(rid)
		} else if subAuthCount == 2 && subAuth0 == 32 {
			// well-known SIDs
			ret = int(rid) // BUILTIN domain
		} else if subAuthCount >= 2 && subAuth0 == 5 {
			// ignore
		} else if subAuthCount >= 5 && subAuth0 == 21 {
			if primaryDomainSid != nil && IsRelativeSid(primaryDomainSid, sid) {
				// Accounts from the machine's primary domain:
				ret = 0x100000 + int(rid)
			} else if accountDomainSid != nil && IsRelativeSid(accountDomainSid, sid) {
				// Accounts from the local machine's user DB (SAM):
				ret = 0x30000 + int(rid)
			} else {
				// Accounts from a trusted domain of the machine's primary domain:
				for _, dom := range trustedDomains {
					if IsRelativeSid(dom.DomainSid, sid) {
						ret = int(dom.TrustPosixOffset) + int(rid)
						break
					}
				}
			}
		} else if subAuthCount == 2 {
			// Other well-known SIDs in the NT_AUTHORITY domain (S-1-5-X-RID):
			ret = 0x1000 + int(subAuth0) + int(rid)
		}
	} else if auth == windows.SECURITY_MANDATORY_LABEL_AUTHORITY {
		// windows.SECURITY_MANDATORY_LABEL_AUTHORITY: 16
		ret = 0x60000 + int(rid)
	} else if auth.Value[5] != 0 || rid != 65534 {
		// Other well-known SIDs:
		ret = 0x10000 + 0x100*int(auth.Value[5]) + int(rid)
	}

	if ret == -1 {
		ret = 65534 // fallback to unmapped SID
	}

	return ret
}

func GetCurrentUserSID() (*windows.SID, error) {
	var token windows.Token
	err := windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_QUERY, &token)
	if err != nil {
		return nil, err
	}
	defer token.Close()

	var requiredLen uint32
	err = windows.GetTokenInformation(token, windows.TokenUser, nil, 0, &requiredLen)
	if err != windows.ERROR_INSUFFICIENT_BUFFER {
		return nil, err
	}

	buf := make([]byte, requiredLen)
	err = windows.GetTokenInformation(token, windows.TokenUser, &buf[0], requiredLen, &requiredLen)
	if err != nil {
		return nil, err
	}
	userInfo := (*windows.Tokenuser)(unsafe.Pointer(&buf[0]))
	return userInfo.User.Sid, nil
}

func GetCurrentUserPrimaryGroupSID() (*windows.SID, error) {
	var token windows.Token
	err := windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_QUERY, &token)
	if err != nil {
		return nil, err
	}
	defer token.Close()

	var requiredLen uint32
	err = windows.GetTokenInformation(token, windows.TokenPrimaryGroup, nil, 0, &requiredLen)
	if err != windows.ERROR_INSUFFICIENT_BUFFER {
		return nil, err
	}

	buf := make([]byte, requiredLen)
	err = windows.GetTokenInformation(token, windows.TokenPrimaryGroup, &buf[0], requiredLen, &requiredLen)
	if err != nil {
		return nil, err
	}
	groupInfo := (*windows.Tokenprimarygroup)(unsafe.Pointer(&buf[0]))
	return groupInfo.PrimaryGroup, nil
}

func GetCurrentUID() int {
	// convert sid to uid, this function have the same procedure with FspPosixMapSidToUid to keep consistencywin
	// https://cygwin.com/cygwin-ug-net/ntsec.html

	sid, err := GetCurrentUserSID()
	if err != nil {
		return -1
	}

	return convertSidToUid(sid)
}

func GetCurrentGID() int {
	sid, err := GetCurrentUserPrimaryGroupSID()
	if err != nil {
		return -1
	}

	return convertSidToUid(sid)
}

func GetCurrentGroupName() string {
	sid, err := GetCurrentUserPrimaryGroupSID()
	if err != nil {
		return ""
	}
	return GetSidName(sid, false)
}

func GetSidName(sid *windows.SID, withDomain bool) string {
	var nameLen, domLen, sidType uint32

	err := windows.LookupAccountSid(
		nil, sid,
		nil, &nameLen,
		nil, &domLen,
		&sidType,
	)
	if err != windows.ERROR_INSUFFICIENT_BUFFER {
		return sid.String()
	}

	name := make([]uint16, nameLen)
	dom := make([]uint16, domLen)

	err = windows.LookupAccountSid(
		nil, sid,
		&name[0], &nameLen,
		&dom[0], &domLen,
		&sidType,
	)
	if err != nil {
		return sid.String()
	}

	account := syscall.UTF16ToString(name)
	if withDomain {
		domain := syscall.UTF16ToString(dom)
		return domain + `\` + account
	}

	return account
}

func IsProcessElevated() (bool, error) {
	var token windows.Token
	err := windows.OpenProcessToken(windows.CurrentProcess(), windows.TOKEN_QUERY, &token)
	if err != nil {
		return false, err
	}
	defer token.Close()

	// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-token_elevation
	type tokenElevation struct {
		TokenIsElevated uint32
	}

	var elevation tokenElevation
	var outLen uint32
	err = windows.GetTokenInformation(token, windows.TokenElevation, (*byte)(unsafe.Pointer(&elevation)), uint32(unsafe.Sizeof(elevation)), &outLen)
	if err != nil {
		return false, err
	}

	return elevation.TokenIsElevated != 0, nil
}


================================================
FILE: pkg/winfsp/log.go
================================================
//go:build windows
// +build windows

/*
 * JuiceFS, Copyright 2026 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package winfsp

import (
	"fmt"
	"os"
	"strconv"
	"syscall"
	"time"

	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/utils"
)

const RotateAccessLog = 300 << 20 // 300 MiB

func (j *juice) log(ctx fs.LogContext, format string, args ...interface{}) {
	var failed bool
	for _, a := range args {
		if eno, ok := a.(syscall.Errno); ok && eno == syscall.EIO {
			failed = true
		}
	}
	j.logM.Lock()
	buffer := j.logBuffer
	j.logM.Unlock()
	if buffer == nil && !failed {
		return
	}
	now := utils.Now()
	cmd := fmt.Sprintf(format, args...)
	ts := now.Format("2006.01.02 15:04:05.000000")
	used := ctx.Duration()
	cmd += fmt.Sprintf(" <%.6f>", used.Seconds())
	line := fmt.Sprintf("%s [uid:%d,gid:%d,pid:%d] %s\n", ts, ctx.Uid(), ctx.Gid(), ctx.Pid(), cmd)
	if failed {
		logger.Errorf("failed operation: %s", line)
	}
	if buffer == nil {
		return
	}
	select {
	case buffer <- line:
	default:
		logger.Debugf("log dropped: %s", line[:len(line)-1])
	}
}

func (fs *juice) flushLog(f *os.File, path string, rotateCount int) {
	buf := make([]byte, 0, 128<<10)
	var lastcheck = time.Now()
	numFiles := rotateCount

	for {
		line := <-fs.logBuffer
		buf = append(buf[:0], []byte(line)...)
	LOOP:
		for len(buf) < (128 << 10) {
			select {
			case line = <-fs.logBuffer:
				buf = append(buf, []byte(line)...)
			default:
				break LOOP
			}
		}
		_, err := f.Write(buf)
		if err != nil {
			logger.Errorf("write access log: %s", err)
			break
		}
		if lastcheck.Add(time.Minute).After(time.Now()) {
			continue
		}
		lastcheck = time.Now()
		fi, err := f.Stat()
		if err != nil {
			logger.Errorf("stat access log: %s", err)
			continue
		}
		if fi.Size() > RotateAccessLog {
			_ = f.Close()
			fi, err = os.Stat(path)
			if err == nil && fi.Size() > RotateAccessLog {
				tmp := fmt.Sprintf("%s.%p", path, fs)
				if os.Rename(path, tmp) == nil {
					for i := numFiles - 1; i > 0; i-- {
						_ = os.Rename(path+"."+strconv.Itoa(i), path+"."+strconv.Itoa(i+1))
					}
					_ = os.Rename(tmp, path+".1")
				} else {
					fi, err = os.Stat(path)
					if err == nil && fi.Size() > RotateAccessLog*int64(numFiles) {
						logger.Infof("can't rename %s, truncate it", path)
						_ = os.Truncate(path, 0)
					}
				}
			}
			f, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
			if err != nil {
				logger.Errorf("open %s: %s", path, err)
				break
			}
			_ = os.Chmod(path, 0666)
		}
	}
}


================================================
FILE: pkg/winfsp/winfs.go
================================================
//go:build windows
// +build windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package winfsp

import (
	"fmt"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"
	"unicode"

	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/juicedata/juicefs/pkg/win"
	"github.com/winfsp/cgofuse/fuse"
	"golang.org/x/sys/windows/registry"

	"github.com/urfave/cli/v2"
)

var logger = utils.GetLogger("juicefs")

const invalidFileHandle = uint64(0xffffffffffffffff)

type Ino = meta.Ino

type handleInfo struct {
	ino           meta.Ino
	cacheAttr     *meta.Attr
	attrExpiredAt time.Time
}

type juice struct {
	fuse.FileSystemBase
	sync.RWMutex
	conf         *vfs.Config
	vfs          *vfs.VFS
	fs           *fs.FileSystem
	host         *fuse.FileSystemHost
	handlers     map[uint64]handleInfo
	badfd        map[uint64]uint64
	inoHandleMap map[meta.Ino][]uint64

	asRoot           bool
	delayClose       int
	enabledGetPath   bool
	disableSymlink   bool
	readdirBatchSize int
	adminAsRoot      bool

	logM      sync.Mutex
	logBuffer chan string

	attrCacheTimeout time.Duration
}

// Init is called when the file system is created.
func (j *juice) Init() {
	j.handlers = make(map[uint64]handleInfo)
	j.badfd = make(map[uint64]uint64)
	j.inoHandleMap = make(map[meta.Ino][]uint64)
}

func (j *juice) newContext() vfs.LogContext {
	if j.asRoot {
		return vfs.NewLogContext(meta.Background())
	}
	uid, gid, pid := fuse.Getcontext()
	if uid == 0xffffffff || uid == win.SystemUIDFromFUSE {
		uid = 0
	}
	if gid == 0xffffffff || gid == win.SystemUIDFromFUSE {
		gid = 0
	}
	if j.adminAsRoot && uid == win.AdministratorUIDFromFUSE {
		// gid is basically unused on Windows, so we just check the uid here and set the gid as well
		uid = 0
		gid = 0
	}

	if pid == -1 {
		pid = 0
	}
	ctx := meta.NewContext(uint32(pid), uid, []uint32{gid})
	return vfs.NewLogContext(ctx)
}

// Statfs gets file system statistics.
func (j *juice) Statfs(path string, stat *fuse.Statfs_t) int {
	ctx := j.newContext()
	// defer trace(path)(stat)
	var totalspace, availspace, iused, iavail uint64
	j.fs.Meta().StatFS(ctx, meta.RootInode, &totalspace, &availspace, &iused, &iavail)
	var bsize uint64 = 4096
	blocks := totalspace / bsize
	bavail := availspace / bsize
	stat.Namemax = 255
	stat.Frsize = 4096
	stat.Bsize = bsize
	stat.Blocks = blocks
	stat.Bfree = bavail
	stat.Bavail = bavail
	stat.Files = iused + iavail
	stat.Ffree = iavail
	stat.Favail = iavail
	return 0
}

func errorconv(err syscall.Errno) int {
	// convert based on the error.i file in winfsp project
	switch err {
	case syscall.EACCES:
		return -fuse.EACCES
	case syscall.EEXIST:
		return -fuse.EEXIST
	case syscall.ENOENT, syscall.ENOTDIR:
		return -fuse.ENOENT
	case syscall.ECANCELED:
		return -fuse.EINTR
	case syscall.EIO:
		return -fuse.EIO
	case syscall.EINVAL:
		return -fuse.EINVAL
	case syscall.EBADFD:
		return -fuse.EBADF
	case syscall.EDQUOT:
		return -fuse.ENOSPC
	case syscall.EBUSY:
		return -fuse.EBUSY
	case syscall.ENOTEMPTY:
		return -fuse.ENOTEMPTY
	case syscall.ENAMETOOLONG:
		return -fuse.ENAMETOOLONG
	case syscall.ERROR_HANDLE_EOF:
		return -fuse.ENODATA
	}

	return -int(err)
}

func fuseFlagToSyscall(flag int) int {
	var ret int

	if flag&fuse.O_RDONLY != 0 {
		ret |= syscall.O_RDONLY
	}
	if flag&fuse.O_WRONLY != 0 {
		ret |= syscall.O_WRONLY
	}
	if flag&fuse.O_RDWR != 0 {
		ret |= syscall.O_RDWR
	}
	if flag&fuse.O_APPEND != 0 {
		ret |= syscall.O_APPEND
	}
	if flag&fuse.O_CREAT != 0 {
		ret |= syscall.O_CREAT
	}
	if flag&fuse.O_EXCL != 0 {
		ret |= syscall.O_EXCL
	}
	if flag&fuse.O_TRUNC != 0 {
		ret |= syscall.O_TRUNC
	}
	return ret

}

// Mknod creates a file node.
func (j *juice) Mknod(p string, mode uint32, dev uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Mknod (%s, %d, %d): %d", p, mode, dev, e) }()
	parent, err := j.fs.Open(ctx, path.Dir(p), 0)
	if err != 0 {
		e = errorconv(err)
		return
	}
	_, errno := j.vfs.Mknod(ctx, parent.Inode(), path.Base(p), uint16(mode), 0, uint32(dev))
	e = errorconv(errno)
	if e == 0 {
		j.fs.InvalidateEntry(parent.Inode(), path.Base(p))
	}
	return
}

// Mkdir creates a directory.
func (j *juice) Mkdir(path string, mode uint32) (e int) {
	if path == "/.UMOUNTIT" {
		logger.Infof("Umount %s ...", j.conf.Meta.MountPoint)
		go j.host.Unmount()
		return -fuse.ENOENT
	}
	ctx := j.newContext()
	defer func() { j.log(ctx, "Mkdir (%s, %d): %d", path, mode, e) }()
	e = errorconv(j.fs.Mkdir(ctx, path, uint16(mode), 0))
	return
}

// Unlink removes a file.
func (j *juice) Unlink(path string) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Unlink (%s): %d", path, e) }()
	e = errorconv(j.fs.Delete(ctx, path))
	return
}

// Rmdir removes a directory.
func (j *juice) Rmdir(path string) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Rmdir (%s): %d", path, e) }()
	e = errorconv(j.fs.Delete(ctx, path))
	return
}

func (j *juice) Symlink(target string, newpath string) (e int) {
	return -fuse.ENOSYS
	ctx := j.newContext()
	defer func() { j.log(ctx, "Symlink (%s, %s): %d", target, newpath, e) }()
	parent, err := j.fs.Open(ctx, path.Dir(newpath), 0)
	if err != 0 {
		e = errorconv(err)
		return
	}
	_, errno := j.vfs.Symlink(ctx, target, parent.Inode(), path.Base(newpath))
	e = errorconv(errno)
	return
}

func (j *juice) Readlink(path string) (e int, target string) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Readlink (%s): (%d, %s)", path, e, target) }()
	if path == "/" && j.disableSymlink {
		e = -fuse.ENOSYS
		return
	}
	fi, err := j.fs.Lstat(ctx, path)
	if err != 0 {
		e = errorconv(err)
		return
	}
	t, errno := j.vfs.Readlink(ctx, fi.Inode())
	e = errorconv(errno)
	target = string(t)
	return
}

// Rename renames a file.
func (j *juice) Rename(oldpath string, newpath string) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Rename (%s, %s): %d", oldpath, newpath, e) }()
	e = errorconv(j.fs.Rename(ctx, oldpath, newpath, 0))
	return
}

// Chmod changes the permission bits of a file.
func (j *juice) Chmod(path string, mode uint32) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Chmod (%s, %d): %d", path, mode, e) }()
	f, err := j.fs.Open(ctx, path, 0)
	if err != 0 {
		e = errorconv(err)
		return
	}
	e = errorconv(f.Chmod(ctx, uint16(mode)))
	if e == 0 {
		j.invalidateAttrCache(f.Inode())
	}
	return
}

// Chown changes the owner and group of a file.
func (j *juice) Chown(path string, uid uint32, gid uint32) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Chown (%s, %d, %d): %d", path, uid, gid, e) }()
	f, err := j.fs.Open(ctx, path, 0)
	if err != 0 {
		e = errorconv(err)
		return
	}
	if runtime.GOOS == "windows" {
		// FIXME: don't change ownership in windows
		return 0
	}
	info, _ := f.Stat()
	if uid == 0xffffffff {
		uid = uint32(info.(*fs.FileStat).Uid())
	}
	if gid == 0xffffffff {
		gid = uint32(info.(*fs.FileStat).Gid())
	}
	e = errorconv(f.Chown(ctx, uid, gid))
	return
}

// Utimens changes the access and modification times of a file.
func (j *juice) Utimens(path string, tmsp []fuse.Timespec) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Utimens (%s, %v): %d", path, tmsp, e) }()
	f, err := j.fs.Open(ctx, path, 0)
	if err != 0 {
		e = errorconv(err)
	} else {
		e = errorconv(f.Utime2(ctx, tmsp[0].Sec, tmsp[0].Nsec, tmsp[1].Sec, tmsp[1].Nsec))
		if e == 0 {
			j.invalidateAttrCache(f.Inode())
		}
	}
	return
}

// Create creates and opens a file.
// The flags are a combination of the fuse.O_* constants.
func (j *juice) Create(p string, flags int, mode uint32) (e int, fh uint64) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Create (%s, %d, %d): (%d, %d)", p, flags, mode, e, fh) }()
	parent, err := j.fs.Open(ctx, path.Dir(p), 0)
	if err != 0 {
		e = errorconv(err)
		return
	}

	entry, fh, errno := j.vfs.Create(ctx, parent.Inode(), path.Base(p), uint16(mode), 0, uint32(fuseFlagToSyscall(flags)))
	if errno == 0 {
		j.Lock()
		j.handlers[fh] = handleInfo{
			ino:           entry.Inode,
			cacheAttr:     entry.Attr,
			attrExpiredAt: time.Now().Add(j.conf.AttrTimeout),
		}
		j.inoHandleMap[entry.Inode] = append(j.inoHandleMap[entry.Inode], fh)
		j.Unlock()
	}
	e = errorconv(errno)
	if e == 0 {
		j.fs.InvalidateEntry(parent.Inode(), path.Base(p))
	}
	return
}

// Open opens a file.
// The flags are a combination of the fuse.O_* constants.
func (j *juice) Open(path string, flags int) (e int, fh uint64) {
	var fi fuse.FileInfo_t
	fi.Flags = fuseFlagToSyscall(flags)
	e = j.OpenEx(path, &fi)
	fh = fi.Fh
	return
}

// Open opens a file.
// The flags are a combination of the fuse.O_* constants.
func (j *juice) OpenEx(p string, fi *fuse.FileInfo_t) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Open (%s, %d): (%d, %d)", p, fi.Flags, e, fi.Fh) }()
	ino := meta.Ino(0)
	if strings.HasSuffix(p, "/.control") {
		ino, _ = vfs.GetInternalNodeByName(".control")
		if ino == 0 {
			e = -fuse.ENOENT
			return
		}
	} else if filename := path.Base(p); vfs.IsSpecialName(filename) && path.Dir(p) == "/" {
		ino, _ = vfs.GetInternalNodeByName(filename)
		if ino == 0 {
			e = -fuse.ENOENT
			return
		}
	} else {
		f, err := j.fs.Open(ctx, p, 0)
		if err != 0 {
			e = -fuse.ENOENT
			return
		}
		ino = f.Inode()
	}

	entry, fh, errno := j.vfs.Open(ctx, ino, uint32(fuseFlagToSyscall(fi.Flags)))
	if errno == 0 {
		fi.Fh = fh
		if vfs.IsSpecialNode(ino) {
			fi.DirectIo = true
		} else {
			fi.KeepCache = entry.Attr.KeepCache
		}
		j.Lock()
		j.handlers[fh] = handleInfo{
			ino:           ino,
			cacheAttr:     entry.Attr,
			attrExpiredAt: time.Now().Add(j.conf.AttrTimeout),
		}
		j.inoHandleMap[ino] = append(j.inoHandleMap[ino], fh)
		j.Unlock()
	}
	e = errorconv(errno)
	return
}

func (j *juice) attrToStat(inode Ino, attr *meta.Attr, stat *fuse.Stat_t) {
	stat.Ino = uint64(inode)
	stat.Mode = attr.SMode()
	stat.Uid = attr.Uid
	stat.Gid = attr.Gid

	if stat.Uid == 0 {
		if j.adminAsRoot {
			stat.Uid = win.AdministratorUIDFromFUSE
		} else {
			stat.Uid = win.SystemUIDFromFUSE
		}
	}
	if stat.Gid == 0 && j.adminAsRoot {
		if j.adminAsRoot {
			stat.Gid = win.AdminstratorsGIDFromFUSE
		} else {
			stat.Gid = win.SystemUIDFromFUSE
		}
	}

	stat.Birthtim.Sec = attr.Atime
	stat.Birthtim.Nsec = int64(attr.Atimensec)
	stat.Atim.Sec = attr.Atime
	stat.Atim.Nsec = int64(attr.Atimensec)
	stat.Mtim.Sec = attr.Mtime
	stat.Mtim.Nsec = int64(attr.Mtimensec)
	stat.Ctim.Sec = attr.Ctime
	stat.Ctim.Nsec = int64(attr.Ctimensec)
	stat.Nlink = attr.Nlink
	var rdev uint32
	var size, blocks uint64
	switch attr.Typ {
	case meta.TypeDirectory:
		fallthrough
	case meta.TypeSymlink:
		fallthrough
	case meta.TypeFile:
		size = attr.Length
		blocks = (size + 0xffff) / 0x10000
		stat.Blksize = 0x10000
	case meta.TypeBlockDev:
		fallthrough
	case meta.TypeCharDev:
		rdev = attr.Rdev
	}
	stat.Size = int64(size)
	stat.Blocks = int64(blocks)
	stat.Rdev = uint64(rdev)
	if attr.Flags&meta.FlagImmutable != 0 {
		stat.Flags |= fuse.UF_READONLY
	}
	if attr.Flags&meta.FlagWindowsHidden != 0 {
		stat.Flags |= fuse.UF_HIDDEN
	}
	if attr.Flags&meta.FlagWindowsSystem != 0 {
		stat.Flags |= fuse.UF_SYSTEM
	}
	if attr.Flags&meta.FlagWindowsArchive != 0 {
		stat.Flags |= fuse.UF_ARCHIVE
	}
}

func (j *juice) h2i(fh *uint64) meta.Ino {
	defer j.RUnlock()
	j.RLock()

	entry := j.handlers[*fh]
	if entry.ino == 0 {
		newfh := j.badfd[*fh]
		if newfh != 0 {
			entry = j.handlers[newfh]
			if entry.ino > 0 {
				*fh = newfh
			}
		}
	}
	return entry.ino
}

func (j *juice) reopen(p string, fh *uint64) meta.Ino {
	e, newfh := j.Open(p, os.O_RDWR)
	if e != 0 {
		return 0
	}
	j.Lock()
	defer j.Unlock()
	j.badfd[*fh] = newfh
	*fh = newfh
	return j.handlers[newfh].ino
}

// Getattr gets file attributes.
func (j *juice) getAttrForSpFile(ctx vfs.LogContext, p string, stat *fuse.Stat_t, fh uint64) (e int) {
	parentDir := path.Dir(p)
	_, err := j.fs.Stat(ctx, parentDir)
	if err != 0 {
		e = -fuse.ENOENT
		return
	}

	filename := path.Base(p)
	inode, attr := vfs.GetInternalNodeByName(filename)
	if inode == 0 {
		e = -fuse.ENOENT
		return
	}

	j.vfs.UpdateLength(inode, attr)

	attr.Gid = ctx.Gid()
	attr.Uid = ctx.Uid()

	j.attrToStat(inode, attr, stat)
	return
}

func (j *juice) invalidateAttrCache(ino meta.Ino) {
	if j.attrCacheTimeout == 0 || ino == 0 {
		return
	}
	j.fs.InvalidateAttr(ino) // invalidate the attrcache in fs layer
	j.Lock()
	defer j.Unlock()

	handlers := j.inoHandleMap[ino]
	for _, fh := range handlers {
		if cache, ok := j.handlers[fh]; ok {
			cache.cacheAttr = nil
			cache.attrExpiredAt = time.Time{}
			j.handlers[fh] = cache
		}
	}
}

func (j *juice) getAttrFromCache(fh uint64) (entry *meta.Entry) {
	if j.attrCacheTimeout == 0 || fh == invalidFileHandle {
		return nil
	}
	j.RLock()
	defer j.RUnlock()
	if cache, ok := j.handlers[fh]; ok && cache.cacheAttr != nil {
		if time.Now().Before(cache.attrExpiredAt) {
			entry = &meta.Entry{
				Inode: cache.ino,
				Attr:  cache.cacheAttr,
			}
			return entry
		}
	}
	return nil
}

func (j *juice) setAttrCache(fh uint64, attr *meta.Attr) {
	if j.attrCacheTimeout == 0 || fh == invalidFileHandle {
		return
	}

	j.Lock()
	defer j.Unlock()

	if cache, ok := j.handlers[fh]; ok {
		cache.cacheAttr = attr
		cache.attrExpiredAt = time.Now().Add(j.attrCacheTimeout)
		j.handlers[fh] = cache
	}
}

func (j *juice) getAttr(ctx vfs.Context, fh uint64, ino Ino, opened uint8) (entry *meta.Entry, err syscall.Errno) {
	if entry := j.getAttrFromCache(fh); entry != nil {
		return entry, 0
	}

	if entry, err = j.vfs.GetAttr(ctx, ino, opened); err != 0 {
		return nil, err
	}

	j.setAttrCache(fh, entry.Attr)

	return entry, 0
}

// Getattr gets file attributes.
func (j *juice) Getattr(p string, stat *fuse.Stat_t, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Getattr (%s, %d): %d", p, fh, e) }()
	ino := j.h2i(&fh)

	if ino == 0 {
		// special case for .control file
		if strings.HasSuffix(p, "/.control") {
			e = j.getAttrForSpFile(ctx, p, stat, fh)
			return
		} else if vfs.IsSpecialName(path.Base(p)) && path.Dir(p) == "/" {
			e = j.getAttrForSpFile(ctx, p, stat, fh)
			return
		}

		fi, err := j.fs.Lstat(ctx, p)
		if err != 0 {
			// Known issue: If the parent directory is not exists, the Windows api such as
			// GetFileAttributeX expects the ERROR_PATH_NOT_FOUND returned.
			// However, the fuse api has no such error code defined.
			e = -fuse.ENOENT
			return
		}
		ino = fi.Inode()
		entry := fi.Attr()
		if entry != nil {
			j.vfs.UpdateLength(ino, entry)
			j.attrToStat(ino, entry, stat)
			return
		}
	}

	entry, errrno := j.getAttr(ctx, fh, ino, 0)
	if errrno != 0 {
		e = errorconv(errrno)
		return
	}
	j.vfs.UpdateLength(entry.Inode, entry.Attr)
	j.attrToStat(entry.Inode, entry.Attr, stat)
	return
}

// Truncate changes the size of a file.
func (j *juice) Truncate(path string, size int64, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Truncate (%s, %d, %d): %d", path, size, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		e = -fuse.EBADF
		return
	}
	e = errorconv(j.vfs.Truncate(ctx, ino, size, 0, nil))
	if e == 0 {
		j.invalidateAttrCache(ino)
	}
	return
}

// Read reads data from a file.
func (j *juice) Read(path string, buf []byte, off int64, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Read (%s, %d, %d, %d): %d", path, len(buf), off, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		logger.Warnf("read from released fd %d for %s, re-open it", fh, path)
		ino = j.reopen(path, &fh)
	}
	if ino == 0 {
		e = -fuse.EBADF
		return
	}
	n, err := j.vfs.Read(ctx, ino, buf, uint64(off), fh)
	if err != 0 {
		e = errorconv(err)
		return
	}
	return n
}

// Write writes data to a file.
func (j *juice) Write(path string, buff []byte, off int64, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Write (%s, %d, %d, %d): %d", path, len(buff), off, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		logger.Warnf("write to released fd %d for %s, re-open it", fh, path)
		ino = j.reopen(path, &fh)
	}
	if ino == 0 {
		e = -fuse.EBADF
		return
	}
	errno := j.vfs.Write(ctx, ino, buff, uint64(off), fh)
	if errno != 0 {
		e = errorconv(errno)
	} else {
		e = len(buff)
	}

	return
}

// Flush flushes cached file data.
func (j *juice) Flush(path string, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Flush (%s, %d): %d", path, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		e = -fuse.EBADF
		return
	}
	e = errorconv(j.vfs.Flush(ctx, ino, fh, 0))
	return
}

func (j *juice) cleanInoHandlerMap(ino meta.Ino, fh uint64) {
	handles := j.inoHandleMap[ino]
	for i, handle := range handles {
		if handle == fh {
			j.inoHandleMap[ino] = append(handles[:i], handles[i+1:]...)
			break
		}
	}
	if len(j.inoHandleMap[ino]) == 0 {
		delete(j.inoHandleMap, ino)
	}
}

// Release closes an open file.
func (j *juice) Release(path string, fh uint64) int {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Release (%s, %d)", path, fh) }()
	orig := fh
	ino := j.h2i(&fh)
	if ino == 0 {
		logger.Warnf("release invalid fd %d for %s", fh, path)
		return -fuse.EBADF
	}
	go func() {
		time.Sleep(time.Second * time.Duration(j.delayClose))
		j.Lock()
		delete(j.handlers, fh)
		j.cleanInoHandlerMap(ino, fh)
		if orig != fh {
			delete(j.badfd, orig)
			j.cleanInoHandlerMap(ino, orig)
		}
		j.Unlock()
		j.vfs.Release(j.newContext(), ino, fh)
	}()
	return 0
}

// Fsync synchronizes file contents.
func (j *juice) Fsync(path string, datasync bool, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Fsync (%s, %t, %d): %d", path, datasync, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		e = -fuse.EBADF
	} else {
		e = errorconv(j.vfs.Fsync(ctx, ino, 1, fh))
	}
	return
}

// Opendir opens a directory.
func (j *juice) Opendir(path string) (e int, fh uint64) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Opendir (%s): (%d, %d)", path, e, fh) }()
	f, err := j.fs.Open(ctx, path, 0)
	if err != 0 {
		e = -fuse.ENOENT
		return
	}
	fh, errno := j.vfs.Opendir(ctx, f.Inode(), 0)
	if errno == 0 {
		j.Lock()
		j.handlers[fh] = handleInfo{
			ino: f.Inode(),
		}
		j.inoHandleMap[f.Inode()] = append(j.inoHandleMap[f.Inode()], fh)

		j.Unlock()
	}
	e = errorconv(errno)
	return
}

// Readdir reads a directory.
func (j *juice) Readdir(path string,
	fill func(name string, stat *fuse.Stat_t, ofst int64) bool,
	ofst int64, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Readdir (%s, %d, %d): %d", path, ofst, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		e = -fuse.EBADF
		return
	}

	currentOffset := int(ofst)

	for {
		entries, readAt, err := j.vfs.Readdir(ctx, ino, uint32(j.readdirBatchSize), currentOffset, fh, true)
		if err != 0 {
			e = errorconv(err)
			return
		}

		if len(entries) == 0 {
			// Some meta engines may return entries less than batch size
			// so we only break when no entries are returned
			break
		}

		var st fuse.Stat_t
		var ok bool
		var full = true
		// all the entries should have same format
		for _, e := range entries {
			if !e.Attr.Full {
				full = false
				break
			}
		}
		for _, e := range entries {
			name := string(e.Name)
			if full {
				if j.vfs.ModifiedSince(e.Inode, readAt) {
					if e2, err := j.vfs.GetAttr(ctx, e.Inode, 0); err == 0 {
						e.Attr = e2.Attr
					}
				}
				j.vfs.UpdateLength(e.Inode, e.Attr)
				j.attrToStat(e.Inode, e.Attr, &st)
				ok = fill(name, &st, 0)
			} else {
				ok = fill(name, nil, 0)
			}
			if !ok {
				break
			}
		}

		currentOffset += len(entries)
	}
	return
}

// Releasedir closes an open directory.
func (j *juice) Releasedir(path string, fh uint64) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Releasedir (%s, %d): %d", path, fh, e) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		e = -fuse.EBADF
		return
	}
	j.Lock()
	delete(j.handlers, fh)
	j.cleanInoHandlerMap(ino, fh)
	j.Unlock()
	e = -int(j.vfs.Releasedir(ctx, ino, fh))
	return
}

func (j *juice) Chflags(path string, flags uint32) (e int) {
	ctx := j.newContext()
	defer func() { j.log(ctx, "Chflags (%s, %d): %d", path, flags, e) }()
	fi, err := j.fs.Stat(ctx, path)
	if err != 0 {
		e = -fuse.ENOENT
		return
	}

	var flagSet uint8
	if flags&fuse.UF_READONLY != 0 {
		flagSet |= meta.FlagImmutable
	}
	if flags&fuse.UF_HIDDEN != 0 {
		flagSet |= meta.FlagWindowsHidden
	}
	if flags&fuse.UF_SYSTEM != 0 {
		flagSet |= meta.FlagWindowsSystem
	}
	if flags&fuse.UF_ARCHIVE != 0 {
		flagSet |= meta.FlagWindowsArchive
	}

	ino := fi.Inode()
	err = j.vfs.ChFlags(ctx, ino, flagSet)
	if err != 0 {
		e = errorconv(err)
	} else {
		j.invalidateAttrCache(ino)
	}

	return
}

func (j *juice) Getpath(p string, fh uint64) (e int, ret string) {
	if !j.enabledGetPath {
		ret = p
		return
	}

	if strings.HasSuffix(p, "/.control") {
		ret = p
		return
	} else if vfs.IsSpecialName(path.Base(p)) && path.Dir(p) == "/" {
		ret = p
		return
	}

	ctx := j.newContext()
	defer func() { j.log(ctx, "Getpath (%s, %d): (%d, %s)", p, fh, e, ret) }()
	ino := j.h2i(&fh)
	if ino == 0 {
		fi, err := j.fs.Stat(ctx, p)
		if err != 0 {
			e = errorconv(err)
			return
		}
		ino = fi.Inode()
	}

	paths := j.vfs.Meta.GetPaths(ctx, ino)
	if len(paths) == 0 {
		ret = p
		return
	}

	if len(paths) == 1 {
		ret = paths[0]
		return
	}

	retCandidicate := paths[0]

	for _, path := range paths {
		if p == path {
			ret = path
			return
		} else if strings.EqualFold(path, p) {
			retCandidicate = path
		}
	}

	ret = retCandidicate
	return
}

func getWinFspVersion() string {
	const winfspKey = `SOFTWARE\WOW6432Node\WinFsp`
	const sxsDirValue = "SxsDir"
	const dllName = "winfsp-x64.dll"

	// Get SxsDir from registry
	k, err := registry.OpenKey(registry.LOCAL_MACHINE, winfspKey, registry.QUERY_VALUE)
	if err != nil {
		logger.Errorf("Failed to open registry key %s: %v", winfspKey, err)
		return ""
	}
	defer k.Close()

	sxsDir, _, err := k.GetStringValue(sxsDirValue)
	if err != nil {
		logger.Errorf("Failed to get value %s from registry key %s: %v", sxsDirValue, winfspKey, err)
		return ""
	}

	if sxsDir == "" {
		logger.Errorf("SxsDir value is empty in registry key %s", winfspKey)
		return ""
	}

	dllPath := filepath.Join(sxsDir, "bin", dllName)
	if _, err := os.Stat(dllPath); os.IsNotExist(err) {
		logger.Errorf("WinFsp DLL not found at %s", dllPath)
		return ""
	}

	// Get version info from DLL using PowerShell
	cmd := exec.Command("powershell", "-NoProfile", "-Command",
		fmt.Sprintf(`(Get-Item '%s').VersionInfo.FileVersion`, dllPath))
	output, err := cmd.Output()
	if err != nil {
		logger.Errorf("Failed to get version info from %s: %v", dllPath, err)
		return ""
	}

	return strings.TrimSpace(string(output))
}

func compareWinFspVersion(v1, v2 string) int {
	parseVersion := func(v string) []int {
		parts := strings.Split(v, ".")
		result := make([]int, 3)
		for i := 0; i < len(parts) && i < 3; i++ {
			result[i], _ = strconv.Atoi(parts[i])
		}
		return result
	}

	p1 := parseVersion(v1)
	p2 := parseVersion(v2)

	for i := 0; i < 3; i++ {
		if p1[i] < p2[i] {
			return -1
		}
		if p1[i] > p2[i] {
			return 1
		}
	}
	return 0
}

func Serve(v *vfs.VFS, fuseOpt string, asRoot bool, delayCloseSec int, showDotFiles bool, threadsCount int, caseSensitive bool, enabledGetPath bool, c *cli.Context) error {
	var jfs juice
	conf := v.Conf
	jfs.readdirBatchSize = c.Int("readdir-batch-size")
	if jfs.readdirBatchSize <= 0 {
		jfs.readdirBatchSize = 1000
	}
	logger.Debugf("Readdir batch size: %d", jfs.readdirBatchSize)

	volAlias := c.String("alias")
	if volAlias == "" {
		volAlias = conf.Format.Name
	} else {
		// alias maybe juicefs-alias\alias when mounting by the net use command, we need the last part
		parts := strings.Split(volAlias, `\`)
		if len(parts) > 1 {
			volAlias = parts[len(parts)-1]
		}
	}

	jfs.attrCacheTimeout = v.Conf.AttrTimeout
	jfs.conf = conf
	jfs.vfs = v
	jfs.enabledGetPath = enabledGetPath
	jfs.adminAsRoot = c.Bool("admin-as-root")

	fuseAccessLog := c.String("fuse-access-log")
	if fuseAccessLog != "" {
		f, err := os.OpenFile(fuseAccessLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			logger.Errorf("open fuse access log %s: %s", fuseAccessLog, err)
		} else {
			logger.Infof("fuse access log: %s", fuseAccessLog)
			_ = os.Chmod(fuseAccessLog, 0666)
			jfs.logBuffer = make(chan string, 1024)
			rotateCount := c.Int("fuse-access-log-rotate-count")
			if rotateCount <= 0 {
				rotateCount = 7
			}
			go jfs.flushLog(f, fuseAccessLog, rotateCount)
		}
	}

	var err error
	jfs.fs, err = fs.NewFileSystem(conf, v.Meta, v.Store, nil)
	if err != nil {
		logger.Fatalf("Initialize FileSystem failed: %s", err)
	}
	jfs.disableSymlink = os.Getenv("JUICEFS_ENABLE_SYMLINK") != "1"
	jfs.asRoot = asRoot
	jfs.delayClose = delayCloseSec
	host := fuse.NewFileSystemHost(&jfs)
	jfs.host = host
	var options = "volname=" + volAlias
	svrName := fmt.Sprintf("juicefs-%s", volAlias)
	options += fmt.Sprintf(",ExactFileSystemName=%s,ThreadCount=%d", svrName, threadsCount)
	options += fmt.Sprintf(",DirInfoTimeout=%d,VolumeInfoTimeout=1000,KeepFileCache", int(conf.DirEntryTimeout.Seconds()*1000))
	options += fmt.Sprintf(",FileInfoTimeout=%d", int(conf.EntryTimeout.Seconds()*1000))

	mountAsNetworkDrive := !c.Bool("as-local-volume")
	if mountAsNetworkDrive {
		// when mounting as network drive, the second part of volume prefix should be the volume alias or the display won't be correct
		options += fmt.Sprintf(",VolumePrefix=/%s/%s", svrName, volAlias)
	}

	createPerms := c.String("create-perm")
	if createPerms != "" {
		if p, err := strconv.ParseUint(createPerms, 8, 32); err == nil {
			options += fmt.Sprintf(",create_umask=%03o", 0o0777&^p)
		} else {
			logger.Warningf("Invalid create-perm value: %s", createPerms)
		}
	}

	if asRoot {
		options += ",uid=-1,gid=-1"
	}
	if fuseOpt != "" {
		options += "," + fuseOpt
	}
	if !showDotFiles {
		options += ",dothidden"
	}

	winfspDbgLog := c.String("winfsp-dbg-log")
	if winfspDbgLog != "" {
		logger.Infof("WinFsp Debug Log Path: %s", winfspDbgLog)
		options += ",debug,DebugLog=" + winfspDbgLog
	}
	flushOnCleanup := c.Bool("flush-on-cleanup")
	if flushOnCleanup {
		winFSPVersion := getWinFspVersion()
		if winFSPVersion == "" {
			logger.Warningf("Failed to detect WinFsp version, disabling flush-on-cleanup")
			flushOnCleanup = false
		} else {
			const minVersion = "2.1.25156"
			if compareWinFspVersion(winFSPVersion, minVersion) <= 0 {
				logger.Warningf("Winfsp version %s <= %s, flush-on-cleanup disabled", winFSPVersion, minVersion)
				flushOnCleanup = false
			} else {
				logger.Debugf("Winfsp version %s > %s, flush-on-cleanup enabled", winFSPVersion, minVersion)
			}
		}
	}
	if flushOnCleanup {
		options += ",FlushOnCleanup=1"
	}

	host.SetCapCaseInsensitive(!caseSensitive)
	host.SetCapReaddirPlus(true)

	mountVolumeName := filepath.VolumeName(conf.Mountpoint)
	mountPointIsDrive := isDriveByVolumeName(conf.Mountpoint)
	if mountPointIsDrive {
		conf.Mountpoint = mountVolumeName
	}

	if !mountPointIsDrive && mountAsNetworkDrive {
		return fmt.Errorf("Cannot mount to a local directory when --as-local-volume is not set")
	}

	if !mountPointIsDrive {
		if _, err := os.Stat(conf.Mountpoint); err == nil {
			return fmt.Errorf("Mount point %s cannot be an existing folder", conf.Mountpoint)
		}

		// the parent directory of the mount point must exist
		parentDir := filepath.Dir(conf.Mountpoint)
		if _, err := os.Stat(parentDir); os.IsNotExist(err) {
			return fmt.Errorf("Parent directory %s of mount point %s does not exist", parentDir, conf.Mountpoint)
		}
	}

	logger.Debugf("mount point: %s, mountPointIsDrive: %v, options: %s", conf.Mountpoint, mountPointIsDrive, options)
	exitOk := host.Mount(conf.Mountpoint, []string{"-o", options})
	if exitOk {
		return nil
	}

	return fmt.Errorf("juicefs mount command exit with error, please check the log for details")
}

const winfspSecurityDescriptor = "D:P(A;;RPWPLC;;;WD)"

func updateWinFspRegService(winfspServiceName string, cmdLine string, alias string, logPath string, asNetworkDrive bool) error {
	regKeyPath := "SOFTWARE\\WOW6432Node\\WinFsp\\Services\\" + winfspServiceName
	k, err := registry.OpenKey(registry.LOCAL_MACHINE, regKeyPath, registry.ALL_ACCESS)
	if err != nil {
		if err == syscall.ERROR_FILE_NOT_FOUND || err == syscall.ERROR_PATH_NOT_FOUND {
			logger.Info("WinFsp service registry key not found, creating it.")
			k, _, err = registry.CreateKey(registry.LOCAL_MACHINE, regKeyPath, registry.ALL_ACCESS)
			if err != nil {
				return fmt.Errorf("Failed to create registry key: %s", err)
			}
		} else {
			return fmt.Errorf("Failed to open registry key: %s", err)
		}
	}
	defer k.Close()

	err = k.SetStringValue("CommandLine", cmdLine)
	if err != nil {
		return fmt.Errorf("Failed to set registry key: %s", err)
	}

	securityDescriptor := winfspSecurityDescriptor
	err = k.SetStringValue("Security", securityDescriptor)
	if err != nil {
		return fmt.Errorf("Failed to set registry key: %s", err)
	}

	filePath, err := os.Executable()
	if err != nil {
		return fmt.Errorf("Failed to get current file path: %s", err)
	}

	err = k.SetStringValue("Executable", filePath)
	if err != nil {
		return fmt.Errorf("Failed to set registry key: %s", err)
	}

	err = k.SetDWordValue("JobControl", 1)
	if err != nil {
		return fmt.Errorf("Failed to set registry key: %s", err)
	}

	if logPath != "" {
		err = k.SetStringValue("Stderr", logPath)
		if err != nil {
			return fmt.Errorf("Failed to set registry key: %s", err)
		}
	} else {
		err = k.DeleteValue("Stderr")
		if err != nil {
			return fmt.Errorf("Failed to delete registry key: %s", err)
		}
	}

	// RunAs NetworkService/LocalSystem
	if !asNetworkDrive {
		err = k.SetStringValue("RunAs", "LocalSystem")
		if err != nil {
			return fmt.Errorf("Failed to set RunAs value: %s", err)
		}
	} else {
		k.DeleteValue("RunAs")
	}

	//  SET "HKLM\\SOFTWARE\\WOW6432Node\\WinFsp\\MountBroadcastDriveChange" to 1
	k2, err := registry.OpenKey(registry.LOCAL_MACHINE, "SOFTWARE\\WOW6432Node\\WinFsp", registry.ALL_ACCESS)
	if err != nil {
		logger.Warningf("Failed to open registry key for MountBroadcastDriveChange: %s", err)
	} else {
		defer k2.Close()
		err = k2.SetDWordValue("MountBroadcastDriveChange", 1)
		if err != nil {
			logger.Warningf("Failed to set MountBroadcastDriveChange value: %s", err)
		}
	}

	return nil
}

func isDriveByVolumeName(s string) bool {
	// remove prefix "\\.\" if exists
	if strings.HasPrefix(s, `\\.\`) {
		s = s[4:]
	}

	vol := filepath.VolumeName(s)
	if len(vol) < 2 {
		return false
	}
	if !unicode.IsLetter(rune(vol[0])) || vol[1] != ':' {
		return false
	}
	if s == vol {
		return true
	}
	if len(s) == len(vol)+1 && (s[len(vol)] == '\\' || s[len(vol)] == '/') {
		return true
	}
	return false
}

func getWinFspBinPath() string {
	// read InstallDir in Computer\HKEY_LOCAL_MACHINE\SOFTWARE\WOW6432Node\WinFsp

	const winfspKey = `SOFTWARE\WOW6432Node\WinFsp`
	const installDirValue = "InstallDir"
	var installDir string
	k, err := registry.OpenKey(registry.LOCAL_MACHINE, winfspKey, registry.QUERY_VALUE)
	if err != nil {
		logger.Errorf("Failed to open registry key %s: %v", winfspKey, err)
		return ""
	}
	defer k.Close()
	installDir, _, err = k.GetStringValue(installDirValue)
	if err != nil {
		logger.Errorf("Failed to get value %s from registry key %s: %v", installDirValue, winfspKey, err)
		return ""
	}

	// check if the path exists
	if installDir == "" {
		logger.Errorf("InstallDir value is empty in registry key %s", winfspKey)
		return ""
	}

	return filepath.Join(installDir, "bin")
}

func checkIfMountProcessReady(mountpoint string, timeoutSec int) bool {
	// check if the mountpoint is ready
	start := time.Now()
	lastPrint := start
	for {
		time.Sleep(time.Second)
		_, err := os.Stat(mountpoint)
		if err == nil {
			return true
		}
		if time.Since(lastPrint) >= 5*time.Second {
			logger.Infof("Waiting for the mount point %s to be ready...", mountpoint)
			lastPrint = time.Now()
		}
		if time.Since(start) > time.Duration(timeoutSec)*time.Second {
			return false
		}
	}
}

func RunAsSystemService(name string, mountpoint string, logPath string, defaultCacheDir string, ctx *cli.Context) error {
	// https://winfsp.dev/doc/WinFsp-Service-Architecture/
	logger.Info("Running as Windows system service.")

	addr := ctx.Args().Get(0)
	var cmds []string = []string{"mount", addr, "%2"}

	hasCacheDir := false

	alias := ctx.String("alias")
	if alias == "" {
		alias = name
	}

	asNetworkDrive := !ctx.Bool("as-local-volume")

	logger.Infof("Mounting juicefs as Windows system service. This may require elevated privileges. (Network drive: %v)", asNetworkDrive)

	// reconstruct command line from flags
	for _, flag := range ctx.Command.Flags {
		for _, v := range flag.Names() {
			if !ctx.IsSet(v) {
				continue
			}

			if v == "cache-dir" {
				hasCacheDir = true
			}
			if v == "d" || v == "background" {
				continue
			}
			if v == "alias" {
				continue
			}

			if len(v) == 1 {
				cmds = append(cmds, "-"+v)
			} else {
				cmds = append(cmds, "--"+v)
			}

			val := ctx.Value(v)
			switch val := val.(type) {
			case bool:
				cmds[len(cmds)-1] = fmt.Sprintf("%s=%t", cmds[len(cmds)-1], val)
			case string:
				cmds = append(cmds, fmt.Sprintf("\"%s\"", val))
			default:
				cmds = append(cmds, fmt.Sprintf("%v", val))
			}
			break
		}
	}

	// check global flags
	for _, flag := range ctx.App.Flags {
		for _, v := range flag.Names() {
			if !ctx.IsSet(v) {
				continue
			}

			if len(v) == 1 {
				cmds = append(cmds, "-"+v)
			} else {
				cmds = append(cmds, "--"+v)
			}

			val := ctx.Value(v)
			switch val := val.(type) {
			case bool:
				cmds[len(cmds)-1] = fmt.Sprintf("%s=%t", cmds[len(cmds)-1], val)
			case string:
				cmds = append(cmds, fmt.Sprintf("\"%s\"", val))
			default:
				cmds = append(cmds, fmt.Sprintf("%v", val))
			}
			break
		}
	}

	cmds = append(cmds, "--alias", "\"%1\"") // We put %1 here since it will be replaced by WinFsp with the alias

	if !hasCacheDir && defaultCacheDir != "" {
		cmds = append(cmds, "--cache-dir", "\""+defaultCacheDir+"\"")
	}

	logger.Debug("Command line for juicefs service: ", strings.Join(cmds, " "))

	cmdLine := strings.Join(cmds, " ")

	winfspServiceName := "juicefs-" + alias
	if err := updateWinFspRegService(winfspServiceName, cmdLine, alias, logPath, asNetworkDrive); err != nil {
		return fmt.Errorf("Failed to update WinFsp service registry: %s", err)
	}

	// We need to use the "net use" for some users who have enabled the 'net use /persistent:yes' option for
	// auto-reconnecting after reboot.
	winFspBinPath := getWinFspBinPath()
	mountByNetUse := os.Getenv("JFS_WIN_MOUNT_VIA") != "winfsp-launchctl"
	if !asNetworkDrive {
		mountByNetUse = false
	}

	if winFspBinPath == "" && !mountByNetUse {
		return fmt.Errorf(`Cannot find WinFsp installation path from registry, please make sure WinFsp is installed correctly.`)
	}

	if !mountByNetUse {
		winfspLauncher := "launchctl-x64.exe"
		logger.Debugf("WinFsp Bin Path: %s", winFspBinPath)
		if winFspBinPath != "" {
			winfspLauncher = filepath.Join(winFspBinPath, winfspLauncher)
		}

		// the second param of start subcommand must be the same as the third param
		// or the Explorer will not be able to disconnect the volume.
		cmd := exec.Command(winfspLauncher, "start", winfspServiceName, alias, alias, mountpoint)
		cmd.Dir = winFspBinPath
		logger.Debugf("Mounting command(using launchctl): %s", cmd.String())

		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("Failed to mount juicefs as system service: %s, output: %s", err, string(out))
		}

		if !checkIfMountProcessReady(mountpoint, 25) {
			return fmt.Errorf("Mount command succeeded, but the mountpoint %s did not become ready in %d seconds, please check the juicefs logs for more information.", mountpoint, 25)
		}
	} else {
		logger.Debugf("Trying to start juicefs service by 'net use' command.")
		cmd := exec.Command("net", "use", mountpoint, fmt.Sprintf("\\\\%s\\%s", winfspServiceName, alias), "/Y")
		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("Failed to start juicefs service by 'net use': %s, output: %s", err, string(out))
		}
	}

	logger.Info("Juicefs mount process started successfully.")

	return nil
}


================================================
FILE: rfcs/1-dir-used-statistics.md
================================================
# Count space and inodes usage for each directory

## Background

Currently, we have several counters to globally count the used space and inodes, which can be used to show information or set quota. However, we do not have efficient ways to show used information of or set quota for each directory.

## Proposal

This document give a proposal to efficiently and almost immediately collect used space and inodes for each directory. The "efficiently" means this operation cannot affect the performance of normal IO operations like `mknod`, `write` .etc. And the "almost immediately" means this operation cannot be lazy or scheduled, we must update the used space and inodes actively, but there may be a little latency (between several seconds and 1 minute).

## Implementation

### Storage

The counters should be stored in meta engines, in this section we introduce how to store them in three kinds of meta engines.

#### Redis

Redis engine stores the counters in hashes.

```go
func (m *redisMeta) dirUsedSpaceKey() string {
    return m.prefix + "dirUsedSpace"
}
 
func (m *redisMeta) dirUsedInodesKey() string {
    return m.prefix + "dirUsedInodes"
}
```

#### SQL

SQL engine stores the counters in a table.

```go
type dirUsage struct {
    Inode       Ino    `xorm:"pk"`
    UsedSpace   uint64 `xorm:"notnull"`
    UsedInodes  uint64 `xorm:"notnull"`
}
```

#### TKV

TKV engine stores each counter in one key.

```go
func (m *kvMeta) dirUsageKey(inode Ino) []byte {
    return m.fmtKey("U", inode)
}
```

### Usage

In this section we represent how and when to update and read the counters.

#### Update

The are several file types among the children, we should clarify how to deal with each kinds of files first.

| Type          | Used space      | Used inodes |
| ------------- | --------------- | ----------- |
| Normal file   | `align4K(size)` | 1           |
| Directory     | 4KiB            | 1           |
| Symlink       | 4KiB            | 1           |
| FIFO          | 4KiB            | 1           |
| Block device  | 4KiB            | 1           |
| Char device   | 4KiB            | 1           |
| Socket        | 4KiB            | 1           |

Each meta engine should implement `doUpdateDirUsage`.

```go
type engine interface {
    ...
    doUpdateDirUsage(ctx Context, ino Ino, space int64, inodes int64)
}
```

Relevant IO operations should call `doUpdateDirUsage` asynchronously.

```go
func (m *baseMeta) Mknod(ctx Context, parent Ino, ...) syscall.Errno {
    ...
    err := m.en.doMknod(ctx, m.checkRoot(parent), ...)
    ...
    go m.en.doUpdateDirUsage(ctx, parent, 1<<12, 1)
    return err
}

func (m *baseMeta) Unlink(ctx Context, parent Ino, name string) syscall.Errno {
    ...
    err := m.en.doUnlink(ctx, m.checkRoot(parent), name)
    ...
    go m.en.doUpdateDirUsage(ctx, parent, -align4K(attr.size), -1)
    return err
}
```

#### Read

Each meta engine should implement `doGetDirUsage`.

```go
type engine interface {
    ...
    doGetDirUsage(ctx Context, ino Ino) (space, inodes uint64, err syscall.Errno)
}
```

Now we can fasly recursively calculate the space and inodes usage in a directory by `doGetDirUsage`.

```go
// walk all directories in root
func (m *baseMeta) fastWalkDir(ctx Context, inode Ino, walkDir func(Context, Ino)) syscall.Errno {
    walkDir(ctx, inode)
    var entries []*Entry
    st := m.en.doReaddir(ctx, inode, 0, &entries, -1) // disable plus
    ...
    for _, entry := range entries {
    	if ent.Attr.Typ != TypeDirectory {
            continue
    	}
    	m.fastWalkDir(ctx, entry.Inode, walkFn)
        ...
    }
    return 0
}
func (m *baseMeta) getDirUsage(ctx Context, root Ino) (space, inodes uint64, err syscall.Errno) {
    m.fastWalkDir(ctx, root, func(_ Context, ino Ino) {
        s, i, err := m.doGetDirUsage(ctx, ino)
        ...
        space += s
        inodes += i
    })
    return
}
```


================================================
FILE: sdk/java/.gitignore
================================================
*.dll
*.dylib
*.so
.classpath
.project
.settings/
dependency-reduced-pom.xml
target/


================================================
FILE: sdk/java/Makefile
================================================
GOROOT=$(shell go env GOROOT)

all: package

ceph: libjfs-ceph
	mvn package -B -Dmaven.test.skip=true

libjfs-ceph: ../../pkg/*/*.go libjfs/*.go
	make -C libjfs ceph

libjfs/libjfs: ../../pkg/*/*.go libjfs/*.go
	make -C libjfs

compile:
	mvn compile -B --quiet
test: libjfs
	mvn test -B --quiet
package: libjfs/libjfs
	mvn package -B -Dmaven.test.skip=true

win: win-package package

win-package: ../../pkg/*/*.go libjfs/*.go
	make -C libjfs win

package-all: libjfs-all
	mvn clean package -B -Dmaven.test.skip=true

libjfs-all: libjfs.so
	docker run --rm \
		-v ~/go/pkg/mod:/go/pkg/mod \
		-v ~/work/juicefs/juicefs:/go/src/github.com/juicedata/juicefs \
		-v /var/run/docker.sock:/var/run/docker.sock \
		-w /go/src/github.com/juicedata/juicefs/sdk/java/libjfs \
		--entrypoint=/bin/bash \
		juicedata/golang-cross:latest \
		-c 'make mac win linux-arm64 mac-arm64'

libjfs.so:
	docker run --rm \
		-v ~/go/pkg/mod:/go/pkg/mod \
		-v $(GOROOT):/go \
        -v ~/work/juicefs/juicefs:/go/src/github.com/juicedata/juicefs \
        -v /var/run/docker.sock:/var/run/docker.sock \
        -w /go/src/github.com/juicedata/juicefs/sdk/java/libjfs \
        juicedata/sdk-builder \
        /bin/bash -c 'make'


================================================
FILE: sdk/java/conf/contract/juicefs.xml
================================================
<configuration>
	<property>
		<name>fs.contract.test.fs.jfs</name>
		<value>jfs:///</value>
	</property>
	<property>
		<name>fs.jfs.impl</name>
		<value>io.juicefs.JuiceFileSystem</value>
	</property>
	<property>
		<name>juicefs.no-usage-report</name>
		<value>true</value>
	</property>	
	<property>
		<name>juicefs.names</name>
		<value>a.local,b.local,c.local,d.local,e.local</value>
	</property>
	<property>
		<name>juicefs.hosts</name>
		<value>127.0.0.2,127.0.0.3,127.0.0.4,127.0.0.5,127.0.0.6</value>
	</property>
	<property>
		<name>fs.contract.test.root-tests-enabled</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.is-case-sensitive</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-append</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-atomic-directory-delete</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-block-locality</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-atomic-rename</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-settimes</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-getfilestatus</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-concat</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-seek</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.rejects-seek-past-eof</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-strict-exceptions</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-unix-permissions</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.rename-returns-false-if-dest-exists</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.supports-file-reference</name>
		<value>true</value>
	</property>
	<property>
		<name>fs.contract.rename-returns-false-if-source-missing</name>
		<value>true</value>
	</property>
</configuration>


================================================
FILE: sdk/java/conf/core-site.xml
================================================
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
	<property>
		<name>fs.defaultFS</name>
		<value>jfs://dev/</value>
	</property>
	<property>
		<name>fs.jfs.impl</name>
		<value>io.juicefs.JuiceFileSystem</value>
	</property>
	<property>
		<name>juicefs.no-usage-report</name>
		<value>true</value>
	</property>
	<property>
		<name>juicefs.file.checksum</name>
		<value>true</value>
	</property>
	<property>
		<name>juicefs.access-log</name>
		<value>/tmp/juicefs-access.log</value>
	</property>	
	<property>
		<name>juicefs.dev.meta</name>
		<value>127.0.0.1</value>
	</property>
	<property>
		<name>juicefs.names</name>
		<value>a.local,b.local,c.local,d.local,e.local</value>
	</property>
	<property>
		<name>juicefs.hosts</name>
		<value>127.0.0.2,127.0.0.3,127.0.0.4,127.0.0.5,127.0.0.6</value>
	</property>
</configuration>


================================================
FILE: sdk/java/conf/log4j.properties
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

================================================
FILE: sdk/java/kerberos.sh
================================================
#!/bin/sh

# JuiceFS, Copyright 2026 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

KERBEROS_REALM="EXAMPLE.COM"
KERBEROS_PRINCIPLE="administrator"
KERBEROS_PASSWORD="password1234"

sudo tee /etc/krb5.conf << EOF
[libdefaults]
    default_realm = $KERBEROS_REALM
    dns_lookup_realm = false
    dns_lookup_kdc = false
[realms]
    $KERBEROS_REALM = {
        kdc = localhost
        admin_server = localhost
    }
[logging]
    default = FILE:/var/log/krb5libs.log
    kdc = FILE:/var/log/krb5kdc.log
    admin_server = FILE:/var/log/kadmind.log
[domain_realm]
    .localhost = $KERBEROS_REALM
    localhost = $KERBEROS_REALM
EOF

sudo mkdir /etc/krb5kdc
sudo printf '*/*@%s\t*' "$KERBEROS_REALM" | sudo tee /etc/krb5kdc/kadm5.acl

sudo apt-get update
sudo apt-get install -y krb5-kdc krb5-admin-server

printf "$KERBEROS_PASSWORD\n$KERBEROS_PASSWORD" | sudo kdb5_util -r "$KERBEROS_REALM" create -s -W
for p in client server tom jerry; do
  sudo kadmin.local -q "addprinc -randkey $p/localhost@$KERBEROS_REALM"
  sudo kadmin.local -q "xst -k /tmp/$p.keytab $p/localhost@$KERBEROS_REALM"
  sudo chmod +rx /tmp/$p.keytab
done

echo "Restarting krb services..."
sudo service krb5-kdc restart
sudo service krb5-admin-server restart

================================================
FILE: sdk/java/libjfs/Makefile
================================================
export GO111MODULE=on
LDFLAGS = -s -w

REVISION := $(shell git rev-parse --short HEAD 2>/dev/null)
REVISIONDATE := $(shell git log -1 --pretty=format:'%cd' --date short 2>/dev/null)
PKG := github.com/juicedata/juicefs/pkg/version
LDFLAGS = -s -w
LDFLAGS += -X $(PKG).revision=$(REVISION) \
		-X $(PKG).revisionDate=$(REVISIONDATE)
GOROOT=$(shell go env GOROOT)

ifeq ($(OS),Windows_NT)
    uname_S := Windows
else
    uname_S := $(shell uname -s)
    uname_m := $(shell uname -m)
endif

ARCHNAME := amd64

ifeq ($(uname_m), aarch64)
    ARCHNAME = arm64
endif
ifeq ($(uname_m), arm64)
    ARCHNAME = arm64
endif

LIBFILE := libjfs-$(ARCHNAME).so
ifeq ($(uname_S), Windows)
    LIBFILE = libjfs-$(ARCHNAME).dll
    CC = /usr/bin/musl-gcc
    export CC
endif
ifeq ($(uname_S), Darwin)
    LIBFILE = libjfs-$(ARCHNAME).dylib
endif

all: default

default: libjfs
	mkdir -p target
	gzip -c $(LIBFILE) > target/$(LIBFILE).gz

ceph: libjfs-ceph
	mkdir -p target
	gzip -c $(LIBFILE) > target/$(LIBFILE).gz

libjfs-ceph: *.go ../../../pkg/*/*.go
	go build -tags "ceph nogspt" -buildmode=c-shared -ldflags="$(LDFLAGS)" -o $(LIBFILE) .

libjfs: *.go ../../../pkg/*/*.go
	go build -tags nogspt -buildmode=c-shared -ldflags="$(LDFLAGS)" -o $(LIBFILE) .

linux-arm64: libjfs-arm64.so
	mkdir -p target
	gzip -c libjfs-arm64.so > target/libjfs-arm64.so.gz

libjfs-arm64.so: *.go ../../../pkg/*/*.go
	GOARCH=arm64 CGO_ENABLED=1 CC=aarch64-linux-gnu-gcc go build -tags nogspt -buildmode=c-shared -ldflags="$(LDFLAGS)" -o libjfs-arm64.so .

mac: libjfs.dylib
	mkdir -p target
	gzip -c libjfs-amd64.dylib > target/libjfs-amd64.dylib.gz

libjfs.dylib: *.go ../../../pkg/*/*.go
	GOOS=darwin CGO_ENABLED=1 CC=o64-clang go build -o libjfs-amd64.dylib \
	-tags nogspt -buildmode=c-shared -ldflags="$(LDFLAGS)"

mac-arm64: libjfs-arm64.dylib
	mkdir -p target
	gzip -c libjfs-arm64.dylib > target/libjfs-arm64.dylib.gz

libjfs-arm64.dylib: *.go ../../../pkg/*/*.go
	GOOS=darwin GOARCH=arm64 CGO_ENABLED=1 CC=o64-clang go build -o libjfs-arm64.dylib \
	-tags nogspt -buildmode=c-shared -ldflags="$(LDFLAGS)"

/usr/local/include/winfsp:
	mkdir -p /usr/local/include/winfsp
	cp ../../../hack/winfsp_headers/* /usr/local/include/winfsp

win: libjfs.dll
	mkdir -p target
	gzip -c libjfs-amd64.dll > target/libjfs-amd64.dll.gz

libjfs.dll: /usr/local/include/winfsp *.go ../../../pkg/*/*.go
	GOOS=windows CGO_ENABLED=1 CC=x86_64-w64-mingw32-gcc go build -o libjfs-amd64.dll \
	-tags nogspt -buildmode=c-shared -ldflags="$(LDFLAGS)"


================================================
FILE: sdk/java/libjfs/bridge.go
================================================
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package graphite provides a bridge to push Prometheus metrics to a Graphite
// server.

//nolint
package main

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"sort"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
	"github.com/prometheus/common/model"
	"google.golang.org/protobuf/proto"
)

const (
	defaultInterval       = 15 * time.Second
	millisecondsPerSecond = 1000
)

// HandlerErrorHandling defines how a Handler serving metrics will handle
// errors.
type HandlerErrorHandling int

// These constants cause handlers serving metrics to behave as described if
// errors are encountered.
const (
	// Ignore errors and try to push as many metrics to Graphite as possible.
	ContinueOnError HandlerErrorHandling = iota

	// Abort the push to Graphite upon the first error encountered.
	AbortOnError
)

// Config defines the Graphite bridge config.
type Config struct {
	// Whether to use Graphite tags or not. Defaults to false.
	UseTags bool

	// The url to push data to. Required.
	URL string

	// The prefix for the pushed Graphite metrics. Defaults to empty string.
	Prefix string

	// The interval to use for pushing data to Graphite. Defaults to 15 seconds.
	Interval time.Duration

	// The timeout for pushing metrics to Graphite. Defaults to 15 seconds.
	Timeout time.Duration

	// The Gatherer to use for metrics. Defaults to prometheus.DefaultGatherer.
	Gatherer prometheus.Gatherer

	// The logger that messages are written to. Defaults to no logging.
	Logger Logger

	// ErrorHandling defines how errors are handled. Note that errors are
	// logged regardless of the configured ErrorHandling provided Logger
	// is not nil.
	ErrorHandling HandlerErrorHandling

	CommonLabels map[string]string
}

// Bridge pushes metrics to the configured Graphite server.
type Bridge struct {
	useTags  bool
	url      string
	prefix   string
	interval time.Duration
	timeout  time.Duration

	errorHandling HandlerErrorHandling
	logger        Logger

	g            prometheus.Gatherer
	commonLabels map[string]string
}

// Logger is the minimal interface Bridge needs for logging. Note that
// log.Logger from the standard library implements this interface, and it is
// easy to implement by custom loggers, if they don't do so already anyway.
type Logger interface {
	Println(v ...interface{})
}

// NewBridge returns a pointer to a new Bridge struct.
func NewBridge(c *Config) (*Bridge, error) {
	b := &Bridge{}

	b.useTags = c.UseTags

	if c.URL == "" {
		return nil, errors.New("missing URL")
	}
	b.url = c.URL

	if c.Gatherer == nil {
		b.g = prometheus.DefaultGatherer
	} else {
		b.g = c.Gatherer
	}

	if c.Logger != nil {
		b.logger = c.Logger
	}

	if c.Prefix != "" {
		b.prefix = c.Prefix
	}

	var z time.Duration
	if c.Interval == z {
		b.interval = defaultInterval
	} else {
		b.interval = c.Interval
	}

	if c.Timeout == z {
		b.timeout = defaultInterval
	} else {
		b.timeout = c.Timeout
	}

	b.errorHandling = c.ErrorHandling

	b.commonLabels = c.CommonLabels
	return b, nil
}

// Run starts the event loop that pushes Prometheus metrics to Graphite at the
// configured interval.
func (b *Bridge) Run(ctx context.Context) {
	ticker := time.NewTicker(b.interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if err := b.Push(); err != nil && b.logger != nil {
				b.logger.Println("error pushing to Graphite:", err)
			}
		case <-ctx.Done():
			return
		}
	}
}

// Push pushes Prometheus metrics to the configured Graphite server.
func (b *Bridge) Push() error {
	mfs, err := b.g.Gather()
	if b.commonLabels != nil {
		for _, mf := range mfs {
			for _, metric := range mf.Metric {
				for k, v := range b.commonLabels {
					metric.Label = append(metric.Label, &dto.LabelPair{
						Name:  proto.String(k),
						Value: proto.String(v),
					})
				}
			}
		}
	}
	if err != nil || len(mfs) == 0 {
		switch b.errorHandling {
		case AbortOnError:
			return err
		case ContinueOnError:
			if b.logger != nil {
				b.logger.Println("continue on error:", err)
			}
		default:
			panic("unrecognized error handling value")
		}
	}

	conn, err := net.DialTimeout("tcp", b.url, b.timeout)
	if err != nil {
		return err
	}
	defer conn.Close()

	return writeMetrics(conn, mfs, b.useTags, b.prefix, model.Now())
}

func writeMetrics(w io.Writer, mfs []*dto.MetricFamily, useTags bool, prefix string, now model.Time) error {
	vec, err := expfmt.ExtractSamples(&expfmt.DecodeOptions{
		Timestamp: now,
	}, mfs...)
	if err != nil {
		return err
	}

	buf := bufio.NewWriter(w)
	for _, s := range vec {
		if prefix != "" {
			for _, c := range prefix {
				if _, err := buf.WriteRune(c); err != nil {
					return err
				}
			}
			if err := buf.WriteByte('.'); err != nil {
				return err
			}
		}
		if err := writeMetric(buf, s.Metric, useTags); err != nil {
			return err
		}
		if _, err := fmt.Fprintf(buf, " %g %d\n", s.Value, int64(s.Timestamp)/millisecondsPerSecond); err != nil {
			return err
		}
		if err := buf.Flush(); err != nil {
			return err
		}
	}

	return nil
}

func writeMetric(buf *bufio.Writer, m model.Metric, useTags bool) error {
	metricName, hasName := m[model.MetricNameLabel]
	numLabels := len(m) - 1
	if !hasName {
		numLabels = len(m)
	}

	var err error
	switch numLabels {
	case 0:
		if hasName {
			return writeSanitized(buf, string(metricName))
		}
	default:
		if err = writeSanitized(buf, string(metricName)); err != nil {
			return err
		}
		if useTags {
			return writeTags(buf, m)
		} else {
			return writeLabels(buf, m, numLabels)
		}
	}
	return nil
}

func writeTags(buf *bufio.Writer, m model.Metric) error {
	for label, value := range m {
		if label != model.MetricNameLabel {
			_, _ = buf.WriteRune(';')
			if _, err := buf.WriteString(string(label)); err != nil {
				return err
			}
			_, _ = buf.WriteRune('=')
			if _, err := buf.WriteString(string(value)); err != nil {
				return err
			}
		}
	}
	return nil
}

func writeLabels(buf *bufio.Writer, m model.Metric, numLabels int) error {
	labelStrings := make([]string, 0, numLabels)
	for label, value := range m {
		if label != model.MetricNameLabel {
			labelString := string(label) + " " + string(value)
			labelStrings = append(labelStrings, labelString)
		}
	}
	sort.Strings(labelStrings)
	for _, s := range labelStrings {
		if err := buf.WriteByte('.'); err != nil {
			return err
		}
		if err := writeSanitized(buf, s); err != nil {
			return err
		}
	}
	return nil
}

func writeSanitized(buf *bufio.Writer, s string) error {
	prevUnderscore := false

	for _, c := range s {
		c = replaceInvalidRune(c)
		if c == '_' {
			if prevUnderscore {
				continue
			}
			prevUnderscore = true
		} else {
			prevUnderscore = false
		}
		if _, err := buf.WriteRune(c); err != nil {
			return err
		}
	}

	return nil
}

func replaceInvalidRune(c rune) rune {
	if c == ' ' {
		return '.'
	}
	if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':' || c == '-' || (c >= '0' && c <= '9')) {
		return '_'
	}
	return c
}


================================================
FILE: sdk/java/libjfs/bridge_test.go
================================================
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"log"
	"net"
	"os"
	"reflect"
	"regexp"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
)

func TestSanitize(t *testing.T) {
	testCases := []struct {
		in, out string
	}{
		{in: "hello", out: "hello"},
		{in: "hE/l1o", out: "hE_l1o"},
		{in: "he,*ll(.o", out: "he_ll_o"},
		{in: "hello_there%^&", out: "hello_there_"},
		{in: "hell-.o", out: "hell-_o"},
	}

	var buf bytes.Buffer
	w := bufio.NewWriter(&buf)

	for i, tc := range testCases {
		if err := writeSanitized(w, tc.in); err != nil {
			t.Fatalf("write failed: %v", err)
		}
		if err := w.Flush(); err != nil {
			t.Fatalf("flush failed: %v", err)
		}

		if want, got := tc.out, buf.String(); want != got {
			t.Fatalf("test case index %d: got sanitized string %s, want %s", i, got, want)
		}

		buf.Reset()
	}
}

func TestWriteSummary(t *testing.T) {
	testWriteSummary(t, false)
	testWriteSummary(t, true)
}

func testWriteSummary(t *testing.T, useTags bool) {
	sumVec := prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:        "name",
			Help:        "docstring",
			ConstLabels: prometheus.Labels{"constname": "constvalue"},
			Objectives:  map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		},
		[]string{"labelname"},
	)

	sumVec.WithLabelValues("val1").Observe(float64(10))
	sumVec.WithLabelValues("val1").Observe(float64(20))
	sumVec.WithLabelValues("val1").Observe(float64(30))
	sumVec.WithLabelValues("val2").Observe(float64(20))
	sumVec.WithLabelValues("val2").Observe(float64(30))
	sumVec.WithLabelValues("val2").Observe(float64(40))

	reg := prometheus.NewRegistry()
	reg.MustRegister(sumVec)

	mfs, err := reg.Gather()
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	testCases := []struct {
		prefix string
	}{
		{prefix: "prefix"},
		{prefix: "pre/fix"},
		{prefix: "pre.fix"},
		{prefix: ""},
	}

	var (
		want = `%s.name.constname.constvalue.labelname.val1.quantile.0_5 20 1477043
%s.name.constname.constvalue.labelname.val1.quantile.0_9 30 1477043
%s.name.constname.constvalue.labelname.val1.quantile.0_99 30 1477043
%s.name_sum.constname.constvalue.labelname.val1 60 1477043
%s.name_count.constname.constvalue.labelname.val1 3 1477043
%s.name.constname.constvalue.labelname.val2.quantile.0_5 30 1477043
%s.name.constname.constvalue.labelname.val2.quantile.0_9 40 1477043
%s.name.constname.constvalue.labelname.val2.quantile.0_99 40 1477043
%s.name_sum.constname.constvalue.labelname.val2 90 1477043
%s.name_count.constname.constvalue.labelname.val2 3 1477043
`
		wantTagged = `%s.name;constname=constvalue;labelname=val1;quantile=0.5 20 1477043
%s.name;constname=constvalue;labelname=val1;quantile=0.9 30 1477043
%s.name;constname=constvalue;labelname=val1;quantile=0.99 30 1477043
%s.name_sum;constname=constvalue;labelname=val1 60 1477043
%s.name_count;constname=constvalue;labelname=val1 3 1477043
%s.name;constname=constvalue;labelname=val2;quantile=0.5 30 1477043
%s.name;constname=constvalue;labelname=val2;quantile=0.9 40 1477043
%s.name;constname=constvalue;labelname=val2;quantile=0.99 40 1477043
%s.name_sum;constname=constvalue;labelname=val2 90 1477043
%s.name_count;constname=constvalue;labelname=val2 3 1477043
`
	)

	if useTags {
		want = wantTagged
	}

	for i, tc := range testCases {

		now := model.Time(1477043083)
		var buf bytes.Buffer
		err = writeMetrics(&buf, mfs, useTags, tc.prefix, now)
		if err != nil {
			t.Fatalf("error: %v", err)
		}

		var wantWithPrefix string
		if tc.prefix == "" {
			wantWithPrefix = strings.ReplaceAll(want, "%s.", "")
		} else {
			wantWithPrefix = fmt.Sprintf(want,
				tc.prefix, tc.prefix, tc.prefix, tc.prefix, tc.prefix,
				tc.prefix, tc.prefix, tc.prefix, tc.prefix, tc.prefix,
			)
		}

		got := buf.String()

		if err := checkLinesAreEqual(wantWithPrefix, got, useTags); err != nil {
			t.Fatalf("test case index %d:\n%s", i, err.Error())
		}
	}
}

func TestWriteHistogram(t *testing.T) {
	testWriteHistogram(t, false)
	testWriteHistogram(t, true)
}

func testWriteHistogram(t *testing.T, useTags bool) {
	histVec := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:        "name",
			Help:        "docstring",
			ConstLabels: prometheus.Labels{"constname": "constvalue"},
			Buckets:     []float64{0.01, 0.02, 0.05, 0.1},
		},
		[]string{"labelname"},
	)

	histVec.WithLabelValues("val1").Observe(float64(10))
	histVec.WithLabelValues("val1").Observe(float64(20))
	histVec.WithLabelValues("val1").Observe(float64(30))
	histVec.WithLabelValues("val2").Observe(float64(20))
	histVec.WithLabelValues("val2").Observe(float64(30))
	histVec.WithLabelValues("val2").Observe(float64(40))

	reg := prometheus.NewRegistry()
	reg.MustRegister(histVec)

	mfs, err := reg.Gather()
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	now := model.Time(1477043083)
	var buf bytes.Buffer
	err = writeMetrics(&buf, mfs, useTags, "prefix", now)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	var (
		want = `prefix.name_bucket.constname.constvalue.labelname.val1.le.0_01 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_02 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_05 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le.0_1 0 1477043
prefix.name_sum.constname.constvalue.labelname.val1 60 1477043
prefix.name_count.constname.constvalue.labelname.val1 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val1.le._Inf 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_01 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_02 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_05 0 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le.0_1 0 1477043
prefix.name_sum.constname.constvalue.labelname.val2 90 1477043
prefix.name_count.constname.constvalue.labelname.val2 3 1477043
prefix.name_bucket.constname.constvalue.labelname.val2.le._Inf 3 1477043
`
		wantTagged = `prefix.name_bucket;constname=constvalue;labelname=val1;le=0.01 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val1;le=0.02 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val1;le=0.05 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val1;le=0.1 0 1477043
prefix.name_sum;constname=constvalue;labelname=val1 60 1477043
prefix.name_count;constname=constvalue;labelname=val1 3 1477043
prefix.name_bucket;constname=constvalue;labelname=val1;le=+Inf 3 1477043
prefix.name_bucket;constname=constvalue;labelname=val2;le=0.01 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val2;le=0.02 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val2;le=0.05 0 1477043
prefix.name_bucket;constname=constvalue;labelname=val2;le=0.1 0 1477043
prefix.name_sum;constname=constvalue;labelname=val2 90 1477043
prefix.name_count;constname=constvalue;labelname=val2 3 1477043
prefix.name_bucket;constname=constvalue;labelname=val2;le=+Inf 3 1477043
`
	)

	if useTags {
		want = wantTagged
	}

	got := buf.String()

	if err := checkLinesAreEqual(want, got, useTags); err != nil {
		t.Fatalf(err.Error())
	}
}

func TestToReader(t *testing.T) {
	testToReader(t, false)
	testToReader(t, true)
}

func testToReader(t *testing.T, useTags bool) {
	cntVec := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name:        "name",
			Help:        "docstring",
			ConstLabels: prometheus.Labels{"constname": "constvalue"},
		},
		[]string{"labelname"},
	)
	cntVec.WithLabelValues("val1").Inc()
	cntVec.WithLabelValues("val2").Inc()

	reg := prometheus.NewRegistry()
	reg.MustRegister(cntVec)

	var (
		want = `prefix.name.constname.constvalue.labelname.val1 1 1477043
prefix.name.constname.constvalue.labelname.val2 1 1477043
`
		wantTagged = `prefix.name;constname=constvalue;labelname=val1 1 1477043
prefix.name;constname=constvalue;labelname=val2 1 1477043
`
	)

	if useTags {
		want = wantTagged
	}

	mfs, err := reg.Gather()
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	now := model.Time(1477043083)
	var buf bytes.Buffer
	err = writeMetrics(&buf, mfs, useTags, "prefix", now)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	got := buf.String()

	if err := checkLinesAreEqual(want, got, useTags); err != nil {
		t.Fatalf(err.Error())
	}
}

func checkLinesAreEqual(w, g string, useTags bool) error {
	if useTags {
		taggedLineRegexp := regexp.MustCompile(`;| `)

		wantLines, err := stringToLines(w)
		if err != nil {
			return err
		}

		gotLines, err := stringToLines(g)
		if err != nil {
			return err
		}

		for lineInd := range gotLines {
			var log string
			// Tagged metric, order of tags doesn't matter
			// m1 := "prefix.name;tag1=val1;tag2=val2 3 1477043"
			// m2 := "prefix.name;tag2=val2;tag1=val1 3 1477043"
			// m1 should be equal to m2
			wantSplit := taggedLineRegexp.Split(wantLines[lineInd], -1)
			gotSplit := taggedLineRegexp.Split(gotLines[lineInd], -1)
			sort.Strings(wantSplit)
			sort.Strings(gotSplit)

			log += fmt.Sprintf("want: %v\ngot: %v\n\n", wantSplit, gotSplit)

			if !reflect.DeepEqual(wantSplit, gotSplit) {
				return fmt.Errorf(log)
			}
		}
		return nil
	}

	if w != g {
		return fmt.Errorf("wanted:\n\n%s\ngot:\n\n%s", w, g)
	}

	return nil
}

func stringToLines(s string) (lines []string, err error) {
	scanner := bufio.NewScanner(strings.NewReader(s))
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	err = scanner.Err()
	return
}

func TestPush(t *testing.T) {
	reg := prometheus.NewRegistry()
	cntVec := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name:        "name",
			Help:        "docstring",
			ConstLabels: prometheus.Labels{"constname": "constvalue"},
		},
		[]string{"labelname"},
	)
	cntVec.WithLabelValues("val1").Inc()
	cntVec.WithLabelValues("val2").Inc()
	reg.MustRegister(cntVec)

	host := "localhost"
	port := ":56789"
	b, err := NewBridge(&Config{
		URL:          host + port,
		Gatherer:     reg,
		Prefix:       "prefix",
		UseTags:      true,
		CommonLabels: map[string]string{"a": "b"},
	})
	if err != nil {
		t.Fatalf("error creating bridge: %v", err)
	}

	nmg, err := newMockGraphite(port)
	if err != nil {
		t.Fatalf("error creating mock graphite: %v", err)
	}
	defer nmg.Close()

	err = b.Push()
	if err != nil {
		t.Fatalf("error pushing: %v", err)
	}

	wants := []string{
		"prefix.name.constname.constvalue.labelname.val1 1",
		"prefix.name.constname.constvalue.labelname.val2 1",
	}

	select {
	case got := <-nmg.readc:
		for _, want := range wants {
			matched, err := regexp.MatchString(want, got)
			if err != nil {
				t.Fatalf("error pushing: %v", err)
			}
			if !matched {
				t.Fatalf("missing metric:\nno match for %s received by server:\n%s", want, got)
			}
		}
		return
	case err := <-nmg.errc:
		t.Fatalf("error reading push: %v", err)
	case <-time.After(50 * time.Millisecond):
		t.Fatalf("no result from graphite server")
	}
}

func newMockGraphite(port string) (*mockGraphite, error) {
	readc := make(chan string)
	errc := make(chan error)
	ln, err := net.Listen("tcp", port)
	if err != nil {
		return nil, err
	}

	go func() {
		conn, err := ln.Accept()
		if err != nil {
			errc <- err
		}
		var b bytes.Buffer
		io.Copy(&b, conn)
		readc <- b.String()
	}()

	return &mockGraphite{
		readc:    readc,
		errc:     errc,
		Listener: ln,
	}, nil
}

type mockGraphite struct {
	readc chan string
	errc  chan error

	net.Listener
}

func ExampleBridge() {
	b, err := NewBridge(&Config{
		URL:           "graphite.example.org:3099",
		Gatherer:      prometheus.DefaultGatherer,
		Prefix:        "prefix",
		Interval:      15 * time.Second,
		Timeout:       10 * time.Second,
		ErrorHandling: AbortOnError,
		Logger:        log.New(os.Stdout, "graphite bridge: ", log.Lshortfile),
	})
	if err != nil {
		panic(err)
	}

	go func() {
		// Start something in a goroutine that uses metrics.
	}()

	// Push initial metrics to Graphite. Fail fast if the push fails.
	if err := b.Push(); err != nil {
		panic(err)
	}

	// Create a Context to control stopping the Run() loop that pushes
	// metrics to Graphite.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Start pushing metrics to Graphite in the Run() loop.
	b.Run(ctx)
}


================================================
FILE: sdk/java/libjfs/callback.c
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>

static void (*log_callback)(const char *msg);

typedef void LogCallBack(const char *msg);

void jfs_set_logger(void*p);

void jfs_set_callback(LogCallBack *callback)
{
    log_callback = callback;
    jfs_set_logger(callback);
}

void jfs_callback(const char *msg)
{
    if (log_callback != NULL) {
        (*log_callback)(msg);
    } else {
        fprintf(stderr, "%s", msg);
    }
}


================================================
FILE: sdk/java/libjfs/guid.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"crypto/md5"
	"encoding/binary"
	"os/user"
	"strconv"
	"sync"
)

type pwent struct {
	id   uint32
	name string
}

type mapping struct {
	sync.Mutex
	salt      string
	local     bool
	mask      uint32
	usernames map[string]uint32
	userIDs   map[uint32]string
	groups    map[string]uint32
	groupIDs  map[uint32]string
}

func newMapping(salt string) *mapping {
	m := &mapping{
		salt:      salt,
		usernames: make(map[string]uint32),
		userIDs:   make(map[uint32]string),
		groups:    make(map[string]uint32),
		groupIDs:  make(map[uint32]string),
	}
	m.update(genAllUids(), genAllGids(), true)
	return m
}

func (m *mapping) genGuid(name string) uint32 {
	digest := md5.Sum([]byte(m.salt + name + m.salt))
	a := binary.LittleEndian.Uint64(digest[0:8])
	b := binary.LittleEndian.Uint64(digest[8:16])
	id := uint32(a ^ b)
	if m.mask > 0 {
		id &= m.mask
	}
	return id
}

func (m *mapping) lookupUser(name string) uint32 {
	m.Lock()
	defer m.Unlock()
	var id uint32
	if id, ok := m.usernames[name]; ok {
		return id
	}
	if !m.local {
		id := m.genGuid(name)
		m.updateUser(name, id)
		return id
	}
	if name == "root" { // root in hdfs sdk is a normal user
		id = m.genGuid(name)
	} else {
		u, _ := user.Lookup(name)
		if u != nil {
			id_, _ := strconv.ParseUint(u.Uid, 10, 32)
			id = uint32(id_)
		} else {
			id = m.genGuid(name)
		}
	}
	logger.Debugf("update user to %s:%d by lookup user", name, id)
	m.updateUser(name, id)
	return id
}

func (m *mapping) lookupGroup(name string) uint32 {
	m.Lock()
	defer m.Unlock()
	var id uint32
	if id, ok := m.groups[name]; ok {
		return id
	}
	if !m.local {
		id := m.genGuid(name)
		m.updateGroup(name, id)
		return id
	}
	if name == "root" {
		id = m.genGuid(name)
	} else {
		g, _ := user.LookupGroup(name)
		if g == nil {
			id = m.genGuid(name)
		} else {
			id_, _ := strconv.ParseUint(g.Gid, 10, 32)
			id = uint32(id_)
		}
	}
	logger.Debugf("update group to %s:%d by lookup group", name, id)
	m.updateGroup(name, id)
	return id
}

func (m *mapping) lookupUserID(id uint32) string {
	m.Lock()
	defer m.Unlock()
	if name, ok := m.userIDs[id]; ok {
		return name
	}
	if !m.local {
		return strconv.Itoa(int(id))
	}
	u, _ := user.LookupId(strconv.Itoa(int(id)))
	if u == nil {
		u = &user.User{Username: strconv.Itoa(int(id))}
	}
	name := u.Username
	if len(name) > 49 {
		name = name[:49]
	}
	logger.Debugf("update user to %s:%d by lookup user id", name, id)
	m.updateUser(name, id)
	return name
}

func (m *mapping) lookupGroupID(id uint32) string {
	m.Lock()
	defer m.Unlock()
	if name, ok := m.groupIDs[id]; ok {
		return name
	}
	if !m.local {
		return strconv.Itoa(int(id))
	}
	g, _ := user.LookupGroupId(strconv.Itoa(int(id)))
	if g == nil {
		g = &user.Group{Name: strconv.Itoa(int(id))}
	}
	name := g.Name
	if len(name) > 49 {
		name = name[:49]
	}
	logger.Debugf("update group to %s:%d by lookup group id", name, id)
	m.updateGroup(name, id)
	return name
}

func (m *mapping) update(uids []pwent, gids []pwent, local bool) {
	m.Lock()
	defer m.Unlock()
	m.local = local
	for _, u := range uids {
		m.updateUser(u.name, u.id)
	}
	for _, g := range gids {
		m.updateGroup(g.name, g.id)
	}
	logger.Debugf("users:\n%+v", m.usernames)
	logger.Debugf("userids:\n%+v", m.userIDs)
	logger.Debugf("groups:\n%+v", m.groups)
	logger.Debugf("gorupids:\n%+v", m.groupIDs)
}

func (m *mapping) updateUser(name string, id uint32) {
	oldId := m.usernames[name]
	oldName := m.userIDs[id]
	delete(m.userIDs, oldId)
	delete(m.usernames, oldName)
	m.usernames[name] = id
	m.userIDs[id] = name
}

func (m *mapping) updateGroup(name string, id uint32) {
	oldId := m.groups[name]
	oldName := m.groupIDs[id]
	delete(m.groupIDs, oldId)
	delete(m.groups, oldName)
	m.groups[name] = id
	m.groupIDs[id] = name
}


================================================
FILE: sdk/java/libjfs/guid_unix.go
================================================
//go:build !windows
// +build !windows

/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

// #include <pwd.h>
// #include <grp.h>
import "C"
import (
	"sync"
)

// protect getpwent and getgrent
var cgoMutex sync.Mutex

func genAllUids() []pwent {
	cgoMutex.Lock()
	defer cgoMutex.Unlock()
	C.setpwent()
	defer C.endpwent()
	var uids []pwent
	for {
		p := C.getpwent()
		if p == nil {
			break
		}
		name := C.GoString(p.pw_name)
		if name != "root" {
			uids = append(uids, pwent{uint32(p.pw_uid), name})
		}
	}
	return uids
}

func genAllGids() []pwent {
	cgoMutex.Lock()
	defer cgoMutex.Unlock()
	C.setgrent()
	defer C.endgrent()
	var gids []pwent
	for {
		p := C.getgrent()
		if p == nil {
			break
		}
		name := C.GoString(p.gr_name)
		if name != "root" {
			gids = append(gids, pwent{uint32(p.gr_gid), name})
		}
	}
	return gids
}


================================================
FILE: sdk/java/libjfs/guid_windows.go
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"os/exec"
	"strconv"
	"strings"
)

func genAllUids() []pwent {
	out, err := exec.Command("wmic", "useraccount", "list", "brief").Output()
	if err != nil {
		logger.Errorf("cmd : %s", err)
		return nil
	}
	lines := strings.Split(string(out), "\r\n")
	if len(lines) < 2 {
		logger.Errorf("no uids: %s", string(out))
		return nil
	}
	var uids []pwent
	for _, line := range lines[1 : len(lines)-1] {
		fields := strings.Fields(line)
		if len(fields) < 5 {
			continue
		}
		name := fields[len(fields)-2]
		sid := fields[len(fields)-1]
		ps := strings.Split(sid, "-")
		auth, _ := strconv.ParseUint(ps[2], 10, 32)
		count := len(ps) - 3
		var subAuth uint64
		if count > 0 {
			subAuth, _ = strconv.ParseUint(ps[3], 10, 32)
		}
		rid, _ := strconv.ParseUint(ps[len(ps)-1], 10, 32)
		var uid uint64
		if auth == 5 {
			if count == 1 {
				// "SYSTEM" S-1-5-18                   <=> uid/gid: 18
				uid = rid
			} else if count == 2 && subAuth == 32 {
				// "Users"  S-1-5-32-545               <=> uid/gid: 545
				uid = rid
			} else if count >= 2 && subAuth == 5 {
				// not supported
			} else if count >= 5 && subAuth == 21 {
				// S-1-5-21-X-Y-Z-RID                  <=> uid/gid: 0x30000 + RID
				// S-1-5-21-X-Y-Z-RID                  <=> uid/gid: 0x100000 + RID
				uid = 0x30000 + rid
			} else if count == 2 {
				// S-1-5-X-RID                         <=> uid/gid: 0x1000 * X + RID
				uid = 0x1000*subAuth + rid
			}
		} else if auth == 16 {
			// S-1-16-RID                          <=> uid/gid: 0x60000 + RID
			uid = 0x60000*subAuth + rid
		}
		if uid > 0 {
			uids = append(uids, pwent{uint32(uid), name})
			logger.Tracef("found account %s -> %d (%s)", name, uid, sid)
		}
	}
	return uids
}

func genAllGids() []pwent {
	out, err := exec.Command("wmic", "group", "list", "brief").Output()
	if err != nil {
		logger.Errorf("cmd : %s", err)
		return nil
	}
	lines := strings.Split(string(out), "\r\n")
	if len(lines) < 2 {
		logger.Errorf("no gids: %s", string(out))
		return nil
	}
	title := lines[0]
	nameIndex := strings.Index(title, "Name")
	sidIndex := strings.Index(title, "SID")
	var gids []pwent
	for _, line := range lines[1 : len(lines)-1] {
		if len(line) < sidIndex {
			continue
		}
		name := strings.TrimSpace(line[nameIndex : sidIndex-1])
		sid := strings.TrimSpace(line[sidIndex:])
		ps := strings.Split(sid, "-")
		auth, _ := strconv.ParseUint(ps[2], 10, 32)
		count := len(ps) - 3
		var subAuth uint64
		if count > 0 {
			subAuth, _ = strconv.ParseUint(ps[3], 10, 32)
		}
		rid, _ := strconv.ParseUint(ps[len(ps)-1], 10, 32)
		var gid uint64
		if auth == 5 {
			if count == 1 {
				// "SYSTEM" S-1-5-18                   <=> uid/gid: 18
				gid = rid
			} else if count == 2 && subAuth == 32 {
				// "Users"  S-1-5-32-545               <=> uid/gid: 545
				gid = rid
			} else if count >= 2 && subAuth == 5 {
				// not supported
			} else if count >= 5 && subAuth == 21 {
				// S-1-5-21-X-Y-Z-RID                  <=> uid/gid: 0x30000 + RID
				// S-1-5-21-X-Y-Z-RID                  <=> uid/gid: 0x100000 + RID
				gid = 0x30000 + rid
			} else if count == 2 {
				// S-1-5-X-RID                         <=> uid/gid: 0x1000 * X + RID
				gid = 0x1000*subAuth + rid
			}
		} else if auth == 16 {
			// S-1-16-RID                          <=> uid/gid: 0x60000 + RID
			gid = 0x60000*subAuth + rid
		}
		if gid > 0 {
			gids = append(gids, pwent{uint32(gid), name})
			logger.Tracef("found group %s -> %d (%s)", name, gid, sid)
		}
	}
	return gids
}


================================================
FILE: sdk/java/libjfs/kerberos.go
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

import (
	"bufio"
	"crypto/rand"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/jcmturner/gokrb5/v8/keytab"
	"github.com/jcmturner/gokrb5/v8/service"
	"github.com/jcmturner/gokrb5/v8/spnego"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
)

const (
	defaultLife  = 3600 * 24 * 7
	defaultRenew = 3600 * 24
)

const (
	mechanismHadoop = "hadoop"
	mechanismMIT    = "mit"
)

var (
	namePattern     = regexp.MustCompile(`([^/@]+)(/([^/@]+))?(@([^/@]+))?`)
	paramPattern    = regexp.MustCompile(`[^$]*(\$\d)`)
	ruleParser      = regexp.MustCompile(`(\[(\d+):([^\]]+)\](\(([^\)]+)\))?(s/([^/]+)/([^/]*)/(g)?)?)/?(L)?`)
	noSimplePattern = regexp.MustCompile(`[/@]`)
)

type kRule struct {
	isDefault   bool
	comps       int
	format      string
	match       *regexp.Regexp
	fromPattern *regexp.Regexp
	toPattern   string
	repeat      bool
	lower       bool
}

func (r *kRule) String() string {
	if r.isDefault {
		return "DEFAULT"
	}
	s := fmt.Sprintf("RULE:[%d:%s]", r.comps, r.format)
	if r.match != nil {
		s += fmt.Sprintf("(%s)", r.match)
	}
	if r.fromPattern != nil {
		s += fmt.Sprintf("s/%s/%s/", r.fromPattern, r.toPattern)
		if r.repeat {
			s += "g"
		}
	}
	if r.lower {
		s += "/L"
	}
	return s
}

func (r *kRule) replaceParameters(params []string) string {
	return paramPattern.ReplaceAllStringFunc(r.format, func(s string) string {
		m := paramPattern.FindStringSubmatchIndex(s)
		i, _ := strconv.Atoi(s[m[2]+1:])
		if i >= len(params) {
			logger.Warnf("invalid param %s", s)
			return s
		}
		return s[:m[2]] + params[i]
	})
}

func (r *kRule) replaceSubs(base string) string {
	if r.fromPattern == nil {
		return base
	}
	if r.repeat {
		return r.fromPattern.ReplaceAllString(base, r.toPattern)
	}
	m := r.fromPattern.FindStringIndex(base)
	if m != nil {
		return base[:m[0]] + r.toPattern + base[m[1]:]
	}
	return base
}

func (r *kRule) apply(param []string, mechanism string, realm string) string {
	var result string
	if r.isDefault {
		if realm == "" || param[0] == realm {
			result = param[1]
		}
	} else if r.comps+1 == len(param) {
		base := r.replaceParameters(param)
		if r.match == nil || r.match.MatchString(base) {
			result = r.replaceSubs(base)
		}
	}
	if mechanism == mechanismHadoop && noSimplePattern.FindString(result) != "" {
		return ""
	}
	if r.lower {
		result = strings.ToLower(result)
	}
	return result
}

func parseRule(rule string) *kRule {
	rule = strings.TrimSpace(rule)
	if rule == "DEFAULT" {
		return &kRule{isDefault: true}
	}
	var r kRule
	m := ruleParser.FindStringSubmatch(rule)
	if m == nil {
		return nil
	}
	r.comps, _ = strconv.Atoi(m[2])
	r.format = m[3]
	var err error
	r.match, err = regexp.Compile(m[5])
	if err != nil {
		logger.Warnf("compile %s: %s", m[5], err)
		return nil
	}
	r.fromPattern, err = regexp.Compile(m[7])
	if err != nil {
		logger.Warnf("compile %s: %s", m[7], err)
		return nil
	}
	r.toPattern = m[8]
	r.repeat = m[9] == "g"
	r.lower = m[10] == "L"
	return &r
}

type kerberosRules struct {
	mechanism string
	realm     string
	rules     []*kRule
}

func newkerberosRules(mechanism string, realm string, rules []string) *kerberosRules {
	if mechanism == "" {
		mechanism = mechanismHadoop
	}
	var rs []*kRule
	for _, rule := range rules {
		rs = append(rs, parseRule(rule))
	}
	return &kerberosRules{mechanism, realm, rs}
}

func (r *kerberosRules) getShortName(full string) string {
	service, host, realm := parseFullName(full)
	var param []string
	if host == "" {
		if realm == "" {
			return service
		}
		param = []string{realm, service}
	} else {
		param = []string{realm, service, host}
	}
	if r.rules == nil {
		r.rules = append(r.rules, &kRule{isDefault: true})
	}
	for _, rule := range r.rules {
		short := rule.apply(param, r.mechanism, r.realm)
		if short != "" {
			return short
		}
	}
	if r.mechanism == mechanismHadoop {
		return ""
	}
	return full
}

func parseFullName(full string) (string, string, string) {
	m := namePattern.FindStringSubmatch(full)
	if m == nil || m[0] != full {
		return "", "", ""
	}
	return m[1], m[3], m[5]
}

type token struct {
	User     string
	Renewer  string
	Password string
	Issued   int64
	Expire   int64
}

type hostParam struct {
	allAllowed bool
	cidr       []*net.IPNet
	addrs      map[string]bool
}
type proxyParam struct {
	users  []string
	groups []string
	hosts  *hostParam
}

type volParams struct {
	m          meta.Meta
	keytab     []byte
	renew      int64
	life       int64
	superuser  string
	supergroup string
	rules      *kerberosRules
	proxies    map[string]*proxyParam
}

func (vol *volParams) parse(kind, key, value string) {
	if vol.rules == nil {
		vol.rules = newkerberosRules(mechanismHadoop, "", nil)
	}
	switch kind {
	case "keytab":
		kt, err := base64.StdEncoding.DecodeString(value)
		if err != nil {
			logger.Errorf("decode keytab failed: %s", err)
		} else {
			vol.keytab = kt
		}
	case "life":
		period, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			logger.Errorf("can not parse %s as int: %s", value, err)
		} else {
			vol.life = period
		}
	case "renew":
		period, err := strconv.ParseInt(value, 10, 64)
		if err != nil {
			logger.Errorf("can not parse %s as int: %s", value, err)
		} else {
			vol.renew = period
		}
	case "superuser":
		vol.superuser = value
	case "supergroup":
		vol.supergroup = value
	case "mechanism":
		value = strings.ToLower(value)
		if value != mechanismHadoop && value != mechanismMIT {
			logger.Errorf("invalid mechanism: %s", value)
		} else {
			vol.rules.mechanism = value
		}
	case "realm":
		vol.rules.realm = value
	case "rule":
		rule := parseRule(value)
		if rule != nil {
			vol.rules.rules = append(vol.rules.rules, rule)
		} else {
			logger.Errorf("invalid kerberos rule: %s", value)
		}
	default:
		split := strings.Split(key, ".")
		if len(split) < 4 || split[1] != "proxy" {
			logger.Warnf("invalid key: %s", key)
			return
		}
		user := split[2]
		proxy := vol.proxies[user]
		if proxy == nil {
			proxy = &proxyParam{hosts: &hostParam{}}
			vol.proxies[user] = proxy
		}
		switch kind {
		case "users":
			proxy.users = strings.Split(value, ",")
			for i := range proxy.users {
				proxy.users[i] = strings.TrimSpace(proxy.users[i])
			}
		case "groups":
			proxy.groups = strings.Split(value, ",")
			for i := range proxy.groups {
				proxy.groups[i] = strings.TrimSpace(proxy.groups[i])
			}
		case "hosts":
			m := proxy.hosts
			if strings.Contains(value, "*") {
				m.allAllowed = true
			} else {
				m.addrs = make(map[string]bool)
				for _, v := range strings.Split(value, ",") {
					if strings.Contains(v, "/") {
						// ip range
						_, ipnet, err := net.ParseCIDR(v)
						if err != nil {
							logger.Errorf("wrong ip range %s: %s", v, err)
							continue
						}
						m.cidr = append(m.cidr, ipnet)
					} else {
						m.addrs[v] = true
					}
				}
			}
		default:
			logger.Errorf("invalid key: %s", key)
		}
	}
}

func (vol *volParams) canProxy(realUser, user, group, ips, hostname string) bool {
	if realUser == "" || realUser == user {
		return true
	}
	if !vol.isUserGroupAllowed(realUser, user, group) {
		logger.Errorf("user: %s is not allowed to impersonate %s", realUser, user)
		return false
	}
	if !vol.isHostAllowed(realUser, ips, hostname) {
		logger.Errorf("user: %s is not allowed to impersonate %s on %s", realUser, user, hostname)
		return false
	}
	return true
}

func (vol *volParams) isUserGroupAllowed(realUser, user, groups string) bool {
	proxy := vol.proxies[realUser]
	if proxy == nil {
		return false
	}
	for _, u := range proxy.users {
		if u == "*" || u == user {
			return true
		}
	}
	for _, group := range strings.Split(groups, ",") {
		for _, ag := range proxy.groups {
			if ag == "*" || ag == group {
				return true
			}
		}
	}
	return false
}

func (vol *volParams) isHostAllowed(realUser, ips, hostname string) bool {
	proxy := vol.proxies[realUser]
	if proxy == nil {
		return false
	}
	m := proxy.hosts
	if m.allAllowed {
		return true
	}
	if m.addrs[hostname] {
		return true
	}
	for _, ip := range strings.Split(ips, ",") {
		if m.addrs[ip] {
			return true
		}
		for _, ipNet := range m.cidr {
			if net.ParseIP(ip) != nil && ipNet.Contains(net.ParseIP(ip)) {
				return true
			}
		}
	}
	return false
}

type kerberos struct {
	vols map[string]*volParams
	mu   sync.Mutex
}

func (k *kerberos) getVol(volname string) *volParams {
	k.mu.Lock()
	defer k.mu.Unlock()
	return k.vols[volname]
}

func (k *kerberos) auth(volname, user, realUser, group, ips, hostname string, reqBytes []byte) syscall.Errno {
	krb5Token := spnego.KRB5Token{}
	err := krb5Token.Unmarshal(reqBytes)
	req := krb5Token.APReq
	if err != nil {
		logger.Errorf("invalid AP_REQ: %s", err)
		return syscall.EINVAL
	}
	vol := k.getVol(volname)
	if vol == nil || vol.keytab == nil {
		logger.Errorf("server keytab for %s not setted", volname)
		return syscall.ENODATA
	}
	kt := new(keytab.Keytab)
	err = kt.Unmarshal(vol.keytab)
	if err != nil {
		logger.Errorf("unmarshal keytab: %s", err)
		return syscall.EINVAL
	}
	s := service.NewSettings(kt, service.DecodePAC(false))
	ok, creds, err := service.VerifyAPREQ(&req, s)
	if err != nil {
		logger.Errorf("verify: %s", err)
		return syscall.EINVAL
	} else if !ok {
		return syscall.EACCES
	}

	principal := fmt.Sprintf("%s@%s", creds.UserName(), creds.Realm())
	authedUser := vol.rules.getShortName(principal)
	if authedUser == "" {
		logger.Warnf("no rule for principal %s", principal)
		return syscall.EINVAL
	}

	if realUser == "" {
		if user == authedUser {
			return 0
		}
	} else {
		if realUser == authedUser && vol.canProxy(realUser, user, group, ips, hostname) {
			return 0
		}
	}
	logger.Warnf("auth failed, principal: %s, authedUser: %s, user: %s, realUser: %s", principal, authedUser, user, realUser)
	return syscall.EACCES
}

func (k *kerberos) issue(ctx meta.Context, m meta.Meta, volname, user, renewer string) (uint32, *token, syscall.Errno) {
	vol := k.getVol(volname)
	if vol == nil {
		return 0, nil, syscall.EINVAL
	}
	now := time.Now()
	t := &token{
		User:    user,
		Renewer: renewer,
		Issued:  now.Unix(),
		Expire:  now.Unix() + vol.renew,
	}
	passwd := make([]byte, 20)
	_, _ = io.ReadFull(rand.Reader, passwd)
	t.Password = hex.EncodeToString(passwd)
	id, eno := k.storeToken(ctx, m, t)
	if eno != 0 {
		return 0, nil, eno
	}
	return id, t, 0
}

func (k *kerberos) check(ctx meta.Context, m meta.Meta, volname, user string, id uint32, password string) syscall.Errno {
	t, eno := k.loadToken(ctx, m, id)
	if eno != 0 {
		return eno
	}
	now := time.Now().Unix()
	if now > t.Expire {
		logger.Warnf("token %d expired", id)
		return syscall.EINVAL
	}
	if password != t.Password || user != t.User {
		logger.Warnf("token %d invalid user or password", id)
		return syscall.EACCES
	}
	return 0
}

func (k *kerberos) renew(ctx meta.Context, m meta.Meta, volname, renewer string, id uint32, password string) (int64, syscall.Errno) {
	t, eno := k.loadToken(ctx, m, id)
	if eno != 0 {
		return 0, eno
	}
	if password != t.Password || renewer != t.Renewer {
		return 0, syscall.EACCES
	}
	now := time.Now().Unix()
	if now > t.Expire {
		logger.Warnf("token %d expired for renew", id)
		return 0, syscall.EINVAL
	}
	vol := k.getVol(volname)
	t.Expire = min(t.Issued+vol.life, t.Expire+vol.renew)
	eno = k.updateToken(ctx, m, id, t)
	if eno != 0 {
		return 0, eno
	}
	return t.Expire, 0
}

func (k *kerberos) storeToken(ctx meta.Context, m meta.Meta, t *token) (id uint32, st syscall.Errno) {
	marshal, err := json.Marshal(t)
	if err != nil {
		logger.Errorf("marshal token: %s", err)
		return 0, syscall.EINVAL
	}
	return m.StoreToken(ctx, marshal)
}

func (k *kerberos) updateToken(ctx meta.Context, m meta.Meta, id uint32, t *token) syscall.Errno {
	marshal, err := json.Marshal(t)
	if err != nil {
		logger.Errorf("marshal token: %s", err)
		return syscall.EINVAL
	}
	return m.UpdateToken(ctx, id, marshal)
}

func (k *kerberos) loadToken(ctx meta.Context, m meta.Meta, id uint32) (*token, syscall.Errno) {
	tb, errno := m.LoadToken(ctx, id)
	if errno != 0 {
		return nil, errno
	}
	t := &token{}
	err := json.Unmarshal(tb, t)
	if err != nil {
		logger.Errorf("unmarshal token %d: %s", id, err)
		return nil, syscall.EINVAL
	}
	return t, 0
}

func (k *kerberos) cancelToken(ctx meta.Context, m meta.Meta, user string, id uint32, password string) syscall.Errno {
	t, eno := k.loadToken(ctx, m, id)
	if eno != 0 {
		return eno
	}
	if password != t.Password || user != t.Renewer && user != t.User {
		return syscall.EACCES
	}
	return m.DeleteTokens(ctx, []uint32{id})
}

func (k *kerberos) cleanupTokens() {
	var metas []meta.Meta
	k.mu.Lock()
	for _, vol := range k.vols {
		metas = append(metas, vol.m)
	}
	k.mu.Unlock()
	for _, m := range metas {
		ctx := meta.Background()
		tokens, eno := m.ListTokens(ctx)
		if eno != 0 {
			logger.Errorf("list tokens: %s", eno)
			return
		}
		var todelete []uint32
		now := time.Now().Unix()
		for id, data := range tokens {
			t := &token{}
			err := json.Unmarshal(data, t)
			if err != nil {
				logger.Warnf("unmarshal token %d: %s", id, err)
			}
			if t.Expire <= now {
				todelete = append(todelete, id)
			}
		}
		if len(todelete) == 0 {
			return
		}
		logger.Infof("cleaning up %d expired tokens", len(todelete))
		eno = m.DeleteTokens(ctx, todelete)
		if eno != 0 {
			logger.Errorf("delete tokens: %s", eno)
		}
	}
}

func (k *kerberos) loadConf(name, content string, jfs *fs.FileSystem) {
	vol := &volParams{
		m:       jfs.Meta(),
		life:    defaultLife,
		renew:   defaultRenew,
		proxies: make(map[string]*proxyParam),
	}
	scanner := bufio.NewScanner(strings.NewReader(content))
	for scanner.Scan() {
		line := scanner.Text()
		idx := strings.Index(line, "#")
		if idx >= 0 {
			line = line[:idx]
		}
		line = strings.TrimSpace(line)
		if len(line) == 0 {
			continue
		}
		fields := strings.SplitN(line, "=", 2)
		if len(fields) != 2 {
			logger.Warningf("bad line: %s", line)
			continue
		}
		key := strings.TrimSpace(fields[0])
		value := strings.TrimSpace(fields[1])
		split := strings.Split(key, ".")
		if len(split) < 2 {
			logger.Warningf("bad line: %s", line)
			continue
		}
		keySuffix := split[len(split)-1]
		volName := split[0]
		if volName != name {
			continue
		}
		vol.parse(keySuffix, key, value)
	}
	jfs.Superuser = vol.superuser
	jfs.Supergroup = vol.supergroup
	k.mu.Lock()
	k.vols[name] = vol
	k.mu.Unlock()
}

func (k *kerberos) init() int {
	k.vols = make(map[string]*volParams)
	go func() {
		for {
			time.Sleep(10 * time.Minute)
			k.cleanupTokens()
		}
	}()
	return 0
}

var kerb = kerberos{}


================================================
FILE: sdk/java/libjfs/main.go
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package main

// #cgo linux LDFLAGS: -ldl
// #cgo linux CFLAGS: -Wno-discarded-qualifiers -D_GNU_SOURCE
// #include <unistd.h>
// #include <inttypes.h>
// #include <sys/types.h>
// #include <sys/stat.h>
// #include <fcntl.h>
// #include <utime.h>
// #include <stdlib.h>
// void jfs_callback(const char *msg);
/*
#include <inttypes.h>

typedef struct {
	uint64_t inode;
	uint32_t mode;
	uint32_t uid;
	uint32_t gid;
	uint32_t atime;
	uint32_t mtime;
	uint32_t ctime;
	uint32_t nlink;
	uint64_t length;
} fileInfo;
*/
import "C"
import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	_ "net/http/pprof"
	"os"
	"path"
	"path/filepath"
	"runtime/debug"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"
	"unsafe"

	"github.com/juicedata/juicefs/cmd"
	"github.com/juicedata/juicefs/pkg/acl"
	"github.com/juicedata/juicefs/pkg/chunk"
	"github.com/juicedata/juicefs/pkg/fs"
	"github.com/juicedata/juicefs/pkg/meta"
	"github.com/juicedata/juicefs/pkg/metric"
	"github.com/juicedata/juicefs/pkg/object"
	"github.com/juicedata/juicefs/pkg/usage"
	"github.com/juicedata/juicefs/pkg/utils"
	"github.com/juicedata/juicefs/pkg/version"
	"github.com/juicedata/juicefs/pkg/vfs"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/push"
	"github.com/sirupsen/logrus"
)

var (
	filesLock  sync.Mutex
	openFiles  = make(map[int32]*fwrapper)
	nextHandle = int32(1)

	fslock        sync.Mutex
	handlers            = make(map[int64]*wrapper)
	nextFsHandle  int64 = 0
	activefs            = make(map[fsKey][]*wrapper)
	logger              = utils.GetLogger("juicefs")
	bOnce         sync.Once
	bridges       []*Bridge
	pOnce         sync.Once
	pushers       []*push.Pusher
	rOnce         sync.Once
	remoteWriters []*RemoteWriter

	userGroupCache = make(map[string]map[string][]string) // name -> (user -> groups)

	formats = make(map[string]*meta.Format)

	kerbOnce           = sync.Once{}
	superuserChangedCb = make(map[string]struct{})

	MaxDeletes = meta.RmrDefaultThreads
	caller     = CALLER_JAVA
)

const (
	CALLER_JAVA = iota
	CALLER_PYTHON
)

const (
	BEHAVIOR_HADOOP = "Hadoop"
)

const (
	EPERM     = -0x01
	ENOENT    = -0x02
	EINTR     = -0x04
	EIO       = -0x05
	EACCES    = -0x0d
	EEXIST    = -0x11
	ENOTDIR   = -0x14
	EISDIR    = -0x15
	EINVAL    = -0x16
	ENOSPC    = -0x1c
	EDQUOT    = -0x45
	EROFS     = -0x1e
	ENOTEMPTY = -0x27
	ENODATA   = -0x3d
	ENOTSUP   = -0x5f
)

func errno(err error) int32 {
	if err == nil {
		return 0
	}
	eno, ok := err.(syscall.Errno)
	if !ok {
		return EIO
	}
	if eno == 0 {
		return 0
	}
	// Use the errno in Linux for all the OS
	switch eno {
	case syscall.EPERM:
		return EPERM
	case syscall.ENOENT:
		return ENOENT
	case syscall.EINTR:
		return EINTR
	case syscall.EIO:
		return EIO
	case syscall.EACCES:
		return EACCES
	case syscall.EEXIST:
		return EEXIST
	case syscall.ENOTDIR:
		return ENOTDIR
	case syscall.EISDIR:
		return EISDIR
	case syscall.EINVAL:
		return EINVAL
	case syscall.ENOSPC:
		return ENOSPC
	case syscall.EDQUOT:
		return EDQUOT
	case syscall.EROFS:
		return EROFS
	case syscall.ENOTEMPTY:
		return ENOTEMPTY
	case syscall.ENODATA:
		return ENODATA
	case syscall.ENOTSUP:
		return ENOTSUP
	default:
		logger.Warnf("unknown errno %d: %s", eno, err)
		return -int32(eno)
	}
}

type fsKey struct {
	name string
	conf javaConf
}

type wrapper struct {
	*fs.FileSystem
	volname    string
	ctx        meta.Context
	m          *mapping
	user       string
	superuser  string
	supergroup string
	conf       javaConf
}

type logWriter struct {
	buf chan string
}

func (w *logWriter) Write(p []byte) (int, error) {
	select {
	case w.buf <- string(p):
		_, _ = os.Stderr.Write(p)
		return len(p), nil
	default:
		return os.Stderr.Write(p)
	}
}

func newLogWriter() *logWriter {
	w := &logWriter{
		buf: make(chan string, 10),
	}
	go func() {
		for l := range w.buf {
			cmsg := C.CString(l)
			C.jfs_callback(cmsg)
			C.free(unsafe.Pointer(cmsg))
		}
	}()
	return w
}

//export jfs_set_logger
func jfs_set_logger(cb unsafe.Pointer) {
	utils.DisableLogColor()
	if cb != nil {
		utils.SetOutput(newLogWriter())
	} else {
		utils.SetOutput(os.Stderr)
	}
}

func (w *wrapper) withPid(pid int64) meta.Context {
	// mapping Java Thread ID to global one
	ctx := meta.NewContext(w.ctx.Pid()*1000+uint32(pid), w.ctx.Uid(), w.ctx.Gids())
	if caller == CALLER_JAVA {
		ctx = ctx.WithValue(meta.CtxKey("behavior"), BEHAVIOR_HADOOP)
	}
	return ctx
}

func (w *wrapper) getSuperUser() string {
	if w.Superuser != "" {
		return w.Superuser
	}
	return w.superuser
}

func (w *wrapper) getSuperGroup() string {
	if w.Supergroup != "" {
		return w.Supergroup
	}
	return w.supergroup
}

func (w *wrapper) isSuperuser(name string, groups []string) bool {
	if name == w.getSuperUser() || w.conf.SuperFS {
		return true
	}
	sg := w.getSuperGroup()
	for _, g := range groups {
		if g == sg {
			return true
		}
	}
	return false
}

func (w *wrapper) lookupUid(name string) uint32 {
	if name == w.superuser {
		return 0
	}
	return uint32(w.m.lookupUser(name))
}

func (w *wrapper) lookupGid(group string) uint32 {
	if group == w.supergroup {
		return 0
	}
	return uint32(w.m.lookupGroup(group))
}

func (w *wrapper) lookupGids(groups []string) []uint32 {
	var gids []uint32
	for _, g := range groups {
		gids = append(gids, w.lookupGid(g))
	}
	return gids
}

func (w *wrapper) uid2name(uid uint32) string {
	name := w.superuser
	if uid > 0 {
		name = w.m.lookupUserID(uid)
	}
	return name
}

func (w *wrapper) gid2name(gid uint32) string {
	group := w.supergroup
	if gid > 0 {
		group = w.m.lookupGroupID(gid)
	}
	return group
}

type fwrapper struct {
	*fs.File
	w *wrapper
}

func nextFileHandle(f *fs.File, w *wrapper) int32 {
	filesLock.Lock()
	defer filesLock.Unlock()
	for i := nextHandle; ; i++ {
		if _, ok := openFiles[i]; !ok {
			openFiles[i] = &fwrapper{f, w}
			nextHandle = i + 1
			return i
		}
	}
}

func freeHandle(fd int32) {
	filesLock.Lock()
	defer filesLock.Unlock()
	f := openFiles[fd]
	if f != nil {
		delete(openFiles, fd)
	}
}

type javaConf struct {
	MetaURL             string `json:"meta"`
	Bucket              string `json:"bucket"`
	StorageClass        string `json:"storageClass"`
	ReadOnly            bool   `json:"readOnly"`
	NoSession           bool   `json:"noSession"`
	NoBGJob             bool   `json:"noBGJob"`
	OpenCache           string `json:"openCache"`
	BackupMeta          string `json:"backupMeta"`
	BackupSkipTrash     bool   `json:"backupSkipTrash"`
	Heartbeat           string `json:"heartbeat"`
	CacheDir            string `json:"cacheDir"`
	CacheSize           string `json:"cacheSize"`
	CacheItems          int64  `json:"cacheItems"`
	FreeSpace           string `json:"freeSpace"`
	AutoCreate          bool   `json:"autoCreate"`
	CacheFullBlock      bool   `json:"cacheFullBlock"`
	CacheChecksum       string `json:"cacheChecksum"`
	CacheEviction       string `json:"cacheEviction"`
	CacheScanInterval   string `json:"cacheScanInterval"`
	CacheExpire         string `json:"cacheExpire"`
	Writeback           bool   `json:"writeback"`
	MemorySize          string `json:"memorySize"`
	Prefetch            int    `json:"prefetch"`
	Readahead           string `json:"readahead"`
	UploadLimit         string `json:"uploadLimit"`
	DownloadLimit       string `json:"downloadLimit"`
	MaxUploads          int    `json:"maxUploads"`
	MaxDownloads        int    `json:"maxDownloads"`
	MaxDeletes          int    `json:"maxDeletes"`
	SkipDirNlink        int    `json:"skipDirNlink"`
	SkipDirMtime        string `json:"skipDirMtime"`
	IORetries           int    `json:"ioRetries"`
	GetTimeout          string `json:"getTimeout"`
	PutTimeout          string `json:"putTimeout"`
	FastResolve         bool   `json:"fastResolve"`
	AttrTimeout         string `json:"attrTimeout"`
	EntryTimeout        string `json:"entryTimeout"`
	DirEntryTimeout     string `json:"dirEntryTimeout"`
	Debug               bool   `json:"debug"`
	NoUsageReport       bool   `json:"noUsageReport"`
	AccessLog           string `json:"accessLog"`
	PushGateway         string `json:"pushGateway"`
	PushInterval        string `json:"pushInterval"`
	PushAuth            string `json:"pushAuth"`
	PushLabels          string `json:"pushLabels"`
	PushGraphite        string `json:"pushGraphite"`
	PushRemoteWrite     string `json:"pushRemoteWrite"`
	PushRemoteWriteAuth string `json:"pushRemoteWriteAuth"`
	Caller              int    `json:"caller"`
	Subdir              string `json:"subdir"`

	AuthMethod string `json:"authMethod,omitempty"`
	RealUser   string `json:"realUser,omitempty"`

	SuperFS bool `json:"superFs,omitempty"`
}

func cleanConf(conf javaConf) javaConf {
	conf.AuthMethod = ""
	conf.RealUser = ""
	conf.SuperFS = false
	return conf
}

func getOrCreate(name, user, groups, superuser, supergroup string, conf javaConf, f func() *fs.FileSystem) int64 {
	fslock.Lock()
	defer fslock.Unlock()
	key := fsKey{name: name, conf: cleanConf(conf)}
	ws := activefs[key]
	var jfs *fs.FileSystem
	var m *mapping
	if len(ws) > 0 {
		jfs = ws[0].FileSystem
		m = ws[0].m
	} else {
		m = newMapping(name)
		jfs = f()
		if jfs == nil {
			return 0
		}
		switch jfs.Meta().Name() {
		case "mysql", "postgres", "sqlite3":
			m.mask = 0x7FFFFFFF // limit generated uid to int32
		}
		logger.Infof("JuiceFileSystem created for user:%s groups:%s", user, groups)
	}
	w := &wrapper{jfs, name, nil, m, user, superuser, supergroup, conf}
	if formats[name] != nil && formats[name].KerbConf != "" {
		if _, ok := superuserChangedCb[name]; !ok {
			jfs.Meta().OnReload(func(format *meta.Format) {
				kerb.loadConf(name, format.KerbConf, jfs)
				updateAllCtx(name, user, groups)
			})
			superuserChangedCb[name] = struct{}{}
		}
	}
	activefs[key] = append(ws, w)
	updateAllCtx(name, user, groups)
	nextFsHandle = nextFsHandle + 1
	handlers[nextFsHandle] = w
	return nextFsHandle
}

func updateAllCtx(name string, user, groups string) {
	var ws []*wrapper
	for k, v := range activefs {
		if k.name == name {
			ws = append(ws, v...)
		}
	}
	if len(ws) > 0 {
		for _, w := range ws {
			var gs []string
			if userGroupCache[name] != nil {
				gs = userGroupCache[name][user]
			}
			if gs == nil {
				gs = strings.Split(groups, ",")
			}
			logger.Debugf("update groups of %s to %s", user, strings.Join(gs, ","))
			updateCtx(w, gs)
		}
	}
}

func push2Gateway(pushGatewayAddr, pushAuth string, pushInterVal time.Duration, registry *prometheus.Registry, commonLabels map[string]string) {
	pusher := push.New(pushGatewayAddr, "juicefs").Gatherer(registry)
	for k, v := range commonLabels {
		pusher.Grouping(k, v)
	}
	if pushAuth != "" {
		if strings.Contains(pushAuth, ":") {
			parts := strings.Split(pushAuth, ":")
			pusher.BasicAuth(parts[0], parts[1])
		}
	}
	pusher.Client(&http.Client{Timeout: 2 * time.Second})
	pushers = append(pushers, pusher)

	pOnce.Do(func() {
		go func() {
			for range time.NewTicker(pushInterVal).C {
				for _, pusher := range pushers {
					if err := pusher.Push(); err != nil {
						logger.Warnf("error pushing to PushGateway: %s", err)
					}
				}
			}
		}()
	})
}

func push2RemoteWrite(remoteWrite string, pushRemoteWriteAuth string, pushInterVal time.Duration, registry *prometheus.Registry, commonLabels map[string]string) {
	writer, err := NewRemoteWriter(&RemoteWriteConfig{
		URL:           remoteWrite,
		Gatherer:      registry,
		Auth:          pushRemoteWriteAuth,
		Interval:      pushInterVal,
		Timeout:       2 * time.Second,
		ErrorHandling: ContinueOnError,
		Logger:        logger,
		CommonLabels:  commonLabels,
	})
	if err != nil {
		logger.Warnf("NewRemoteWriter error: %s", err)
		return
	}
	remoteWriters = append(remoteWriters, writer)

	rOnce.Do(func() {
		go func() {
			for range time.NewTicker(pushInterVal).C {
				for _, writer := range remoteWriters {
					if err := writer.Push(); err != nil {
						logger.Warnf("error pushing to remote write: %s", err)
					}
				}
			}
		}()
	})
}

func push2Graphite(graphite string, pushInterVal time.Duration, registry *prometheus.Registry, commonLabels map[string]string) {
	if bridge, err := NewBridge(&Config{
		URL:           graphite,
		Gatherer:      registry,
		UseTags:       true,
		Timeout:       2 * time.Second,
		ErrorHandling: ContinueOnError,
		Logger:        logger,
		CommonLabels:  commonLabels,
	}); err != nil {
		logger.Warnf("NewBridge error:%s", err)
	} else {
		bridges = append(bridges, bridge)
	}

	bOnce.Do(func() {
		go func() {
			for range time.NewTicker(pushInterVal).C {
				for _, brg := range bridges {
					if err := brg.Push(); err != nil {
						logger.Warnf("error pushing to Graphite: %s", err)
					}
				}
			}
		}()
	})
}

//export jfs_init
func jfs_init(credentialPtr uintptr, count int32, cname, cjsonConf, cuser, group, superuser, supergroup *C.char) int64 {
	name := C.GoString(cname)
	user := C.GoString(cuser)
	debug.SetGCPercent(50)
	object.UserAgent = "JuiceFS-SDK " + version.Version()
	var jConf javaConf
	err := json.Unmarshal([]byte(C.GoString(cjsonConf)), &jConf)
	if err != nil {
		if os.Getenv("JUICEFS_DEBUG") != "" {
			logger.Fatalf("invalid json: %s", C.GoString(cjsonConf))
		} else {
			logger.Fatalf("invalid json")
		}
	}
	return getOrCreate(name, user, C.GoString(group), C.GoString(superuser), C.GoString(supergroup), jConf, func() *fs.FileSystem {
		if jConf.Debug || os.Getenv("JUICEFS_DEBUG") != "" {
			utils.SetLogLevel(logrus.DebugLevel)
			go func() {
				for port := 6060; port < 6100; port++ {
					logger.Debugf("listen at 127.0.0.1:%d", port)
					_ = http.ListenAndServe(fmt.Sprintf("127.0.0.1:%d", port), nil)
				}
			}()
		} else if os.Getenv("JUICEFS_LOGLEVEL") != "" {
			level, err := logrus.ParseLevel(os.Getenv("JUICEFS_LOGLEVEL"))
			if err == nil {
				utils.SetLogLevel(level)
			} else {
				utils.SetLogLevel(logrus.WarnLevel)
				logger.Errorf("JUICEFS_LOGLEVEL: %s", err)
			}
		} else {
			utils.SetLogLevel(logrus.WarnLevel)
		}

		caller = jConf.Caller
		if jConf.MaxDeletes > 0 {
			MaxDeletes = jConf.MaxDeletes
		}

		metaConf := meta.DefaultConf()
		metaConf.Retries = jConf.IORetries
		metaConf.MaxDeletes = jConf.MaxDeletes
		metaConf.SkipDirNlink = jConf.SkipDirNlink
		metaConf.SkipDirMtime = utils.Duration(jConf.SkipDirMtime)
		metaConf.ReadOnly = jConf.ReadOnly
		metaConf.NoBGJob = jConf.NoBGJob || jConf.NoSession
		metaConf.OpenCache = utils.Duration(jConf.OpenCache)
		metaConf.Heartbeat = utils.Duration(jConf.Heartbeat)
		m := meta.NewClient(jConf.MetaURL, metaConf)
		format, err := m.Load(true)
		if err != nil {
			logger.Errorf("load setting: %s", err)
			return nil
		}
		formats[name] = format
		var registerer prometheus.Registerer
		var registry *prometheus.Registry
		if jConf.PushGateway != "" || jConf.PushGraphite != "" || jConf.PushRemoteWrite != "" || jConf.Caller == CALLER_PYTHON {
			commonLabels := prometheus.Labels{"vol_name": name, "mp": "sdk-" + strconv.Itoa(os.Getpid())}
			if h, err := os.Hostname(); err == nil {
				commonLabels["instance"] = h
			} else {
				logger.Warnf("cannot get hostname: %s", err)
			}
			if jConf.PushLabels != "" {
				for _, kv := range strings.Split(jConf.PushLabels, ";") {
					var splited = strings.Split(kv, ":")
					if len(splited) != 2 {
						logger.Errorf("invalid label format: %s", kv)
						return nil
					}
					if utils.StringContains([]string{"mp", "vol_name", "instance"}, splited[0]) {
						logger.Warnf("overriding reserved label: %s", splited[0])
					}
					commonLabels[splited[0]] = splited[1]
				}
			}
			registry = prometheus.NewRegistry()
			registerer = prometheus.WrapRegistererWithPrefix("juicefs_", registry)
			registerer.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
			registerer.MustRegister(collectors.NewGoCollector())

			var interval = utils.Duration(jConf.PushInterval)
			if jConf.PushGraphite != "" {
				push2Graphite(jConf.PushGraphite, interval, registry, commonLabels)
			}
			if jConf.PushGateway != "" {
				push2Gateway(jConf.PushGateway, jConf.PushAuth, interval, registry, commonLabels)
			}
			if jConf.PushRemoteWrite != "" {
				push2RemoteWrite(jConf.PushRemoteWrite, jConf.PushRemoteWriteAuth, interval, registry, commonLabels)
			}
			m.InitMetrics(registerer)
			vfs.InitMetrics(registerer)
			go metric.UpdateMetrics(registerer)
		}

		blob, err := cmd.NewReloadableStorage(format, m, func(f *meta.Format) {
			if jConf.Bucket != "" {
				format.Bucket = jConf.Bucket
			}
			if jConf.StorageClass != "" {
				format.StorageClass = jConf.StorageClass
			}
		})
		if err != nil {
			logger.Errorf("object storage: %s", err)
			return nil
		}
		logger.Infof("Data use %s", blob)

		var freeSpaceRatio = 0.1
		if jConf.FreeSpace != "" {
			freeSpaceRatio, _ = strconv.ParseFloat(jConf.FreeSpace, 64)
		}
		chunkConf := chunk.Config{
			BlockSize:         format.BlockSize * 1024,
			Compress:          format.Compression,
			CacheDir:          jConf.CacheDir,
			CacheMode:         0644, // all user can read cache
			CacheSize:         utils.ParseBytesStr("cache-size", jConf.CacheSize, 'M'),
			CacheItems:        jConf.CacheItems,
			FreeSpace:         float32(freeSpaceRatio),
			AutoCreate:        jConf.AutoCreate,
			CacheFullBlock:    jConf.CacheFullBlock,
			CacheChecksum:     jConf.CacheChecksum,
			CacheEviction:     jConf.CacheEviction,
			CacheScanInterval: utils.Duration(jConf.CacheScanInterval),
			CacheExpire:       utils.Duration(jConf.CacheExpire),
			OSCache:           true,
			MaxUpload:         jConf.MaxUploads,
			MaxDownload:       jConf.MaxDownloads,
			MaxRetries:        jConf.IORetries,
			UploadLimit:       utils.ParseMbpsStr("upload-limit", jConf.UploadLimit) * 1e6 / 8,
			DownloadLimit:     utils.ParseMbpsStr("download-limit", jConf.DownloadLimit) * 1e6 / 8,
			Prefetch:          jConf.Prefetch,
			Writeback:         jConf.Writeback,
			HashPrefix:        format.HashPrefix,
			GetTimeout:        utils.Duration(jConf.GetTimeout),
			PutTimeout:        utils.Duration(jConf.PutTimeout),
			BufferSize:        utils.ParseBytesStr("memory-size", jConf.MemorySize, 'M'),
			Readahead:         int(utils.ParseBytesStr("max-readahead", jConf.Readahead, 'M')),
		}
		if chunkConf.UploadLimit == 0 {
			chunkConf.UploadLimit = format.UploadLimit * 1e6 / 8
		}
		if chunkConf.DownloadLimit == 0 {
			chunkConf.DownloadLimit = format.DownloadLimit * 1e6 / 8
		}
		chunkConf.SelfCheck(format.UUID)
		store := chunk.NewCachedStore(blob, chunkConf, registerer)
		m.OnMsg(meta.DeleteSlice, func(args ...interface{}) error {
			id := args[0].(uint64)
			length := args[1].(uint32)
			return store.Remove(id, int(length))
		})
		m.OnMsg(meta.CompactChunk, func(args ...interface{}) error {
			slices := args[0].([]meta.Slice)
			id := args[1].(uint64)
			return vfs.Compact(chunkConf, store, slices, id)
		})
		err = m.NewSession(!jConf.NoSession)
		if err != nil {
			logger.Errorf("new session: %s", err)
			return nil
		}
		m.OnReload(func(fmt *meta.Format) {
			if chunkConf.UploadLimit > 0 {
				fmt.UploadLimit = chunkConf.UploadLimit
			}
			if chunkConf.DownloadLimit > 0 {
				fmt.DownloadLimit = chunkConf.DownloadLimit
			}
			store.UpdateLimit(fmt.UploadLimit, fmt.DownloadLimit)
		})

		conf := &vfs.Config{
			Meta:            metaConf,
			Format:          *format,
			Chunk:           &chunkConf,
			AttrTimeout:     utils.Duration(jConf.AttrTimeout),
			EntryTimeout:    utils.Duration(jConf.EntryTimeout),
			DirEntryTimeout: utils.Duration(jConf.DirEntryTimeout),
			AccessLog:       jConf.AccessLog,
			FastResolve:     jConf.FastResolve,
			Subdir:          jConf.Subdir,
			BackupMeta:      utils.Duration(jConf.BackupMeta),
			BackupSkipTrash: jConf.BackupSkipTrash,
		}
		if !jConf.ReadOnly && !jConf.NoSession && !jConf.NoBGJob && conf.BackupMeta > 0 {
			go vfs.Backup(m, blob, conf.BackupMeta, conf.BackupSkipTrash)
		}
		if !jConf.NoUsageReport && !jConf.NoSession {
			go usage.ReportUsage(m, "java-sdk "+version.Version())
		}
		jfs, err := fs.NewFileSystem(conf, m, store, registry)
		if err != nil {
			logger.Errorf("Initialize failed: %s", err)
			return nil
		}
		jfs.InitMetrics(registerer)
		if format.KerbConf != "" {
			kerbOnce.Do(func() {
				kerb.init()
			})
			kerb.loadConf(name, format.KerbConf, jfs)
			var credential []byte
			if credentialPtr == 0 {
				logger.Errorf("kerberos credential is needed")
				return nil
			}
			credential = toBuf(credentialPtr, count)
			hostname, _ := os.Hostname()
			ip := resolve(hostname)
			if ip == "" {
				ip, _ = findLocalIP("", "")
				logger.Infof("use local ip %s for %s", ip, hostname)
			}
			var eno syscall.Errno
			if jConf.AuthMethod == "kerberos" {
				eno = kerb.auth(name, user, jConf.RealUser, C.GoString(group), ip, hostname, credential)
			} else {
				tbuf := utils.FromBuffer(credential)
				id := tbuf.Get32()
				password := tbuf.Get(int(tbuf.Get32()))
				eno = kerb.check(meta.Background(), jfs.Meta(), name, user, id, string(password))
			}
			if eno != 0 {
				logger.Errorf("%s auth failed for vol:%s(%s:%s): %s", jConf.AuthMethod, name, user, jConf.RealUser, eno)
				return nil
			}
		}
		return jfs
	})
}

func F(p int64) *wrapper {
	fslock.Lock()
	defer fslock.Unlock()
	return handlers[p]
}

//export jfs_update_uid_grouping
func jfs_update_uid_grouping(cname, uidstr *C.char, grouping *C.char) {
	name := C.GoString(cname)
	var uids []pwent
	if uidstr != nil {
		for _, line := range strings.Split(C.GoString(uidstr), "\n") {
			fields := strings.Split(line, ":")
			if len(fields) < 2 {
				continue
			}
			username := strings.TrimSpace(fields[0])
			uid, _ := strconv.ParseUint(strings.TrimSpace(fields[1]), 10, 32)
			uids = append(uids, pwent{uint32(uid), username})
		}

		var buffer bytes.Buffer
		for _, u := range uids {
			buffer.WriteString(fmt.Sprintf("\t%v:%v\n", u.name, u.id))
		}
		logger.Debugf("Update uids mapping\n %s", buffer.String())
	}

	var userGroups = make(map[string][]string) // user -> groups

	var gids []pwent
	if grouping != nil {
		for _, line := range strings.Split(C.GoString(grouping), "\n") {
			fields := strings.Split(line, ":")
			if len(fields) < 2 {
				continue
			}
			gname := strings.TrimSpace(fields[0])
			gid, _ := strconv.ParseUint(strings.TrimSpace(fields[1]), 10, 32)
			gids = append(gids, pwent{uint32(gid), gname})
			if len(fields) > 2 {
				for _, user := range strings.Split(fields[len(fields)-1], ",") {
					userGroups[user] = append(userGroups[user], gname)
				}
			}
		}
		var buffer bytes.Buffer
		for _, g := range gids {
			buffer.WriteString(fmt.Sprintf("\t%v:%v\n", g.name, g.id))
		}
		logger.Debugf("Update gids mapping\n %s", buffer.String())
	}

	fslock.Lock()
	defer fslock.Unlock()
	userGroupCache[name] = userGroups
	var ws []*wrapper
	for k, wrappers := range activefs {
		if k.name == name {
			ws = append(ws, wrappers...)
		}
	}
	if len(ws) > 0 {
		for _, w := range ws {
			w.m.update(uids, gids, false)
			logger.Debugf("Update groups of %s to %s", w.user, strings.Join(userGroups[w.user], ","))
			updateCtx(w, userGroups[w.user])
		}
	}
}

func updateCtx(w *wrapper, groups []string) {
	if w.isSuperuser(w.user, groups) {
		w.ctx = meta.NewContext(uint32(os.Getpid()), 0, []uint32{0})
	} else {
		var gids []uint32
		if w.ctx != nil {
			gids = w.ctx.Gids()
		}
		if len(groups) > 0 {
			gids = w.lookupGids(groups)
		}
		w.ctx = meta.NewContext(uint32(os.Getpid()), w.lookupUid(w.user), gids)
	}
}

//export jfs_getGroups
func jfs_getGroups(cname, cuser *C.char, buf uintptr, count int32) int32 {
	name := C.GoString(cname)
	user := C.GoString(cuser)
	fslock.Lock()
	userGroups := userGroupCache[name]
	fslock.Unlock()
	var gStr string
	if userGroups != nil {
		gs := userGroups[user]
		if gs != nil {
			gStr = strings.Join(gs, ",")
		}
	}
	copy(toBuf(buf, count), gStr)
	return int32(len(gStr))
}

//export jfs_is_superuser
func jfs_is_superuser(h int64, user *C.char, groups *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	if w.isSuperuser(C.GoString(user), strings.Split(C.GoString(groups), ",")) {
		return 1
	} else {
		return 0
	}
}

//export jfs_term
func jfs_term(pid int64, h int64) int32 {
	w := F(h)
	if w == nil {
		return 0
	}
	ctx := w.withPid(pid)
	// sync all open files
	filesLock.Lock()
	var m sync.WaitGroup
	var toClose []int32
	for fd, f := range openFiles {
		if f.w == w {
			m.Add(1)
			go func(f *fs.File) {
				defer m.Done()
				_ = f.Close(ctx)
			}(f.File)
			toClose = append(toClose, fd)
		}
	}
	for _, fd := range toClose {
		delete(openFiles, fd)
	}
	filesLock.Unlock()
	m.Wait()

	fslock.Lock()
	defer fslock.Unlock()
	delete(handlers, h)
	for k, ws := range activefs {
		for i := range ws {
			if ws[i] == w {
				if len(ws) > 1 {
					ws[i] = ws[len(ws)-1]
					activefs[k] = ws[:len(ws)-1]
				} else {
					_ = w.Flush()
					// don't close the filesystem, so it can be re-used later
					// w.Close()
					// delete(activefs, name)
				}
			}
		}
	}
	for _, bridge := range bridges {
		if err := bridge.Push(); err != nil {
			logger.Warnf("error pushing to Graphite: %s", err)
		}
	}
	for _, pusher := range pushers {
		if err := pusher.Push(); err != nil {
			logger.Warnf("error pushing to PushGatway: %s", err)
		}
	}
	for _, remoteWriter := range remoteWriters {
		if err := remoteWriter.Push(); err != nil {
			logger.Warnf("error pushing to RemoteWrite: %s", err)
		}
	}
	return 0
}

//export jfs_open
func jfs_open(pid int64, h int64, cpath *C.char, lenPtr uintptr, flags int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	path := C.GoString(cpath)
	f, err := w.Open(w.withPid(pid), path, uint32(flags))
	if err != 0 {
		return errno(err)
	}
	st, _ := f.Stat()
	if st.IsDir() {
		return ENOENT
	}
	if lenPtr != 0 {
		buf := toBuf(lenPtr, 8)
		wb := utils.NewNativeBuffer(buf)
		wb.Put64(uint64(st.Size()))
	}
	return nextFileHandle(f, w)
}

//export jfs_open_posix
func jfs_open_posix(pid int64, h int64, cpath *C.char, lenPtr uintptr, flags int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	path := C.GoString(cpath)
	f, err := w.Open(w.withPid(pid), path, uint32(flags))
	if err != 0 {
		return errno(err)
	}
	st, _ := f.Stat()
	if st.IsDir() {
		return EISDIR
	}
	if lenPtr != 0 {
		buf := toBuf(lenPtr, 8)
		wb := utils.NewNativeBuffer(buf)
		wb.Put64(uint64(st.Size()))
	}
	return nextFileHandle(f, w)
}

//export jfs_access
func jfs_access(pid int64, h int64, cpath *C.char, flags int64) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Access(w.withPid(pid), C.GoString(cpath), int(flags)))
}

//export jfs_create
func jfs_create(pid int64, h int64, cpath *C.char, mode uint16, umask uint16) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	path := C.GoString(cpath)
	f, err := w.Create(w.withPid(pid), path, mode, umask)
	if err != 0 {
		return errno(err)
	}
	if w.ctx.Uid() == 0 && w.user != w.superuser {
		// belongs to supergroup
		_ = setOwner(w, w.withPid(pid), C.GoString(cpath), w.user, "")
	}
	return nextFileHandle(f, w)
}

//export jfs_mkdir
func jfs_mkdir(pid int64, h int64, cpath *C.char, mode uint16, umask uint16) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	err := errno(w.Mkdir(w.withPid(pid), C.GoString(cpath), mode, umask))
	if err == 0 && w.ctx.Uid() == 0 && w.user != w.superuser {
		// belongs to supergroup
		_ = setOwner(w, w.withPid(pid), C.GoString(cpath), w.user, "")
	}
	return err
}

//export jfs_mkdirAll
func jfs_mkdirAll(pid int64, h int64, cpath *C.char, mode, umask uint16, existOK bool) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	path := C.GoString(cpath)
	err := errno(w.MkdirAll0(w.withPid(pid), path, mode, umask, existOK))
	if err == 0 && w.ctx.Uid() == 0 && w.user != w.superuser {
		// belongs to supergroup
		if err := setOwner(w, w.withPid(pid), path, w.user, ""); err != 0 {
			logger.Errorf("change owner of %s to %s: %d", path, w.user, err)
		}
	}
	return err
}

//export jfs_delete
func jfs_delete(pid int64, h int64, cpath *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Delete(w.withPid(pid), C.GoString(cpath)))
}

//export jfs_unlink
func jfs_unlink(pid int64, h int64, cpath *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Unlink(w.withPid(pid), C.GoString(cpath)))
}

//export jfs_rmdir
func jfs_rmdir(pid int64, h int64, cpath *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Rmdir(w.withPid(pid), C.GoString(cpath)))
}

//export jfs_rmr
func jfs_rmr(pid int64, h int64, cpath *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Rmr(w.withPid(pid), C.GoString(cpath), false, MaxDeletes))
}

//export jfs_rename
func jfs_rename(pid int64, h int64, oldpath *C.char, newpath *C.char) int32 {
	return jfs_rename0(pid, h, oldpath, newpath, meta.RenameNoReplace)
}

//export jfs_rename0
func jfs_rename0(pid int64, h int64, oldpath *C.char, newpath *C.char, flags uint32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Rename(w.withPid(pid), C.GoString(oldpath), C.GoString(newpath), flags))
}

//export jfs_truncate
func jfs_truncate(pid int64, h int64, path *C.char, length uint64) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Truncate(w.withPid(pid), C.GoString(path), length))
}

//export jfs_setXattr
func jfs_setXattr(pid int64, h int64, path *C.char, name *C.char, value uintptr, vlen, mode int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	var flags uint32
	switch mode {
	case 1:
		flags = meta.XattrCreate
	case 2:
		flags = meta.XattrReplace
	}
	return errno(w.SetXattr(w.withPid(pid), C.GoString(path), C.GoString(name), toBuf(value, vlen), flags))
}

//export jfs_setXattr2
func jfs_setXattr2(pid int64, h int64, path *C.char, name *C.char, value *C.char, mode int64) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	var flags uint32
	switch mode {
	case 1:
		flags = meta.XattrCreate
	case 2:
		flags = meta.XattrReplace
	}
	return errno(w.SetXattr(w.withPid(pid), C.GoString(path), C.GoString(name), []byte(C.GoString(value)), flags))
}

//export jfs_getXattr
func jfs_getXattr(pid int64, h int64, path *C.char, name *C.char, buf uintptr, bufsize int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	buff, err := w.GetXattr(w.withPid(pid), C.GoString(path), C.GoString(name))
	if err != 0 {
		return errno(err)
	}
	if int32(len(buff)) >= bufsize {
		return bufsize
	}
	copy(toBuf(buf, bufsize), buff)
	return int32(len(buff))
}

//export jfs_getXattr2
func jfs_getXattr2(pid int64, h int64, path *C.char, name *C.char, value **C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	t, err := w.GetXattr(w.withPid(pid), C.GoString(path), C.GoString(name))
	if err == 0 {
		*value = C.CString(string(t))
	}
	return errno(err)
}

//export jfs_listXattr
func jfs_listXattr(pid int64, h int64, path *C.char, buf uintptr, bufsize int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	buff, err := w.ListXattr(w.withPid(pid), C.GoString(path))
	if err != 0 {
		return errno(err)
	}
	if int32(len(buff)) >= bufsize {
		return bufsize
	}
	copy(toBuf(buf, bufsize), buff)
	return int32(len(buff))
}

//export jfs_listXattr2
func jfs_listXattr2(pid int64, h int64, path *C.char, value **C.char, size *int) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	t, err := w.ListXattr(w.withPid(pid), C.GoString(path))
	if err == 0 {
		*value = C.CString(string(t))
		*size = len(t)
	}
	return errno(err)
}

//export jfs_removeXattr
func jfs_removeXattr(pid int64, h int64, path *C.char, name *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.RemoveXattr(w.withPid(pid), C.GoString(path), C.GoString(name)))
}

//export jfs_getfacl
func jfs_getfacl(pid int64, h int64, path *C.char, acltype int32, buf uintptr, blen int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	rule := acl.EmptyRule()
	err := w.GetFacl(w.withPid(pid), C.GoString(path), uint8(acltype), rule)
	if err != 0 {
		return errno(err)
	}
	wb := utils.NewNativeBuffer(toBuf(buf, blen))
	wb.Put16(rule.Owner)
	wb.Put16(rule.Group)
	wb.Put16(rule.Other)
	wb.Put16(rule.Mask)
	wb.Put16(uint16(len(rule.NamedUsers)))
	wb.Put16(uint16(len(rule.NamedGroups)))
	var off uintptr = 12
	for i, entry := range append(rule.NamedUsers, rule.NamedGroups...) {
		var name string
		if i < len(rule.NamedUsers) {
			name = w.uid2name(entry.Id)
		} else {
			name = w.gid2name(entry.Id)
		}
		if wb.Left() < len(name)+1+2 {
			return -100
		}
		wb.Put([]byte(name))
		wb.Put8(0)
		wb.Put16(entry.Perm)
	}
	return int32(off)
}

//export jfs_setfacl
func jfs_setfacl(pid int64, h int64, path *C.char, acltype int32, buf uintptr, alen int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	rule := acl.EmptyRule()
	r := utils.NewNativeBuffer(toBuf(buf, alen))
	rule.Owner = r.Get16()
	rule.Group = r.Get16()
	rule.Other = r.Get16()
	rule.Mask = r.Get16()
	namedusers := r.Get16()
	namedgroups := r.Get16()
	for i := uint16(0); i < namedusers+namedgroups; i++ {
		name := string(r.Get(int(r.Get8())))
		var entry acl.Entry
		entry.Perm = uint16(r.Get8())
		if i < namedusers {
			entry.Id = w.lookupUid(name)
			rule.NamedUsers = append(rule.NamedUsers, entry)
		} else {
			entry.Id = w.lookupGid(name)
			rule.NamedGroups = append(rule.NamedGroups, entry)
		}
	}
	return errno(w.SetFacl(w.withPid(pid), C.GoString(path), uint8(acltype), rule))
}

//export jfs_link
func jfs_link(pid int64, h int64, src *C.char, dst *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(w.Link(w.withPid(pid), C.GoString(src), C.GoString(dst)))
}

//export jfs_symlink
func jfs_symlink(pid int64, h int64, target_ *C.char, link_ *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	target := C.GoString(target_)
	link := C.GoString(link_)
	dir := path.Dir(strings.TrimRight(link, "/"))
	rel, e := filepath.Rel(dir, target)
	if e != nil {
		// external link
		rel = target
	}
	return errno(w.Symlink(w.withPid(pid), rel, link))
}

//export jfs_readlink
func jfs_readlink(pid int64, h int64, link *C.char, buf uintptr, bufsize int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	target, err := w.Readlink(w.withPid(pid), C.GoString(link))
	if err != 0 {
		return errno(err)
	}
	if int32(len(target)+1) >= bufsize {
		target = target[:bufsize-1]
	}
	wb := utils.NewNativeBuffer(toBuf(buf, bufsize))
	wb.Put(target)
	wb.Put8(0)
	return int32(len(target))
}

// mode:4 length:8 mtime:8 atime:8 user:50 group:50
func fill_stat(w *wrapper, wb *utils.Buffer, st *fs.FileStat) int32 {
	wb.Put32(uint32(st.Mode()))
	wb.Put64(uint64(st.Size()))
	wb.Put64(uint64(st.Mtime()))
	wb.Put64(uint64(st.Atime()))
	user := w.uid2name(uint32(st.Uid()))
	wb.Put([]byte(user))
	wb.Put8(0)
	group := w.gid2name(uint32(st.Gid()))
	wb.Put([]byte(group))
	wb.Put8(0)
	return 30 + int32(len(user)) + int32(len(group))
}

//export jfs_stat1
func jfs_stat1(pid int64, h int64, cpath *C.char, buf uintptr) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	info, err := w.Stat(w.withPid(pid), C.GoString(cpath))
	if err != 0 {
		return errno(err)
	}
	return fill_stat(w, utils.NewNativeBuffer(toBuf(buf, 130)), info)
}

//export jfs_lstat1
func jfs_lstat1(pid int64, h int64, cpath *C.char, buf uintptr) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	fi, err := w.Lstat(w.withPid(pid), C.GoString(cpath))
	if err != 0 {
		return errno(err)
	}
	return fill_stat(w, utils.NewNativeBuffer(toBuf(buf, 130)), fi)
}

func attrToInfo(fi *fs.FileStat, info *C.fileInfo) {
	attr := fi.Sys().(*meta.Attr)
	info.mode = C.uint32_t(attr.SMode())
	info.uid = C.uint32_t(attr.Uid)
	info.gid = C.uint32_t(attr.Gid)
	info.atime = C.uint32_t(attr.Atime)
	info.mtime = C.uint32_t(attr.Mtime)
	info.ctime = C.uint32_t(attr.Ctime)
	info.nlink = C.uint32_t(attr.Nlink)
	info.length = C.uint64_t(attr.Length)
}

//export jfs_stat
func jfs_stat(pid int64, h int64, cpath *C.char, info *C.fileInfo) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	fi, err := w.Stat(w.withPid(pid), C.GoString(cpath))
	if err != 0 {
		return errno(err)
	}
	info.inode = C.uint64_t(fi.Inode())
	attrToInfo(fi, info)
	return 0
}

//export jfs_lstat
func jfs_lstat(pid int64, h int64, cpath *C.char, info *C.fileInfo) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	fi, err := w.Lstat(w.withPid(pid), C.GoString(cpath))
	if err != 0 {
		return errno(err)
	}
	info.inode = C.uint64_t(fi.Inode())
	attrToInfo(fi, info)
	return 0
}

//export jfs_summary
func jfs_summary(pid int64, h int64, cpath *C.char, buf uintptr) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	ctx := w.withPid(pid)
	f, err := w.Open(ctx, C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	defer f.Close(ctx)
	summary, err := f.Summary(ctx, true, true)
	if err != 0 {
		return errno(err)
	}
	wb := utils.NewNativeBuffer(toBuf(buf, 40))
	wb.Put64(summary.Length)
	wb.Put64(summary.Files)
	wb.Put64(summary.Dirs)

	// quota
	quota, _ := f.GetQuota(ctx)
	if quota != nil {
		wb.Put64(uint64(quota.MaxInodes))
		wb.Put64(uint64(quota.MaxSpace))
	} else {
		wb.Put64(0)
		wb.Put64(0)
	}
	return 40
}

//export jfs_info
func jfs_info(pid int64, h int64, cpath *C.char, p_buf **byte, recursive, strict bool) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	ctx := w.withPid(pid)
	f, err := w.Open(ctx, C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	defer f.Close(ctx)
	info, err := f.Summary(ctx, recursive, strict)
	if err != 0 {
		return errno(err)
	}
	res, err2 := json.Marshal(info)
	if err2 != nil {
		return EINVAL
	}
	if *p_buf != nil {
		return EINVAL
	}

	*p_buf = (*byte)(C.malloc(C.size_t(len(res))))

	buf := unsafe.Slice(*p_buf, len(res))
	return int32(copy(buf, res))
}

//export jfs_gettreesummary
func jfs_gettreesummary(pid, h int64, cpath *C.char, depth, entries uint8, p_buf **byte) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	ctx := w.withPid(pid)
	f, err := w.Open(ctx, C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	summary, err := f.GetTreeSummary(ctx, depth, entries, true)
	if err != 0 {
		return errno(err)
	}
	res, err2 := json.Marshal(summary)
	if err2 != nil {
		return EINVAL
	}
	if *p_buf != nil {
		return EINVAL
	}

	*p_buf = (*byte)(C.malloc(C.size_t(len(res))))

	buf := unsafe.Slice(*p_buf, len(res))
	return int32(copy(buf, res))
}

//export jfs_quota
func jfs_quota(pid int64, h int64, cpath *C.char, cmd uint8, cap, inodes uint64, strict, repair, create bool, p_buf **byte) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	if *p_buf != nil {
		return EINVAL
	}

	qs, err := w.HandleQuota(w.withPid(pid), C.GoString(cpath), cmd, cap, inodes, strict, repair, create)
	if err != 0 {
		return errno(err)
	}
	res, err2 := json.Marshal(qs)
	if err2 != nil {
		return EINVAL
	}

	*p_buf = (*byte)(C.malloc(C.size_t(len(res))))
	buf := unsafe.Slice(*p_buf, len(res))
	return int32(copy(buf, res))
}

//export jfs_statvfs
func jfs_statvfs(pid int64, h int64, buf uintptr) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	total, avail := w.StatFS(w.withPid(pid))
	wb := utils.NewNativeBuffer(toBuf(buf, 16))
	wb.Put64(total)
	wb.Put64(avail)
	return 0
}

//export jfs_chmod
func jfs_chmod(pid int64, h int64, cpath *C.char, mode C.mode_t) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	f, err := w.Open(w.withPid(pid), C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	defer f.Close(w.withPid(pid))
	return errno(f.Chmod(w.withPid(pid), uint16(mode)))
}

//export jfs_chown
func jfs_chown(pid int64, h int64, cpath *C.char, uid uint32, gid uint32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	f, err := w.Open(w.withPid(pid), C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	return errno(f.Chown(w.withPid(pid), uid, gid))
}

//export jfs_utime
func jfs_utime(pid int64, h int64, cpath *C.char, mtime, atime int64) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	f, err := w.Open(w.withPid(pid), C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	defer f.Close(w.withPid(pid))
	return errno(f.Utime(w.withPid(pid), atime, mtime))
}

//export jfs_setOwner
func jfs_setOwner(pid int64, h int64, cpath *C.char, owner *C.char, group *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return setOwner(w, w.withPid(pid), C.GoString(cpath), C.GoString(owner), C.GoString(group))
}

func setOwner(w *wrapper, ctx meta.Context, path string, owner, group string) int32 {
	f, err := w.Open(ctx, path, 0)
	if err != 0 {
		return errno(err)
	}
	defer f.Close(ctx)
	st, _ := f.Stat()
	uid := uint32(st.(*fs.FileStat).Uid())
	gid := uint32(st.(*fs.FileStat).Gid())
	if owner != "" {
		uid = w.lookupUid(owner)
	}
	if group != "" {
		gid = w.lookupGid(group)
	}
	return errno(f.Chown(ctx, uid, gid))
}

//export jfs_listdir
func jfs_listdir(pid int64, h int64, cpath *C.char, offset int64, buf uintptr, bufsize int32) int32 {
	var ctx meta.Context
	var f *fs.File
	var w *wrapper
	if offset > 0 {
		filesLock.Lock()
		fw := openFiles[int32(h)]
		filesLock.Unlock()
		if fw == nil {
			return EINVAL
		}
		freeHandle(int32(h))
		w = fw.w
		f = fw.File
		ctx = w.withPid(pid)
	} else {
		w = F(h)
		if w == nil {
			return EINVAL
		}
		var err syscall.Errno
		ctx = w.withPid(pid)
		f, err = w.Open(ctx, C.GoString(cpath), 0)
		if err != 0 {
			return errno(err)
		}
		st, _ := f.Stat()
		if !st.IsDir() {
			return ENOTDIR
		}
	}

	es, err := f.ReaddirPlus(ctx, int(offset))
	if err != 0 {
		return errno(err)
	}

	wb := utils.NewNativeBuffer(toBuf(buf, bufsize))
	for i, d := range es {
		if wb.Left() < 1+len(d.Name)+1+130+8 {
			wb.Put32(uint32(len(es) - i))
			wb.Put32(uint32(nextFileHandle(f, w)))
			return bufsize - int32(wb.Left()) - 8
		}
		wb.Put8(byte(len(d.Name)))
		wb.Put(d.Name)
		header := wb.Get(1)
		header[0] = uint8(fill_stat(w, wb, fs.AttrToFileInfo(d.Inode, d.Attr)))
	}
	wb.Put32(0)
	return bufsize - int32(wb.Left()) - 4
}

//export jfs_listdir2
func jfs_listdir2(pid int64, h int64, cpath *C.char, plus bool, buf **byte, size *int64) int32 {
	var ctx meta.Context
	var f *fs.File
	w := F(h)
	if w == nil {
		return EINVAL
	}
	var err syscall.Errno
	ctx = w.withPid(pid)
	f, err = w.Open(ctx, C.GoString(cpath), 0)
	if err != 0 {
		return errno(err)
	}
	st, _ := f.Stat()
	if !st.IsDir() {
		return ENOTDIR
	}

	*size = 0
	if plus {
		es, err := f.ReaddirPlus(ctx, 0)
		if err != 0 {
			return errno(err)
		}
		for _, e := range es {
			*size += 2 + int64(len(e.Name)) + 4*11
		}
		*buf = (*byte)(C.malloc(C.size_t(*size)))
		out := utils.FromBuffer(unsafe.Slice(*buf, *size))
		for _, e := range es {
			out.Put16(uint16(len(e.Name)))
			out.Put([]byte(e.Name))
			out.Put32(e.Attr.SMode())
			out.Put64(uint64(e.Inode))
			out.Put32(e.Attr.Nlink)
			out.Put32(e.Attr.Uid)
			out.Put32(e.Attr.Gid)
			out.Put64(e.Attr.Length)
			out.Put32(uint32(e.Attr.Atime))
			out.Put32(uint32(e.Attr.Mtime))
			out.Put32(uint32(e.Attr.Ctime))
		}
	} else {
		es, err := f.Readdir(ctx, 0)
		if err != 0 {
			return errno(err)
		}
		for _, e := range es {
			*size += 2 + int64(len(e.Name()))
		}
		*buf = (*byte)(C.malloc(C.size_t(*size)))
		out := utils.FromBuffer(unsafe.Slice(*buf, *size))
		for _, e := range es {
			out.Put16(uint16(len(e.Name())))
			out.Put([]byte(e.Name()))
		}
	}
	return 0
}

func toBuf(s uintptr, sz int32) []byte {
	return (*[1 << 30]byte)(unsafe.Pointer(s))[:sz:sz]
}

//export jfs_concat
func jfs_concat(pid int64, h int64, _dst *C.char, buf uintptr, bufsize int32) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	dst := C.GoString(_dst)
	ctx := w.withPid(pid)
	df, err := w.Open(ctx, dst, vfs.MODE_MASK_W)
	if err != 0 {
		return errno(err)
	}
	defer df.Close(ctx)
	srcs := strings.Split(string(toBuf(buf, bufsize-1)), "\000")
	var tmp string
	if len(srcs) > 1 {
		tmp = filepath.Join(filepath.Dir(dst), "."+filepath.Base(dst)+".merging")
		fi, err := w.Create(ctx, tmp, 0666, 022)
		if err != 0 {
			return errno(err)
		}
		defer func() { _ = w.Delete(ctx, tmp) }()
		defer fi.Close(ctx)
		var off uint64
		for _, src := range srcs {
			copied, err := w.CopyFileRange(ctx, src, 0, tmp, off, 1<<63)
			if err != 0 {
				return errno(err)
			}
			off += copied
		}
	} else {
		tmp = srcs[0]
	}

	dfi, _ := df.Stat()
	_, err = w.CopyFileRange(ctx, tmp, 0, dst, uint64(dfi.Size()), 1<<63)
	r := errno(err)
	if r == 0 {
		var wg sync.WaitGroup
		var limit = make(chan bool, 100)
		for _, src := range srcs {
			limit <- true
			wg.Add(1)
			go func(p string) {
				defer func() { <-limit }()
				defer wg.Done()
				if r := w.Delete(ctx, p); r != 0 {
					logger.Errorf("delete source %s: %s", p, r)
				}
			}(src)
		}
		wg.Wait()
	}
	return r
}

//export jfs_clone
func jfs_clone(pid int64, h int64, _src *C.char, _dst *C.char, preserve bool) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	src := C.GoString(_src)
	dst := C.GoString(_dst)
	ctx := w.withPid(pid)
	err := w.Clone(ctx, src, dst, preserve)
	return errno(err)
}

//export jfs_status
func jfs_status(pid int64, h int64, trash bool, session uint64, p_buf **byte) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	ctx := w.withPid(pid)

	var err error
	var output []byte
	if session != 0 {
		s, err := w.Meta().GetSession(session, true)
		if err != nil {
			logger.Errorf("get session %d: %s", session, err)
			return errno(syscall.EIO)
		}
		output, err = json.Marshal(s)
		if err != nil {
			logger.Errorf("marshal session: %v", err)
			return errno(syscall.EIO)
		}
	} else {
		sections := &meta.Sections{}
		err = meta.Status(ctx, w.Meta(), trash, sections)
		if err != nil {
			logger.Errorf("get status: %s", err)
			return errno(syscall.EIO)
		}
		output, err = json.Marshal(sections)
		if err != nil {
			logger.Errorf("marshal sessions: %v", err)
			return errno(syscall.EIO)
		}
	}

	*p_buf = (*byte)(C.malloc(C.size_t(len(output))))
	buf := unsafe.Slice(*p_buf, len(output))
	return int32(copy(buf, output))
}

//export jfs_lseek
func jfs_lseek(pid int64, fd int32, offset int64, whence int64) int64 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if ok {
		filesLock.Unlock()
		off, _ := f.Seek(f.w.withPid(pid), offset, int(whence))
		return off
	}
	filesLock.Unlock()
	return int64(EINVAL)
}

//export jfs_read
func jfs_read(pid int64, fd int32, cbuf uintptr, count int32) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	n, err := f.Read(f.w.withPid(pid), toBuf(cbuf, int32(count)))
	if err != nil && err != io.EOF {
		logger.Errorf("read %s: %s", f.Name(), err)
		return errno(err)
	}
	return int32(n)
}

//export jfs_pread
func jfs_pread(pid int64, fd int32, cbuf uintptr, count int32, offset int64) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	if count > (1 << 30) {
		count = 1 << 30
	}
	n, err := f.Pread(f.w.withPid(pid), toBuf(cbuf, count), offset)
	if err != nil && err != io.EOF {
		logger.Errorf("read %s: %s", f.Name(), err)
		return errno(err)
	}
	return int32(n)
}

//export jfs_write
func jfs_write(pid int64, fd int32, cbuf uintptr, count int32) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	buf := toBuf(cbuf, count)
	n, err := f.Write(f.w.withPid(pid), buf)
	if err != 0 {
		logger.Errorf("write %s: %s", f.Name(), err)
		return errno(err)
	}
	return int32(n)
}

//export jfs_pwrite
func jfs_pwrite(pid int64, fd int32, cbuf uintptr, count int32, offset int64) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	buf := toBuf(cbuf, count)
	n, err := f.Pwrite(f.w.withPid(pid), buf, int64(offset))
	if err != 0 {
		logger.Errorf("pwrite %s: %s", f.Name(), err)
		return errno(err)
	}
	return int32(n)
}

//export jfs_ftruncate
func jfs_ftruncate(pid int64, fd int32, size uint64) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	filesLock.Unlock()
	if !ok {
		return EINVAL
	}
	return errno(f.Truncate(f.w.withPid(pid), size))
}

//export jfs_flush
func jfs_flush(pid int64, fd int32) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	return errno(f.Flush(f.w.withPid(pid)))
}

//export jfs_fsync
func jfs_fsync(pid int64, fd int32) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	if !ok {
		filesLock.Unlock()
		return EINVAL
	}
	filesLock.Unlock()

	return errno(f.Fsync(f.w.withPid(pid)))
}

//export jfs_ranger_cfg
func jfs_ranger_cfg(cname *C.char, buf uintptr, count int32) int32 {
	name := C.GoString(cname)
	fslock.Lock()
	format := formats[name]
	fslock.Unlock()
	var cfg string
	if format != nil {
		url := format.RangerRestUrl
		name := format.RangerService
		if url != "" && name != "" {
			cfg = fmt.Sprintf("%s?name=%s", url, name)
		}
	}
	copy(toBuf(buf, count), cfg)
	return int32(len(cfg))
}

//export jfs_close
func jfs_close(pid int64, fd int32) int32 {
	filesLock.Lock()
	f, ok := openFiles[fd]
	filesLock.Unlock()
	if !ok {
		return 0
	}
	freeHandle(fd)
	return errno(f.Close(f.w.withPid(pid)))
}

//export jfs_warmup
func jfs_warmup(pid int64, h int64, _paths *C.char, numthreads int32, background, isEvict, isCheck bool, p_buf **byte) int32 {
	resp := &vfs.CacheResponse{Locations: make(map[string]uint64)}

	w := F(h)
	if w == nil {
		return EINVAL
	}
	ctx := w.withPid(pid)

	var paths []string
	err := json.Unmarshal([]byte(C.GoString(_paths)), &paths)
	if err != nil {
		logger.Errorf("invalid json: %s", C.GoString(_paths))
		return EINVAL
	}
	w.Warmup(ctx, paths, int(numthreads), background, isEvict, isCheck, resp)
	res, err := json.Marshal(resp)
	if err != nil {
		logger.Fatalf("json: %s", err)
	}

	*p_buf = (*byte)(C.malloc(C.size_t(len(res))))
	buf := unsafe.Slice(*p_buf, len(res))

	return int32(copy(buf, res))
}

func resolve(hostname string) string {
	if hostname == "" {
		return ""
	}
	start := time.Now()
	ips, err := net.DefaultResolver.LookupIP(context.Background(), "ip4", hostname)
	if err != nil {
		logger.Warningf("Fail to resolve host %s: %s", hostname, err)
		return ""
	}
	var ipStr []string
	for _, ip := range ips {
		ipStr = append(ipStr, ip.To4().String())
	}
	logger.Debugf("resolve %s to %s in %s", hostname, strings.Join(ipStr, ","), time.Since(start))
	return strings.Join(ipStr, ",")
}

func findLocalIP(mask string, iname string) (string, error) {
	for strings.HasSuffix(mask, ".0") {
		mask = mask[:len(mask)-2]
	}
	ifaces, err := net.Interfaces()
	if err != nil {
		return "", err
	}
	for _, iface := range ifaces {
		if iface.Flags&net.FlagUp == 0 && iname == "" && mask == "" {
			continue // interface down
		}
		if iface.Flags&net.FlagLoopback != 0 {
			continue // loopback interface
		}
		if iname != "" && iface.Name != iname && !strings.HasPrefix(iface.Name, iname+".") {
			continue
		}
		addrs, err := iface.Addrs()
		if err != nil {
			return "", err
		}
		for _, addr := range addrs {
			var ip net.IP
			switch v := addr.(type) {
			case *net.IPNet:
				ip = v.IP
			case *net.IPAddr:
				ip = v.IP
			}
			if ip == nil || ip.IsLoopback() {
				continue
			}
			ip = ip.To4()
			if ip == nil {
				continue // not an ipv4 address
			}
			if !strings.HasPrefix(ip.String(), mask) {
				continue
			}
			return ip.String(), nil
		}
	}
	return "", errors.New("are you connected to the network?")
}

//export jfs_get_token
func jfs_get_token(h int64, cname *C.char, buf uintptr, count int32, renewer *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	id, t, eno := kerb.issue(w.ctx, w.Meta(), C.GoString(cname), w.user, C.GoString(renewer))
	if eno != 0 {
		logger.Errorf("get token for %s: %s", w.volname, eno)
		return errno(eno)
	}
	wb := utils.NewNativeBuffer(toBuf(buf, count))
	wb.Put32(id)
	wb.Put64(uint64(t.Issued))
	wb.Put64(uint64(t.Expire))
	wb.Put([]byte(t.Password))
	return int32(wb.Offset())
}

//export jfs_renew_token
func jfs_renew_token(h int64, id uint32, password *C.char) int64 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	expire, eno := kerb.renew(w.ctx, w.Meta(), w.volname, w.user, id, C.GoString(password))
	if eno != 0 {
		logger.Errorf("renew token %d for %s: %s", id, w.volname, eno)
		return int64(errno(eno))
	}
	return expire
}

//export jfs_cancel_token
func jfs_cancel_token(h int64, id uint32, password *C.char) int32 {
	w := F(h)
	if w == nil {
		return EINVAL
	}
	return errno(kerb.cancelToken(w.ctx, w.Meta(), w.user, id, C.GoString(password)))
}

func main() {
}


================================================
FILE: sdk/java/libjfs/remote_write.go
================================================
// Copyright 2025 JuiceFS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package main provides remote write functionality for pushing Prometheus metrics
// to remote write endpoints.

package main

import (
	"bytes"
	"errors"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/golang/snappy"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/prompb"
	"google.golang.org/protobuf/proto"
)

const (
	defaultRemoteWriteTimeout = 15 * time.Second
)

// RemoteWriteConfig defines the remote write configuration.
type RemoteWriteConfig struct {
	// The URL to push metrics to. Required.
	URL string

	// Basic authentication string in format "username:password". Optional.
	Auth string

	// The interval to use for pushing data. Defaults to 15 seconds.
	Interval time.Duration

	// The timeout for pushing metrics. Defaults to 15 seconds.
	Timeout time.Duration

	// The Gatherer to use for metrics. Defaults to prometheus.DefaultGatherer.
	Gatherer prometheus.Gatherer

	// Common labels to add to all metrics. Optional.
	CommonLabels map[string]string

	// The logger that messages are written to. Defaults to no logging.
	Logger Logger

	// ErrorHandling defines how errors are handled.
	ErrorHandling HandlerErrorHandling
}

// RemoteWriter pushes metrics to the configured remote write endpoint.
type RemoteWriter struct {
	url           string
	gatherer      prometheus.Gatherer
	auth          string
	interval      time.Duration
	timeout       time.Duration
	errorHandling HandlerErrorHandling
	logger        Logger
	commonLabels  map[string]string
	client        *http.Client
}

// NewRemoteWriter returns a pointer to a new RemoteWriter struct.
func NewRemoteWriter(c *RemoteWriteConfig) (*RemoteWriter, error) {
	rw := &RemoteWriter{}

	if c.URL == "" {
		return nil, errors.New("missing URL")
	}
	rw.url = c.URL

	rw.auth = c.Auth

	var z time.Duration
	if c.Interval == z {
		rw.interval = defaultRemoteWriteTimeout
	} else {
		rw.interval = c.Interval
	}

	if c.Timeout == z {
		rw.timeout = defaultRemoteWriteTimeout
	} else {
		rw.timeout = c.Timeout
	}

	if c.Gatherer == nil {
		rw.gatherer = prometheus.DefaultGatherer
	} else {
		rw.gatherer = c.Gatherer
	}

	rw.commonLabels = c.CommonLabels
	rw.logger = c.Logger
	rw.errorHandling = c.ErrorHandling

	rw.client = &http.Client{
		Timeout: rw.timeout,
	}

	return rw, nil
}

// Push pushes Prometheus metrics to the configured remote write endpoint.
func (rw *RemoteWriter) Push() error {
	// Gather metrics from registry
	mfs, err := rw.gatherer.Gather()
	if err == nil && rw.commonLabels != nil {
		for _, mf := range mfs {
			for _, metric := range mf.Metric {
				for k, v := range rw.commonLabels {
					metric.Label = append(metric.Label, &dto.LabelPair{
						Name:  proto.String(k),
						Value: proto.String(v),
					})
				}
			}
		}
	}
	if err != nil || len(mfs) == 0 {
		switch rw.errorHandling {
		case AbortOnError:
			return err
		case ContinueOnError:
			if rw.logger != nil {
				rw.logger.Println("continue on error:", err)
			}
		default:
			return err
		}
	}

	// Convert metrics to TimeSeries
	tsList, err := rw.ConvertMetricsToTimeSeries(mfs)
	if err != nil {
		return fmt.Errorf("convert metrics: %w", err)
	}

	if len(tsList) == 0 {
		return nil // No samples to push
	}

	// Send to remote write endpoint
	wr := &prompb.WriteRequest{Timeseries: tsList}
	data, err := wr.Marshal()
	if err != nil {
		return fmt.Errorf("marshal protobuf: %w", err)
	}

	compressed := snappy.Encode(nil, data)
	req, err := http.NewRequest("POST", rw.url, bytes.NewReader(compressed))
	if err != nil {
		return fmt.Errorf("create request: %w", err)
	}

	req.Header.Set("Content-Encoding", "snappy")
	req.Header.Set("Content-Type", "application/x-protobuf")
	req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")

	if rw.auth != "" {
		if strings.Contains(rw.auth, ":") {
			parts := strings.Split(rw.auth, ":")
			req.SetBasicAuth(parts[0], parts[1])
		}
	}

	resp, err := rw.client.Do(req)
	if err != nil {
		return fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("remote_write failed: %s", resp.Status)
	}

	return nil
}

// ConvertMetricsToTimeSeries converts Prometheus metric families to TimeSeries.
func (rw *RemoteWriter) ConvertMetricsToTimeSeries(mfs []*dto.MetricFamily) ([]prompb.TimeSeries, error) {
	now := model.Time(time.Now().UnixMilli())
	samples, err := expfmt.ExtractSamples(&expfmt.DecodeOptions{
		Timestamp: now,
	}, mfs...)
	if err != nil {
		return nil, fmt.Errorf("extract samples: %w", err)
	}

	var tsList []prompb.TimeSeries
	for _, sample := range samples {
		// Convert model.Metric to prompb.Label slice
		labels := make([]prompb.Label, 0, len(sample.Metric))
		for name, value := range sample.Metric {
			labels = append(labels, prompb.Label{
				Name:  string(name),
				Value: string(value),
			})
		}

		tsList = append(tsList, prompb.TimeSeries{
			Labels: labels,
			Samples: []prompb.Sample{{
				Value:     float64(sample.Value),
				Timestamp: int64(sample.Timestamp),
			}},
		})
	}

	return tsList, nil
}


================================================
FILE: sdk/java/libjfs/remote_write_test.go
================================================
// Copyright 2025 JuiceFS Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"github.com/golang/snappy"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/prometheus/prompb"
	"google.golang.org/protobuf/proto"
)

// mockLogger implements the Logger interface for testing.
type mockLogger struct {
	messages []string
}

func (m *mockLogger) Println(v ...interface{}) {
	m.messages = append(m.messages, fmt.Sprint(v...))
}

func (m *mockLogger) Warnf(format string, args ...interface{}) {
	m.messages = append(m.messages, fmt.Sprintf(format, args...))
}

func TestNewRemoteWriter(t *testing.T) {
	tests := []struct {
		name    string
		config  *RemoteWriteConfig
		wantErr bool
		errMsg  string
	}{
		{
			name: "valid config",
			config: &RemoteWriteConfig{
				URL: "http://localhost:9090/api/v1/write",
			},
			wantErr: false,
		},
		{
			name: "missing URL",
			config: &RemoteWriteConfig{
				Auth: "user:pass",
			},
			wantErr: true,
			errMsg:  "missing URL",
		},
		{
			name: "with all options",
			config: &RemoteWriteConfig{
				URL:          "http://localhost:9090/api/v1/write",
				Auth:         "user:pass",
				Interval:     5 * time.Second,
				Timeout:      10 * time.Second,
				CommonLabels: map[string]string{"job": "test"},
			},
			wantErr: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rw, err := NewRemoteWriter(tt.config)
			if tt.wantErr {
				if err == nil {
					t.Errorf("NewRemoteWriter() expected error but got none")
					return
				}
				if !strings.Contains(err.Error(), tt.errMsg) {
					t.Errorf("NewRemoteWriter() error = %v, want error containing %v", err, tt.errMsg)
				}
				return
			}
			if err != nil {
				t.Errorf("NewRemoteWriter() unexpected error = %v", err)
				return
			}
			if rw == nil {
				t.Errorf("NewRemoteWriter() returned nil")
				return
			}

			// Check defaults
			if rw.url != tt.config.URL {
				t.Errorf("NewRemoteWriter() url = %v, want %v", rw.url, tt.config.URL)
			}
			if tt.config.Timeout == 0 && rw.timeout != defaultRemoteWriteTimeout {
				t.Errorf("NewRemoteWriter() timeout = %v, want %v", rw.timeout, defaultRemoteWriteTimeout)
			}
			if tt.config.Gatherer == nil && rw.gatherer != prometheus.DefaultGatherer {
				t.Errorf("NewRemoteWriter() gatherer should be DefaultGatherer")
			}
		})
	}
}

func TestRemoteWriter_convertMetricsToTimeSeries(t *testing.T) {
	// Create test registry with various metric types
	registry := prometheus.NewRegistry()

	// Counter
	counter := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "test_counter",
		Help: "A test counter",
	})
	counter.Add(5)
	registry.MustRegister(counter)

	// Gauge
	gauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "test_gauge",
		Help: "A test gauge",
	})
	gauge.Set(10)
	registry.MustRegister(gauge)

	// Histogram
	histogram := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "test_histogram",
		Help:    "A test histogram",
		Buckets: []float64{0.1, 0.5, 1.0, 5.0},
	})
	histogram.Observe(0.3)
	histogram.Observe(0.8)
	histogram.Observe(2.0)
	registry.MustRegister(histogram)

	// Summary
	summary := prometheus.NewSummary(prometheus.SummaryOpts{
		Name:       "test_summary",
		Help:       "A test summary",
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	})
	summary.Observe(0.2)
	summary.Observe(0.6)
	summary.Observe(1.5)
	registry.MustRegister(summary)

	rw := &RemoteWriter{
		commonLabels: map[string]string{"job": "test"},
	}

	mfs, err := registry.Gather()
	if err == nil && rw.commonLabels != nil {
		for _, mf := range mfs {
			for _, metric := range mf.Metric {
				for k, v := range rw.commonLabels {
					metric.Label = append(metric.Label, &dto.LabelPair{
						Name:  proto.String(k),
						Value: proto.String(v),
					})
				}
			}
		}
	}
	if err != nil {
		t.Fatalf("Failed to gather metrics: %v", err)
	}

	tsList, err := rw.ConvertMetricsToTimeSeries(mfs)
	if err != nil {
		t.Fatalf("convertMetricsToTimeSeries() error = %v", err)
	}

	if len(tsList) == 0 {
		t.Fatalf("convertMetricsToTimeSeries() returned empty time series")
	}

	// Check that we have the expected metrics
	metricNames := make(map[string]bool)
	for _, ts := range tsList {
		for _, label := range ts.Labels {
			if label.Name == "__name__" {
				metricNames[label.Value] = true
				break
			}
		}
	}

	expectedMetrics := []string{"test_counter", "test_gauge", "test_histogram_bucket", "test_histogram_sum", "test_histogram_count", "test_summary", "test_summary_sum", "test_summary_count"}
	for _, expected := range expectedMetrics {
		if !metricNames[expected] {
			t.Errorf("Expected metric %s not found in time series", expected)
		}
	}

	// Check that common labels are added
	for _, ts := range tsList {
		hasJobLabel := false
		for _, label := range ts.Labels {
			if label.Name == "job" && label.Value == "test" {
				hasJobLabel = true
				break
			}
		}
		if !hasJobLabel {
			t.Errorf("Common label 'job=test' not found in time series")
		}
	}
}

func TestRemoteWriter_Push(t *testing.T) {
	// Create a test server
	var receivedData []byte
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != "POST" {
			t.Errorf("Expected POST request, got %s", r.Method)
		}
		if r.Header.Get("Content-Encoding") != "snappy" {
			t.Errorf("Expected snappy encoding")
		}
		if r.Header.Get("Content-Type") != "application/x-protobuf" {
			t.Errorf("Expected protobuf content type")
		}

		// Read and decompress the body
		buf := make([]byte, r.ContentLength)
		r.Body.Read(buf)
		receivedData, _ = snappy.Decode(nil, buf)

		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()

	// Create test registry
	registry := prometheus.NewRegistry()
	// Counter
	counter := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "test_counter",
		Help: "A test counter",
	})
	counter.Add(5)
	registry.MustRegister(counter)

	// Gauge
	gauge := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "test_gauge",
		Help: "A test gauge",
	})
	gauge.Set(10)
	registry.MustRegister(gauge)

	// Histogram
	histogram := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "test_histogram",
		Help:    "A test histogram",
		Buckets: []float64{0.1, 0.5, 1.0, 5.0},
	})
	histogram.Observe(0.3)
	histogram.Observe(0.8)
	histogram.Observe(2.0)
	registry.MustRegister(histogram)

	// Summary
	summary := prometheus.NewSummary(prometheus.SummaryOpts{
		Name:       "test_summary",
		Help:       "A test summary",
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	})
	summary.Observe(0.2)
	summary.Observe(0.6)
	summary.Observe(1.5)
	registry.MustRegister(summary)

	logger := &mockLogger{}
	rw, err := NewRemoteWriter(&RemoteWriteConfig{
		URL:      server.URL,
		Gatherer: registry,
		Logger:   logger,
	})
	if err != nil {
		t.Fatalf("NewRemoteWriter() error = %v", err)
	}

	err = rw.Push()
	if err != nil {
		t.Errorf("Push() error = %v", err)
	}

	if len(receivedData) == 0 {
		t.Errorf("No data received by server")
	}

	// Verify the received data can be unmarshaled
	var wr prompb.WriteRequest
	if err := wr.Unmarshal(receivedData); err != nil {
		t.Errorf("Failed to unmarshal received data: %v", err)
	}

	if len(wr.Timeseries) == 0 {
		t.Errorf("No time series in received data")
	}
}

func TestRemoteWriter_PushWithAuth(t *testing.T) {
	var authHeader string
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		authHeader = r.Header.Get("Authorization")
		w.WriteHeader(http.StatusOK)
	}))
	defer server.Close()

	registry := prometheus.NewRegistry()
	counter := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "test_metric",
		Help: "A test metric",
	})
	counter.Add(1)
	registry.MustRegister(counter)

	rw, err := NewRemoteWriter(&RemoteWriteConfig{
		URL:      server.URL,
		Auth:     "testuser:testpass",
		Gatherer: registry,
	})
	if err != nil {
		t.Fatalf("NewRemoteWriter() error = %v", err)
	}

	err = rw.Push()
	if err != nil {
		t.Errorf("Push() error = %v", err)
	}

	if !strings.Contains(authHeader, "Basic") {
		t.Errorf("Expected Basic auth header, got: %s", authHeader)
	}
}


================================================
FILE: sdk/java/pom.xml
================================================
<project xmlns="http://maven.apache.org/POM/4.0.0"
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>io.juicefs</groupId>
	<name>juicefs-hadoop</name>
	<url>https://github.com/juicedata/juicefs</url>
	<description>Hadoop FileSystem implementation for JuiceFS</description>
	<artifactId>juicefs-hadoop</artifactId>
	<version>1.4-dev</version>
	<packaging>jar</packaging>
	<properties>
		<hadoop.version>3.1.4</hadoop.version>
		<flink.version>1.16.3</flink.version>
		<argLine>-Djdk.net.URLClassPath.disableClassPathURLCheck=true
			-Djava.library.path=${project.basedir}/../mount/libjfs:${java.library.path}
			-Djdk.attach.allowAttachSelf=true</argLine>
	</properties>

	<developers>
		<developer>
			<name>Juicedata</name>
			<email>team@juicedata.io</email>
		</developer>
	</developers>

	<licenses>
		<license>
			<name>The Apache Software License, Version 2.0</name>
			<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
			<distribution>repo</distribution>
		</license>
	</licenses>

	<scm>
		<url>https://github.com/juicedata/juicefs</url>
		<connection>https://github.com/juicedata/juicefs</connection>
		<developerConnection>scm:git:https://github.com/juicedata/juicefs</developerConnection>
	</scm>

	<build>
		<plugins>
			<plugin>
				<artifactId>maven-surefire-plugin</artifactId>
				<version>2.19.1</version>
				<configuration>
					<argLine>${argLine}</argLine>
					<trimStackTrace>false</trimStackTrace>
					<systemProperties>
						<test.cache.data>${project.build.directory}/test-classes</test.cache.data>
					</systemProperties>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-shade-plugin</artifactId>
				<executions>
					<execution>
						<phase>package</phase>
						<goals>
							<goal>shade</goal>
						</goals>
					</execution>
				</executions>
				<configuration>
					<finalName>${artifactId}-${version}</finalName>
					<relocations>
						<relocation>
							<pattern>org.objectweb.asm</pattern>
							<shadedPattern>io.juicefs.shaded.org.objectweb.asm</shadedPattern>
						</relocation>
						<relocation>
							<pattern>com.beust</pattern>
							<shadedPattern>io.juicefs.shaded.com.beust</shadedPattern>
						</relocation>
						<relocation>
							<pattern>org.json</pattern>
							<shadedPattern>io.juicefs.shaded.org.json</shadedPattern>
						</relocation>
						<relocation>
							<pattern>org.javassist</pattern>
							<shadedPattern>io.juicefs.shaded.org.javassist</shadedPattern>
						</relocation>
						<relocation>
							<pattern>com.google.common</pattern>
							<shadedPattern>io.juicefs.shaded.com.google.common</shadedPattern>
						</relocation>
						<relocation>
							<pattern>org.apache.commons.lang</pattern>
							<shadedPattern>io.juicefs.shaded.org.apache.commons.lang</shadedPattern>
						</relocation>
						<relocation>
							<pattern>com.kstruct.gethostname4j</pattern>
							<shadedPattern>io.juicefs.shaded.com.kstruct.gethostname4j</shadedPattern>
						</relocation>
					</relocations>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>8</source>
					<target>8</target>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.sonatype.central</groupId>
				<artifactId>central-publishing-maven-plugin</artifactId>
				<version>0.8.0</version>
				<extensions>true</extensions>
				<configuration>
					<publishingServerId>central</publishingServerId>
					<autoPublish>false</autoPublish>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-javadoc-plugin</artifactId>
				<version>2.9.1</version>
				<configuration>
					<source>8</source>
				</configuration>
				<executions>
					<execution>
						<id>attach-javadocs</id>
						<goals>
							<goal>jar</goal>
						</goals>
					</execution>
				</executions>
			</plugin>

			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-source-plugin</artifactId>
				<version>2.2.1</version>
				<configuration>
					<excludeResources>true</excludeResources>
				</configuration>
				<executions>
					<execution>
						<id>attach-sources</id>
						<goals>
							<goal>jar-no-fork</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-gpg-plugin</artifactId>
				<version>1.6</version>
				<executions>
					<execution>
						<id>sign-artifacts</id>
						<phase>verify</phase>
						<goals>
							<goal>sign</goal>
						</goals>
					</execution>
				</executions>
				<configuration>
					<!-- Prevent gpg from using pinentry programs -->
					<gpgArguments>
						<arg>--pinentry-mode</arg>
						<arg>loopback</arg>
					</gpgArguments>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-jar-plugin</artifactId>
				<configuration>
					<archive>
						<manifest>
							<addClasspath>true</addClasspath>
							<mainClass>io.juicefs.Main</mainClass>
						</manifest>
					</archive>
				</configuration>
			</plugin>
			<plugin>
				<groupId>org.jacoco</groupId>
				<artifactId>jacoco-maven-plugin</artifactId>
				<version>0.8.7</version>
				<executions>
					<execution>
						<goals>
							<goal>prepare-agent</goal>
						</goals>
					</execution>
					<execution>
						<id>report</id>
						<phase>test</phase>
						<goals>
							<goal>report</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
			<plugin>
				<groupId>org.pitest</groupId>
				<artifactId>pitest-maven</artifactId>
				<version>1.9.11</version>
				<configuration>
					<targetClasses>
						<param>io.juicefs.JuiceFileSystemImpl*</param>
					</targetClasses>
					<targetTests>
						<param>io.juicefs.JuiceFileSystemTest</param>
					</targetTests>
					<timeoutConstant>1000</timeoutConstant>
					<avoidCallsTo>
						<avoidCallsTo>org.apache.log4j</avoidCallsTo>
						<avoidCallsTo>org.slf4j</avoidCallsTo>
						<avoidCallsTo>org.apache.commons.logging</avoidCallsTo>
					</avoidCallsTo>
				</configuration>
			</plugin>
			<plugin>
				<groupId>io.github.git-commit-id</groupId>
				<artifactId>git-commit-id-maven-plugin</artifactId>
				<version>4.9.9</version>
				<executions>
					<execution>
						<id>get-the-git-infos</id>
						<goals>
							<goal>revision</goal>
						</goals>
						<phase>initialize</phase>
					</execution>
				</executions>
				<configuration>
					<generateGitPropertiesFile>true</generateGitPropertiesFile>
					<generateGitPropertiesFilename>
						${project.build.outputDirectory}/juicefs-ver.properties</generateGitPropertiesFilename>
					<includeOnlyProperties>
						<includeOnlyProperty>^git.build.(time|version)$</includeOnlyProperty>
						<includeOnlyProperty>^git.commit.id.(abbrev|full)$</includeOnlyProperty>
					</includeOnlyProperties>
					<commitIdGenerationMode>full</commitIdGenerationMode>
				</configuration>
			</plugin>
		</plugins>
		<resources>
			<resource>
				<directory>libjfs/target</directory>
			</resource>
			<resource>
				<directory>src/main/resources</directory>
			</resource>
		</resources>
		<testResources>
			<testResource>
				<directory>conf</directory>
			</testResource>
			<testResource>
				<directory>src/test/resources</directory>
			</testResource>
		</testResources>
	</build>
	<dependencies>
		<dependency>
			<groupId>com.github.jnr</groupId>
			<artifactId>jnr-ffi</artifactId>
			<version>2.2.12</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
			<scope>test</scope>
			<type>test-jar</type>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>${hadoop.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.13.1</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.json</groupId>
			<artifactId>json</artifactId>
			<version>20180813</version>
		</dependency>
		<dependency>
			<groupId>org.javassist</groupId>
			<artifactId>javassist</artifactId>
			<version>3.25.0-GA</version>
		</dependency>
		<dependency>
			<groupId>com.beust</groupId>
			<artifactId>jcommander</artifactId>
			<version>1.81</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hive</groupId>
			<artifactId>hive-metastore</artifactId>
			<exclusions>
				<exclusion>
					<groupId>org.apache.hadoop</groupId>
					<artifactId>hadoop-annotations</artifactId>
				</exclusion>
			</exclusions>
			<scope>
				provided
			</scope>
			<version>1.2.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-hadoop-fs</artifactId>
			<version>${flink.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-core</artifactId>
			<version>${flink.version}</version>
			<scope>provided</scope>
		</dependency>
		<dependency>
			<groupId>com.google.guava</groupId>
			<artifactId>guava</artifactId>
			<version>32.0.1-jre</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-minicluster</artifactId>
			<version>${hadoop.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.flink</groupId>
			<artifactId>flink-streaming-java</artifactId>
			<version>${flink.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.kitesdk</groupId>
			<artifactId>kite-data-core</artifactId>
			<version>1.1.0</version>
			<scope>provided</scope>
			<exclusions>
				<exclusion>
					<groupId>*</groupId>
					<artifactId>*</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>commons-lang</groupId>
			<artifactId>commons-lang</artifactId>
			<version>2.6</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ranger</groupId>
			<artifactId>ranger-plugins-common</artifactId>
			<version>2.3.0</version>
			<exclusions>
				<exclusion>
					<groupId>*</groupId>
					<artifactId>*</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>com.kstruct</groupId>
			<artifactId>gethostname4j</artifactId>
			<version>0.0.2</version>
		</dependency>
		<dependency>
			<groupId>com.sun.jersey</groupId>
			<artifactId>jersey-bundle</artifactId>
			<version>1.19.3</version>
		</dependency>
		<dependency>
			<groupId>org.codehaus.jackson</groupId>
			<artifactId>jackson-jaxrs</artifactId>
			<version>1.9.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ranger</groupId>
			<artifactId>ranger-plugins-audit</artifactId>
			<version>2.3.0</version>
			<exclusions>
				<exclusion>
					<groupId>*</groupId>
					<artifactId>*</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
	</dependencies>

</project>

================================================
FILE: sdk/java/src/main/java/io/juicefs/FlinkFileSystemFactory.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.runtime.fs.hdfs.HadoopFileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;

public class FlinkFileSystemFactory implements org.apache.flink.core.fs.FileSystemFactory {
  private static final Logger LOG = LoggerFactory.getLogger(FlinkFileSystemFactory.class);
  private org.apache.hadoop.conf.Configuration conf;

  private static final String[] FLINK_CONFIG_PREFIXES = {"fs.", "juicefs."};
  private String scheme;

  @Override
  public void configure(Configuration config) {
    conf = new org.apache.hadoop.conf.Configuration();
    if (config != null) {
      for (String key : config.keySet()) {
        for (String prefix : FLINK_CONFIG_PREFIXES) {
          if (key.startsWith(prefix)) {
            String value = config.getString(key, null);
            if (value != null) {
              if ("io.juicefs.JuiceFileSystem".equals(value.trim())) {
                this.scheme = key.split("\\.")[1];
              }
              conf.set(key, value);
            }
          }
        }
      }
    }
  }

  @Override
  public String getScheme() {
    if (scheme == null) {
      return "jfs";
    }
    return scheme;
  }

  @Override
  public FileSystem create(URI fsUri) throws IOException {
    JuiceFileSystem fs = new JuiceFileSystem();
    fs.initialize(fsUri, conf);
    return new HadoopFileSystem(fs);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/JuiceFS.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class JuiceFS extends DelegateToFileSystem {
  JuiceFS(final URI uri, final Configuration conf) throws IOException, URISyntaxException {
    super(uri, FileSystem.get(uri, conf), conf, uri.getScheme(), false);
  }

  @Override
  public int getUriDefaultPort() {
    return -1;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/JuiceFileSystem.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import io.juicefs.utils.BgTaskUtil;
import io.juicefs.utils.PatchUtil;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.concurrent.TimeUnit;

/****************************************************************
 * Implement the FileSystem API for JuiceFS
 *****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class JuiceFileSystem extends FilterFileSystem {
  private static final Logger LOG = LoggerFactory.getLogger(JuiceFileSystem.class);

  private static boolean fileChecksumEnabled = false;
  private static boolean distcpPatched = false;

  static {
    PatchUtil.patchBefore("org.apache.flink.runtime.fs.hdfs.HadoopRecoverableFsDataOutputStream",
            "waitUntilLeaseIsRevoked",
            new String[]{"org.apache.hadoop.fs.FileSystem", "org.apache.hadoop.fs.Path"},
            "if (fs instanceof io.juicefs.JuiceFileSystem) {\n" +
                    "            return ((io.juicefs.JuiceFileSystem)fs).isFileClosed(path);\n" +
                    "        }");
  }

  private synchronized static void patchDistCpChecksum() {
    if (distcpPatched)
      return;
    PatchUtil.patchBefore("org.apache.hadoop.tools.mapred.RetriableFileCopyCommand",
            "compareCheckSums",
            null,
            "if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {return ;}");
    distcpPatched = true;
  }

  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    fileChecksumEnabled = Boolean.parseBoolean(getConf(conf, "file.checksum", "false"));
    boolean asBgTask = conf.getBoolean("juicefs.internal-bg-task", false);
    if (!asBgTask && !Boolean.parseBoolean(getConf(conf, "disable-trash-emptier", "false"))) {
      BgTaskUtil.startTrashEmptier(uri.getHost(), () -> {
        runTrashEmptier(uri, conf);
      }, 10, TimeUnit.MINUTES);
    }
  }

  private void runTrashEmptier(URI uri, final Configuration conf) {
    try {
      Configuration newConf = new Configuration(conf);
      newConf.setBoolean("juicefs.internal-bg-task", true);
      UserGroupInformation superUser = UserGroupInformation.createRemoteUser(getConf(conf, "superuser", "hdfs"));
      FileSystem emptierFs = superUser.doAs((PrivilegedExceptionAction<FileSystem>) () -> {
        JuiceFileSystemImpl fs = new JuiceFileSystemImpl();
        fs.initialize(uri, newConf);
        return fs;
      });
      new Trash(emptierFs, newConf).getEmptier().run();
    } catch (Exception e) {
      LOG.warn("run trash emptier for {} failed", uri.getHost(), e);
    }
  }

  private String getConf(Configuration conf, String key, String value) {
    String name = fs.getUri().getHost();
    String v = conf.get("juicefs." + key, value);
    if (name != null && !name.equals("")) {
      v = conf.get("juicefs." + name + "." + key, v);
    }
    if (v != null)
      v = v.trim();
    return v;
  }

  public JuiceFileSystem() {
    super(new JuiceFileSystemImpl());
  }

  @Override
  public String getScheme() {
    StackTraceElement[] elements = Thread.currentThread().getStackTrace();
    if (elements[2].getClassName().equals("org.apache.flink.runtime.fs.hdfs.HadoopRecoverableWriter") &&
        (elements[2].getMethodName().equals("<init>") || elements[2].getMethodName().equals("checkSupportedFSSchemes"))) {
      return "hdfs";
    }
    return fs.getScheme();
  }

  public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
    return fs.create(f, FsPermission.getFileDefault(), overwrite, bufferSize, replication, blockSize, progress);
  }

  public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
    return fs.createNonRecursive(f, FsPermission.getFileDefault(), overwrite, bufferSize, replication, blockSize, progress);
  }

  @Override
  public ContentSummary getContentSummary(Path f) throws IOException {
    return fs.getContentSummary(f);
  }

  public boolean isFileClosed(final Path src) throws IOException {
    FileStatus st = fs.getFileStatus(src);
    return st.getLen() > 0;
  }

  @Override
  public FileChecksum getFileChecksum(Path f, long length) throws IOException {
    if (!fileChecksumEnabled)
      return null;
    patchDistCpChecksum();
    return super.getFileChecksum(f, length);
  }

  @Override
  public FileChecksum getFileChecksum(Path f) throws IOException {
    if (!fileChecksumEnabled)
      return null;
    patchDistCpChecksum();
    return super.getFileChecksum(f);
  }

  public Token<?> getDelegationToken(String renewer) throws IOException {
    return fs.getDelegationToken(renewer);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/JuiceFileSystemImpl.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import com.google.common.collect.Lists;
import com.kenai.jffi.internal.StubLoader;
import io.juicefs.exception.QuotaExceededException;
import io.juicefs.kerberos.AuthCredential;
import io.juicefs.kerberos.JuiceFSDelegationTokenIdentifier;
import io.juicefs.kerberos.KerberosUtil;
import io.juicefs.metrics.JuiceFSInstrumentation;
import io.juicefs.permission.RangerConfig;
import io.juicefs.permission.RangerPermissionChecker;
import io.juicefs.utils.*;
import jnr.ffi.LibraryLoader;
import jnr.ffi.Memory;
import jnr.ffi.Pointer;
import jnr.ffi.Runtime;
import jnr.ffi.annotations.Delegate;
import jnr.ffi.annotations.In;
import jnr.ffi.annotations.Out;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.*;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.HadoopKerberosName;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DirectBufferPool;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.VersionInfo;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.jar.JarFile;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;

/****************************************************************
 * Implement the FileSystem API for JuiceFS
 *****************************************************************/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class JuiceFileSystemImpl extends FileSystem {

  public static final Logger LOG = LoggerFactory.getLogger(JuiceFileSystemImpl.class);
  public static final String gitVer = loadVersion();

  static String loadVersion() {
    try (InputStream in = JuiceFileSystemImpl.class.getClassLoader().getResourceAsStream("juicefs-ver.properties")) {
      Properties prop = new Properties();
      prop.load(in);
      return prop.getProperty("git.commit.id.abbrev");
    } catch (IOException e) {
      LOG.warn("Failed to load juicefs-ver.properties", e);
      return "unknown";
    }
  }

  private Path workingDir;
  private String name;
  private String user;
  private String superuser;
  private String supergroup;
  private URI uri;
  private long blocksize;
  private int minBufferSize;
  private int cacheReplica;
  private boolean fileChecksumEnabled;
  private final boolean isSuperGroupFileSystem;
  private boolean isBackGroundTask = false;

  private JuiceFileSystemImpl superGroupFileSystem;
  private RangerPermissionChecker rangerPermissionChecker;
  private boolean dtEnabled; // whether delegation token was enabled
  private static Libjfs lib = loadLibrary();

  private long handle;
  private UserGroupInformation ugi;
  private String homeDirPrefix = "/user";
  private String discoverNodesUrl;
  private static final Map<String, Map<String, String>> cachedHostsForName = new ConcurrentHashMap<>(); // (name -> (ip -> hostname))
  private static final Map<String, ConsistentHash<String>> hashForName = new ConcurrentHashMap<>(); // (name -> consistentHash)
  private static final Map<String, FileStatus> lastFileStatus = new ConcurrentHashMap<>();

  private FsPermission uMask;
  private String hflushMethod;

  private static final DirectBufferPool directBufferPool = new DirectBufferPool();

  private boolean metricsEnable = false;

  /*
   * hadoop compatibility
   */
  private boolean withStreamCapability;
  private Constructor<FileStatus> fileStatusConstructor;

  // constructor for BufferedFSOutputStreamWithStreamCapabilities
  private Constructor<?> constructor;
  private Method setStorageIds;
  private String[] storageIds;
  private Random random = new Random();

  private static final String USERNAME_UID_PATTERN = "[a-zA-Z0-9_-]+:[0-9]+";
  private static final String GROUPNAME_GID_USERNAMES_PATTERN = "[a-zA-Z0-9_-]+:[0-9]+:[,a-zA-Z0-9_-]+";

  /*
    go call back
  */
  private static Libjfs.LogCallBack callBack;

  public static interface Libjfs {
    long jfs_init(Pointer credential, int size, String name, String jsonConf, String user, String group, String superuser, String supergroup);

    void jfs_update_uid_grouping(String name, String uidstr, String grouping);

    int jfs_term(long pid, long h);

    int jfs_open(long pid, long h, String path, @Out ByteBuffer fileLen, int flags);

    int jfs_access(long pid, long h, String path, int flags);

    long jfs_lseek(long pid, int fd, long pos, int whence);

    int jfs_pread(long pid, int fd, @Out ByteBuffer b, int len, long offset);

    int jfs_write(long pid, int fd, @In ByteBuffer b, int len);

    int jfs_flush(long pid, int fd);

    int jfs_fsync(long pid, int fd);

    int jfs_close(long pid, int fd);

    int jfs_create(long pid, long h, String path, short mode, short umask);

    int jfs_truncate(long pid, long h, String path, long length);

    int jfs_delete(long pid, long h, String path);

    int jfs_rmr(long pid, long h, String path);

    int jfs_mkdir(long pid, long h, String path, short mode, short umask);

    int jfs_rename(long pid, long h, String src, String dst);

    int jfs_stat1(long pid, long h, String path, Pointer buf);

    int jfs_lstat1(long pid, long h, String path, Pointer buf);

    int jfs_summary(long pid, long h, String path, Pointer buf);

    int jfs_statvfs(long pid, long h, Pointer buf);

    int jfs_chmod(long pid, long h, String path, int mode);

    int jfs_setOwner(long pid, long h, String path, String user, String group);

    int jfs_utime(long pid, long h, String path, long mtime, long atime);

    int jfs_listdir(long pid, long h, String path, int offset, Pointer buf, int size);

    int jfs_concat(long pid, long h, String path, Pointer buf, int bufsize);

    int jfs_setXattr(long pid, long h, String path, String name, Pointer value, int vlen, int mode);

    int jfs_getXattr(long pid, long h, String path, String name, Pointer buf, int size);

    int jfs_listXattr(long pid, long h, String path, Pointer buf, int size);

    int jfs_removeXattr(long pid, long h, String path, String name);

    int jfs_getfacl(long pid, long h, String path, int acltype, Pointer b, int len);

    int jfs_setfacl(long pid, long h, String path, int acltype, Pointer b, int len);

    int jfs_getGroups(String volName, String user, Pointer buf, int len);

    int jfs_ranger_cfg(String volName, Pointer buf, int size);

    int jfs_is_superuser(long h, String user, String group);

    void jfs_set_callback(LogCallBack callBack);

    int jfs_get_token(long h, String name, Pointer buf, int bufSize, String renewer);

    long jfs_renew_token(long h, int id, String password);

    int jfs_cancel_token(long h, int id, String password);

    interface LogCallBack {
      @Delegate
      void call(String msg);
    }
  }

  static class LogCallBackImpl implements Libjfs.LogCallBack {
    Libjfs lib;

    public LogCallBackImpl(Libjfs lib) {
      this.lib = lib;
    }

    @Override
    public void call(String msg){
      try {
        // 2022/12/20 14:48:30.808303 juicefs[80976] <ERROR>: error msg [main.go:357]
        msg = msg.trim();
        String[] items = msg.split("\\s+", 5);
        if (items.length > 4) {
          switch (items[3]) {
            case "<DEBUG>:":
              LOG.debug(msg);
              break;
            case "<INFO>:":
              LOG.info(msg);
              break;
            case "<WARNING>:":
              LOG.warn(msg);
              break;
            case "<ERROR>:":
              LOG.error(msg);
              break;
          }
        }
      } catch (Throwable ignored){}
    }

    @Override
    protected void finalize() throws Throwable {
      lib.jfs_set_callback(null);
    }
  }

  static int EPERM = -0x01;
  static int ENOENT = -0x02;
  static int EINTR = -0x04;
  static int EIO = -0x05;
  static int EACCESS = -0xd;
  static int EEXIST = -0x11;
  static int ENOTDIR = -0x14;
  static int EINVAL = -0x16;
  static int ENOSPACE = -0x1c;
  static int EDQUOT = -0x45;
  static int EROFS = -0x1e;
  static int ENOTEMPTY = -0x27;
  static int ENODATA = -0x3d;
  static int ENOATTR = -0x5d;
  static int ENOTSUP = -0x5f;

  static int MODE_MASK_R = 4;
  static int MODE_MASK_W = 2;
  static int MODE_MASK_X = 1;

  private IOException error(int errno, Path p) {
    String pStr = p == null ? "" : p.toString();
    if (errno == EPERM) {
      return new PathPermissionException(pStr);
    } else if (errno == ENOTDIR) {
      return new ParentNotDirectoryException();
    } else if (errno == ENOENT) {
      return new FileNotFoundException(pStr+ ": not found");
    } else if (errno == EACCESS) {
      try {
        FileStatus stat = getFileStatusInternalNoException(p);
        if (stat != null) {
          FsPermission perm = stat.getPermission();
          return new AccessControlException(String.format("Permission denied: user=%s, path=\"%s\":%s:%s:%s%s", user, p,
                  stat.getOwner(), stat.getGroup(), stat.isDirectory() ? "d" : "-", perm));
        }
      } catch (Exception e) {
        LOG.warn("fail to generate better error message", e);
      }
      return new AccessControlException("Permission denied: " + pStr);
    } else if (errno == EEXIST) {
      return new FileAlreadyExistsException();
    } else if (errno == EINVAL) {
      return new InvalidRequestException("Invalid parameter");
    } else if (errno == ENOTEMPTY) {
      return new PathIsNotEmptyDirectoryException(pStr);
    } else if (errno == EINTR) {
      return new InterruptedIOException();
    } else if (errno == ENOTSUP) {
      return new PathOperationException(pStr);
    } else if (errno == ENOSPACE) {
      return new IOException("No space");
    } else if (errno == EDQUOT) {
      return new QuotaExceededException("Quota exceeded");
    } else if (errno == EROFS) {
      return new IOException("Read-only Filesystem");
    } else if (errno == EIO) {
      return new IOException(pStr);
    } else {
      return new IOException("errno: " + errno + " " + pStr);
    }
  }

  public JuiceFileSystemImpl() {
    this.isSuperGroupFileSystem = false;
  }

  @Override
  public long getDefaultBlockSize() {
    return blocksize;
  }

  private String normalizePath(Path path) {
    return makeQualified(path).toUri().getPath();
  }

  @Override
  public String getScheme() {
    return uri.getScheme();
  }

  @Override
  public String toString() {
    return uri.toString();
  }

  @Override
  public URI getUri() {
    return uri;
  }

  private String getConf(Configuration conf, String key, String value) {
    String v = conf.get("juicefs." + key, value);
    if (name != null && !name.equals("")) {
      v = conf.get("juicefs." + name + "." + key, v);
    }
    if (v != null)
      v = v.trim();
    return v;
  }

  @Override
  public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    setConf(conf);

    this.uri = uri;
    name = conf.get("juicefs.name", uri.getHost());
    if (null == name) {
      throw new IOException("name is required");
    }

    blocksize = conf.getLongBytes("juicefs.block.size", conf.getLongBytes("dfs.blocksize", 128 << 20));
    minBufferSize = conf.getInt("juicefs.min-buffer-size", 128 << 10);
    cacheReplica = Integer.parseInt(getConf(conf, "cache-replica", "1"));
    fileChecksumEnabled = Boolean.parseBoolean(getConf(conf, "file.checksum", "false"));

    this.ugi = UserGroupInformation.getCurrentUser();
    user = ugi.getShortUserName();
    String groupStr = "nogroup";
    if (ugi.getGroupNames().length > 0) {
      groupStr = String.join(",", ugi.getGroupNames());
    }
    superuser = getConf(conf, "superuser", "hdfs");
    supergroup = getConf(conf, "supergroup", conf.get("dfs.permissions.superusergroup", "supergroup"));
    isBackGroundTask = conf.getBoolean("juicefs.internal-bg-task", false);
    boolean asSuperFs = isSuperGroupFileSystem || isBackGroundTask;

    synchronized (JuiceFileSystemImpl.class) {
      if (callBack == null) {
        callBack = new LogCallBackImpl(lib);
        lib.jfs_set_callback(callBack);
      }
    }

    JSONObject obj = new JSONObject();
    String spn = SecurityUtil.getServerPrincipal(getConf(conf, "server-principal", ""), name);
    if (spn.contains("@")) {
      spn = spn.split("@")[0];
    }
    AuthCredential authCredential = buildAuthCredential(spn);
    Pointer credential = null;
    int crdSize = 0;
    if (authCredential != null) {
      crdSize = authCredential.getCredential().length;
      credential = Memory.allocate(Runtime.getRuntime(lib), crdSize);
      credential.put(0, authCredential.getCredential(), 0, crdSize);
    }

    if (authCredential != null) {
      obj.put("authMethod", authCredential.getMethod());
    }
    if (ugi.getRealUser() != null) {
      obj.put("realUser", ugi.getRealUser().getShortUserName());
    }

    String[] keys = new String[]{"meta",};
    for (String key : keys) {
      obj.put(key, getConf(conf, key, ""));
    }
    String[] bkeys = new String[]{"debug", "writeback"};
    for (String key : bkeys) {
      obj.put(key, Boolean.valueOf(getConf(conf, key, "false")));
    }
    String subdir = getConf(conf, "subdir", "");
    if (!subdir.isEmpty()) {
      // Support multiple subdirs separated by comma
      String[] subdirs = subdir.split(",");
      List<String> normalizedSubdirs = new ArrayList<>();
      for (String sd : subdirs) {
        sd = sd.trim();
        if (sd.isEmpty() || sd.equals("/")) {
          continue;  // skip empty string or root
        }
        if (!sd.startsWith("/")) {
          sd = "/" + sd;
        }
        sd = sd.replaceAll("/+$", "");
        normalizedSubdirs.add(sd);
      }
      if (normalizedSubdirs.isEmpty()) {
        subdir = "";
      } else {
        subdir = String.join(",", normalizedSubdirs);
        LOG.debug("subdir {} is enabled", subdir);
      }
    }
    obj.put("bucket", getConf(conf, "bucket", ""));
    obj.put("storageClass", getConf(conf, "storage-class", ""));
    obj.put("readOnly", Boolean.valueOf(getConf(conf, "read-only", "false")));
    obj.put("noSession", Boolean.valueOf(getConf(conf, "no-session", "false")));
    obj.put("noBGJob", Boolean.valueOf(getConf(conf, "no-bgjob", "false")));
    obj.put("cacheDir", getConf(conf, "cache-dir", "memory"));
    obj.put("cacheSize", getConf(conf, "cache-size", "100"));
    obj.put("cacheItems", Integer.valueOf(getConf(conf, "cache-items", "0")));
    obj.put("openCache", getConf(conf, "open-cache", "0.0"));
    obj.put("backupMeta", getConf(conf, "backup-meta", "3600"));
    obj.put("backupSkipTrash", Boolean.valueOf(getConf(conf, "backup-skip-trash", "false")));
    obj.put("heartbeat", getConf(conf, "heartbeat", "12"));
    obj.put("attrTimeout", getConf(conf, "attr-cache", "0.0"));
    obj.put("entryTimeout", getConf(conf, "entry-cache", "0.0"));
    obj.put("dirEntryTimeout", getConf(conf, "dir-entry-cache", "0.0"));
    obj.put("cacheFullBlock", Boolean.valueOf(getConf(conf, "cache-full-block", "true")));
    obj.put("cacheChecksum", getConf(conf, "verify-cache-checksum", "extend"));
    obj.put("cacheEviction", getConf(conf, "cache-eviction", "2-random"));
    obj.put("cacheScanInterval", getConf(conf, "cache-scan-interval", "300"));
    obj.put("cacheExpire", getConf(conf, "cache-expire", "0"));
    obj.put("autoCreate", Boolean.valueOf(getConf(conf, "auto-create-cache-dir", "true")));
    obj.put("maxUploads", Integer.valueOf(getConf(conf, "max-uploads", "20")));
    obj.put("maxDownloads", Integer.valueOf(getConf(conf, "max-downloads", "200")));
    obj.put("maxDeletes", Integer.valueOf(getConf(conf, "max-deletes", "10")));
    obj.put("skipDirNlink", Integer.valueOf(getConf(conf, "skip-dir-nlink", "20")));
    obj.put("skipDirMtime", getConf(conf, "skip-dir-mtime", "100ms"));
    obj.put("uploadLimit", getConf(conf, "upload-limit", "0"));
    obj.put("downloadLimit", getConf(conf, "download-limit", "0"));
    obj.put("ioRetries", Integer.valueOf(getConf(conf, "io-retries", "10")));
    obj.put("getTimeout", getConf(conf, "get-timeout", getConf(conf, "object-timeout", "5")));
    obj.put("putTimeout", getConf(conf, "put-timeout", getConf(conf, "object-timeout", "60")));
    obj.put("memorySize", getConf(conf, "memory-size", "300"));
    obj.put("prefetch", Integer.valueOf(getConf(conf, "prefetch", "1")));
    obj.put("readahead", getConf(conf, "max-readahead", "0"));
    obj.put("pushGateway", getConf(conf, "push-gateway", ""));
    obj.put("pushInterval", getConf(conf, "push-interval", "10"));
    obj.put("pushAuth", getConf(conf, "push-auth", ""));
    obj.put("pushLabels", getConf(conf, "push-labels", ""));
    obj.put("pushGraphite", getConf(conf, "push-graphite", ""));
    obj.put("pushRemoteWrite", getConf(conf, "push-remote-write", ""));
    obj.put("pushRemoteWriteAuth", getConf(conf, "push-remote-write-auth", ""));
    obj.put("fastResolve", Boolean.valueOf(getConf(conf, "fast-resolve", "true")));
    obj.put("noUsageReport", Boolean.valueOf(getConf(conf, "no-usage-report", "false")));
    obj.put("freeSpace", getConf(conf, "free-space", "0.1"));
    obj.put("accessLog", getConf(conf, "access-log", ""));
    obj.put("superFs", asSuperFs);
    obj.put("subdir", subdir);
    String jsonConf = obj.toString(2);
    handle = lib.jfs_init(credential, crdSize, name, jsonConf, user, groupStr, superuser, supergroup);
    if (handle <= 0) {
      throw new IOException("JuiceFS initialized failed for jfs://" + name);
    }
    if (isBackGroundTask) {
      LOG.debug("background fs {}|({})", name, handle);
    } else {
      BgTaskUtil.register(name, handle);
    }
    discoverNodesUrl = getConf(conf, "discover-nodes-url", null);
    homeDirPrefix = conf.get("dfs.user.home.dir.prefix", "/user");
    this.workingDir = getHomeDirectory();

    // hadoop29 and above check
    try {
      Class.forName("org.apache.hadoop.fs.StreamCapabilities");
      withStreamCapability = true;
    } catch (ClassNotFoundException e) {
      withStreamCapability = false;
    }
    if (withStreamCapability) {
      try {
        constructor = Class.forName("io.juicefs.JuiceFileSystemImpl$BufferedFSOutputStreamWithStreamCapabilities")
                .getConstructor(OutputStream.class, Integer.TYPE, String.class);
      } catch (ClassNotFoundException | NoSuchMethodException e) {
        throw new RuntimeException(e);
      }
    }
    // for hadoop compatibility
    boolean hasAclMtd = ReflectionUtil.hasMethod(FileStatus.class.getName(), "hasAcl", (String[]) null);
    if (hasAclMtd) {
      fileStatusConstructor = ReflectionUtil.getConstructor(FileStatus.class,
          long.class, boolean.class, int.class, long.class, long.class,
          long.class, FsPermission.class, String.class, String.class, Path.class,
          Path.class, boolean.class, boolean.class, boolean.class);
      if (fileStatusConstructor == null) {
        throw new IOException("incompatible hadoop version");
      }
    }

    String umaskStr = getConf(conf, "umask", null);
    if (!isEmpty(umaskStr)) {
      conf.set("fs.permissions.umask-mode", umaskStr);
      LOG.debug("override fs.permissions.umask-mode to {}", umaskStr);
    }
    uMask = FsPermission.getUMask(conf);

    hflushMethod = getConf(conf, "hflush", "writeback");
    initializeStorageIds(conf);

    if ("true".equalsIgnoreCase(getConf(conf, "enable-metrics", "false"))) {
      metricsEnable = true;
      JuiceFSInstrumentation.init(this, statistics);
    }

    RangerConfig rangerConfig = checkAndGetRangerParams(conf);
    if (rangerConfig != null && !isSuperGroupFileSystem && !isBackGroundTask) {
        Configuration superConf = new Configuration(conf);
        superConf.set("juicefs.internal-bg-task", "true");
        superGroupFileSystem = new JuiceFileSystemImpl(true);
        superGroupFileSystem.initialize(uri, superConf);
        rangerPermissionChecker = RangerPermissionChecker.acquire(name, handle, superGroupFileSystem, rangerConfig);
    }

    if (!isBackGroundTask && !isSuperGroupFileSystem) {
      // use juicefs.users and juicefs.groups for global mapping
      String uidFile = getConf(conf, "users", null);
      String groupFile = getConf(conf, "groups", null);
      if (!isEmpty(uidFile) || !isEmpty(groupFile)) {
        BgTaskUtil.putTask(name, "Refresh guid", () -> {
          updateUidAndGrouping(uidFile, groupFile);
        }, 1, 1, TimeUnit.MINUTES);
      }
    }
  }

  public RangerConfig checkAndGetRangerParams(Configuration conf) throws IOException {
    if (System.getenv("JUICEFS_RANGER_TEST") != null) {
      RangerConfig config = new RangerConfig("http://localhost:6080", "ranger_test", 30000);
      config.setImpl("io.juicefs.permission.RangerAdminClientImpl");
      return config;
    }
    int size = 0, r = 1 << 10;
    Pointer buf = null;
    while (r > size) {
      size = r;
      buf = Memory.allocate(Runtime.getRuntime(lib), size);
      r = lib.jfs_ranger_cfg(name, buf, size);
    }
    if (r == 0) {
      return null;
    }
    byte[] rBuf = new byte[r];
    buf.get(0, rBuf, 0, r);
    String cfgStr = new String(rBuf);
    // http://localhost:6080?name=service_name
    String[] split = cfgStr.split("\\?", -1);
    if (split.length != 2) {
      throw new IOException(String.format("wrong ranger config: %s", cfgStr));
    }
    String url = split[0];
    String serviceName = split[1].substring(5);
    if (!url.startsWith("http")) {
      throw new IOException("illegal value for parameter 'ranger-rest-url': " + url);
    }
    if (serviceName.isEmpty()) {
      throw new IOException("illegal value for parameter 'ranger-service': " + serviceName);
    }
    String pollIntervalMs = getConf(conf, "ranger-poll-interval-ms", "30000");
    return new RangerConfig(url, serviceName, Long.parseLong(pollIntervalMs));
  }

  public JuiceFileSystemImpl(boolean isSuperGroupFileSystem) {
    this.isSuperGroupFileSystem = isSuperGroupFileSystem;
  }

  private Set<String> getGroups() {
    String groupsFile = getConf(getConf(), "groups", null);
    if (isEmpty(groupsFile)) {
      return new HashSet<>(ugi.getGroups());
    }

    int size = 0, r = 1 << 10;
    Pointer buf = null;
    while (r > size) {
      size = r;
      buf = Memory.allocate(Runtime.getRuntime(lib), size);
      r = lib.jfs_getGroups(name, user, buf, size);
    }
    if (r == 0) {
      return new HashSet<>(ugi.getGroups());
    }
    byte[] rBuf = new byte[r];
    buf.get(0, rBuf, 0, r);

    return new HashSet<>(Arrays.asList(new String(rBuf).split(",")));
  }

  private boolean isSuperUser() throws IOException {
    int r = lib.jfs_is_superuser(handle, user, String.join(",",  getGroups()));
    if (r < 0) {
      throw new InvalidRequestException("Invalid parameter");
    }
    return r == 1;
  }

  private boolean needCheckPermission() throws IOException {
    return rangerPermissionChecker != null && !isSuperGroupFileSystem && !isBackGroundTask && !isSuperUser() ;
  }

  private boolean checkPathAccess(Path path, FsAction action, String operation) throws IOException {
    return rangerPermissionChecker.checkPermission(path, false, null, null, action, operation, user, getGroups());
  }

  private boolean checkParentPathAccess(Path path, FsAction action, String operation) throws IOException {
    return rangerPermissionChecker.checkPermission(path, false, null, action, null, operation, user, getGroups());
  }

  private boolean checkAncestorAccess(Path path, FsAction action, String operation) throws IOException {
    return rangerPermissionChecker.checkPermission(path, false, action, null, null, operation, user, getGroups());
  }

  private boolean checkOwner(Path path, String operation) throws IOException {
    return rangerPermissionChecker.checkPermission(path, true, null, null, null, operation, user, getGroups());
  }

  private boolean isEmpty(String str) {
    return str == null || str.trim().isEmpty();
  }

  private String readFile(String file) throws IOException {
    Path path = new Path(file);
    FileStatus lastStatus = lastFileStatus.get(file);
    Configuration newConf = new Configuration(getConf());
    newConf.setBoolean("juicefs.internal-bg-task", true);
    try (FileSystem fs = FileSystem.newInstance(path.toUri(), newConf)) {
      FileStatus status = fs.getFileStatus(path);
      if (lastStatus != null && status.getModificationTime() == lastStatus.getModificationTime()
          && status.getLen() == lastStatus.getLen()) {
        return null;
      }
      try (FSDataInputStream in = fs.open(path)) {
        String res = new BufferedReader(new InputStreamReader(in)).lines().collect(Collectors.joining("\n"));
        lastFileStatus.put(file, status);
        return res;
      }
    }
  }

  private String parseUidAndGrouping(String pattern, String input) {
    String result = null;
    if (input == null || "".equals(input.trim())) {
      return result;
    }
    List<String> matched = new ArrayList<>();
    Matcher matcher = Pattern.compile(pattern).matcher(input);
    while (matcher.find()) {
      matched.add(matcher.group());
    }
    if (matched.size() > 0) {
      result = String.join("\n", matched);
    }
    return result;
  }

  private void updateUidAndGrouping(String uidFile, String groupFile) throws IOException {
    String uidstr = parseUidAndGrouping(USERNAME_UID_PATTERN, uidFile);
    if (uidstr == null && uidFile != null && !"".equals(uidFile.trim())) {
      uidstr = readFile(uidFile);
    }
    String grouping = parseUidAndGrouping(GROUPNAME_GID_USERNAMES_PATTERN, groupFile);
    if (grouping == null && groupFile != null && !"".equals(groupFile.trim())) {
      grouping = readFile(groupFile);
    }

    lib.jfs_update_uid_grouping(name, uidstr, grouping);
  }

  private void initializeStorageIds(Configuration conf) throws IOException {
    try {
      Class<?> clazz = Class.forName("org.apache.hadoop.fs.BlockLocation");
      setStorageIds = clazz.getMethod("setStorageIds", String[].class);
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException(
              "Hadoop version was incompatible, current hadoop version is:\t" + VersionInfo.getVersion());
    } catch (NoSuchMethodException e) {
      setStorageIds = null;
    }
    int vdiskPerCpu = Integer.parseInt(getConf(conf, "vdisk-per-cpu", "4"));
    storageIds = new String[java.lang.Runtime.getRuntime().availableProcessors() * vdiskPerCpu];
    for (int i = 0; i < storageIds.length; i++) {
      storageIds[i] = "vd" + i;
    }
  }

  @Override
  public Path getHomeDirectory() {
    return makeQualified(new Path(homeDirPrefix + "/" + user));
  }

  private static void initStubLoader() {
    int loadMaxTime = 30;
    long start = System.currentTimeMillis();
    Class<?> clazz = null;
    // first try
    try {
      clazz = Class.forName("com.kenai.jffi.internal.StubLoader");
    } catch (ClassNotFoundException e) {
    }

    // try try try ...
    while (StubLoader.getFailureCause() != null && (System.currentTimeMillis() - start) < loadMaxTime * 1000) {
      LOG.warn("StubLoader load failed, it'll be retried!");
      try {
        Thread.interrupted();
        Method load = clazz.getDeclaredMethod("load");
        load.setAccessible(true);
        load.invoke(null);

        Field loaded = clazz.getDeclaredField("loaded");
        loaded.setAccessible(true);
        loaded.set(null, true);

        Field failureCause = clazz.getDeclaredField("failureCause");
        failureCause.setAccessible(true);
        failureCause.set(null, null);
      } catch (Throwable e) {
      }
    }

    if (StubLoader.getFailureCause() != null) {
      throw new RuntimeException("StubLoader load failed", StubLoader.getFailureCause());
    }
  }

  public static Libjfs loadLibrary() {
    initStubLoader();

    LibraryLoader<Libjfs> libjfsLibraryLoader = LibraryLoader.create(Libjfs.class);
    libjfsLibraryLoader.failImmediately();

    String osId = "so";
    String archId = "amd64";
    String resourceFormat = "libjfs-%s.%s.gz";
    String nameFormat = "libjfs-%s.%s.%s";

    File dir = new File("/tmp");
    String os = System.getProperty("os.name");
    String arch = System.getProperty("os.arch");
    if (arch.contains("aarch64")) {
      archId = "arm64";
    }
    if (os.toLowerCase().contains("windows")) {
      osId = "dll";
      dir = new File(System.getProperty("java.io.tmpdir"));
    } else if (os.toLowerCase().contains("mac")) {
      osId = "dylib";
    }

    String resource = String.format(resourceFormat, archId, osId);
    String name = String.format(nameFormat, archId, gitVer, osId);

    File libFile = new File(dir, name);

    InputStream ins;
    long soTime;
    URL location = JuiceFileSystemImpl.class.getProtectionDomain().getCodeSource().getLocation();
    if (location == null) {
      // jar may changed
      return loadExistLib(libjfsLibraryLoader, dir, name, libFile);
    }
    URLConnection con;
    try {
      try {
        con = location.openConnection();
      } catch (FileNotFoundException e) {
        // jar may changed
        return loadExistLib(libjfsLibraryLoader, dir, name, libFile);
      }
      if (location.getProtocol().equals("jar") && (con instanceof JarURLConnection)) {
        LOG.debug("juicefs-hadoop.jar is a nested jar");
        JarURLConnection connection = (JarURLConnection) con;
        JarFile jfsJar = connection.getJarFile();
        ZipEntry entry = jfsJar.getJarEntry(resource);
        soTime = entry.getLastModifiedTime().toMillis();
        ins = jfsJar.getInputStream(entry);
      } else {
        URI locationUri;
        try {
          locationUri = location.toURI();
        } catch (URISyntaxException e) {
          return loadExistLib(libjfsLibraryLoader, dir, name, libFile);
        }
        if (Files.isDirectory(Paths.get(locationUri))) { // for debug: sdk/java/target/classes
          soTime = con.getLastModified();
          ins = JuiceFileSystemImpl.class.getClassLoader().getResourceAsStream(resource);
        } else {
          JarFile jfsJar;
          try {
            jfsJar = new JarFile(locationUri.getPath());
          } catch (FileNotFoundException fne) {
            return loadExistLib(libjfsLibraryLoader, dir, name, libFile);
          }
          ZipEntry entry = jfsJar.getJarEntry(resource);
          soTime = entry.getLastModifiedTime().toMillis();
          ins = jfsJar.getInputStream(entry);
        }
      }

      synchronized (JuiceFileSystemImpl.class) {
        if (!libFile.exists() || libFile.lastModified() < soTime) {
          // try the name for current user
          libFile = new File(dir, System.getProperty("user.name") + "-" + name);
          if (!libFile.exists() || libFile.lastModified() < soTime) {
            InputStream reader = new GZIPInputStream(ins);
            File tmp = File.createTempFile(name, null, dir);
            FileOutputStream writer = new FileOutputStream(tmp);
            byte[] buffer = new byte[128 << 10];
            int bytesRead = 0;
            while ((bytesRead = reader.read(buffer)) != -1) {
              writer.write(buffer, 0, bytesRead);
            }
            writer.close();
            reader.close();
            tmp.setLastModified(soTime);
            tmp.setReadable(true, false);
            try {
              File org = new File(dir, name);
              Files.move(tmp.toPath(), org.toPath(), StandardCopyOption.ATOMIC_MOVE);
              libFile = org;
            } catch (Exception ade) {
              Files.move(tmp.toPath(), libFile.toPath(), StandardCopyOption.ATOMIC_MOVE);
            }
          }
        }
      }
      ins.close();
    } catch (Exception e) {
      throw new RuntimeException("Init libjfs failed", e);
    }
    return libjfsLibraryLoader.load(libFile.getAbsolutePath());
  }

  private static Libjfs loadExistLib(LibraryLoader<Libjfs> libjfsLibraryLoader, File dir, String name, File libFile) {
    File currentUserLib = new File(dir, System.getProperty("user.name") + "-" + name);
    if (currentUserLib.exists()) {
      return libjfsLibraryLoader.load(currentUserLib.getAbsolutePath());
    } else {
      return libjfsLibraryLoader.load(libFile.getAbsolutePath());
    }
  }

  private void initCache() {
    try {
      List<String> newNodes = discoverNodes(discoverNodesUrl);
      Map<String, String> newCachedHosts = new HashMap<>();
      for (String newNode : newNodes) {
        try {
          newCachedHosts.put(InetAddress.getByName(newNode).getHostAddress(), newNode);
        } catch (UnknownHostException e) {
          LOG.warn("unknown host: " + newNode);
        }
      }

      // if newCachedHosts are not changed, skip
      if (!newCachedHosts.equals(cachedHostsForName.get(name))) {
        List<String> ips = new ArrayList<>(newCachedHosts.keySet());
        LOG.debug("update nodes to: " + String.join(",", ips));
        hashForName.put(name, new ConsistentHash<>(100, ips));
        cachedHostsForName.put(name, newCachedHosts);
      }
    } catch (Throwable e) {
      LOG.warn("failed to discover nodes", e);
    }
  }

  private List<String> discoverNodes(String urls) {
    LOG.debug("fetching nodes from {}", urls);
    Configuration newConf = new Configuration(getConf());
    newConf.setBoolean("juicefs.internal-bg-task", true);
    NodesFetcher fetcher = NodesFetcherBuilder.buildFetcher(urls, name, newConf);
    List<String> fetched = fetcher.fetchNodes(urls);
    if (fetched == null) {
      fetched = new ArrayList<>();
    }
    LOG.debug("fetched nodes: {}", fetched);
    return fetched;
  }

  private BlockLocation makeLocation(long code, long start, long len) {
    long index = (start + len / 2) / blocksize / 4;
    BlockLocation blockLocation;
    String[] ns = new String[cacheReplica];
    String[] hs = new String[cacheReplica];

    Map<String, String> cachedHosts = cachedHostsForName.get(name);
    ConsistentHash<String> hash = hashForName.get(name);
    for (int i = 0; i < cacheReplica; i++) {
      String h = "localhost";
      if (cachedHosts != null && hash != null) {
        h = cachedHosts.getOrDefault(hash.get(code + "-" + (index + i)), "localhost");
      }
      ns[i] = h + ":50010";
      hs[i] = h;
    }
    blockLocation = new BlockLocation(ns, hs, null, null, start, len, false);
    if (setStorageIds != null) {
      try {
        setStorageIds.invoke(blockLocation, (Object) getStorageIds());
      } catch (IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException(e);
      }
    }
    return blockLocation;
  }

  private String[] getStorageIds() {
    String[] res = new String[cacheReplica];
    for (int i = 0; i < cacheReplica; i++) {
      res[i] = storageIds[random.nextInt(storageIds.length)];
    }
    return res;
  }

  private void setStorageId(BlockLocation bl) {
    if (setStorageIds != null) {
      try {
        setStorageIds.invoke(bl, (Object) getStorageIds());
      } catch (IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException(e);
      }
    }
  }

  @Override
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    if (needCheckPermission() && !checkPathAccess(file.getPath(), FsAction.READ, "getFileBlockLocations")) {
      return superGroupFileSystem.getFileBlockLocations(file, start, len);
    }

    if (isEmpty(discoverNodesUrl) || cacheReplica <= 0) {
      BlockLocation[] bls = super.getFileBlockLocations(file, start, len);
      if (bls != null) {
        for (BlockLocation bl : bls) {
          setStorageId(bl);
        }
      }
      return bls;
    }

    if (file == null) {
      return null;
    }

    if (start < 0 || len < 0) {
      throw new IllegalArgumentException("Invalid start or len parameter");
    }
    if (file.getLen() <= start) {
      return new BlockLocation[0];
    }
    if (cacheReplica <= 0) {
      String[] name = new String[]{"localhost:50010"};
      String[] host = new String[]{"localhost"};
      return new BlockLocation[]{new BlockLocation(name, host, 0L, file.getLen())};
    }
    BgTaskUtil.putTask(name, "Node fetcher", this::initCache, 10, 10, TimeUnit.MINUTES);
    if (file.getLen() <= start + len) {
      len = file.getLen() - start;
    }
    long code = normalizePath(file.getPath()).hashCode();
    BlockLocation[] locs = new BlockLocation[(int) (len / blocksize) + 2];
    int indx = 0;
    while (len > 0) {
      long blen = len < blocksize ? len : blocksize - start % blocksize;
      locs[indx] = makeLocation(code, start, blen);
      start += blen;
      len -= blen;
      indx++;
    }
    // merge the last block
    if (indx > 1 && locs[indx - 1].getLength() < blocksize / 10) {
      locs[indx - 2].setLength(locs[indx - 2].getLength() + locs[indx - 1].getLength());
      indx--;
    }
    // merge the first block
    if (indx > 1 && locs[0].getLength() < blocksize / 10) {
      locs[1].setOffset(locs[0].getOffset());
      locs[1].setLength(locs[0].getLength() + locs[1].getLength());
      locs = Arrays.copyOfRange(locs, 1, indx);
      indx--;
    }
    return Arrays.copyOfRange(locs, 0, indx);
  }

  /*******************************************************
   * For open()'s FSInputStream.
   *******************************************************/
  class FileInputStream extends FSInputStream implements ByteBufferReadable {
    private int fd;
    private final Path path;

    private ByteBuffer buf;
    private long position;
    private long fileLen;

    public FileInputStream(Path f, int fd, int size, long fileLen) throws IOException {
      path = f;
      this.fd = fd;
      buf = directBufferPool.getBuffer(size);
      buf.limit(0);
      position = 0;
      this.fileLen = fileLen;
    }

    @Override
    public synchronized long getPos() throws IOException {
      if (buf == null)
        throw new IOException("stream was closed");
      return position - buf.remaining();
    }

    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
      return false;
    }

    @Override
    public synchronized int available() throws IOException {
      if (buf == null)
        throw new IOException("stream was closed");
      long remaining = fileLen - position + buf.remaining();
      if (remaining > Integer.MAX_VALUE) {
        return Integer.MAX_VALUE;
      }
      return (int)remaining;
    }

    @Override
    public boolean markSupported() {
      return false;
    }

    @Override
    public synchronized int read() throws IOException {
      if (buf == null)
        throw new IOException("stream was closed");
      if (!buf.hasRemaining() && !refill())
        return -1; // EOF
      assert buf.hasRemaining();
      statistics.incrementBytesRead(1);
      return buf.get() & 0xFF;
    }

    @Override
    public synchronized int read(byte[] b, int off, int len) throws IOException {
      if (off < 0 || len < 0 || b.length - off < len)
        throw new IndexOutOfBoundsException();
      return read(ByteBuffer.wrap(b, off, len));
    }

    private boolean refill() throws IOException {
      buf.clear();
      int read = read(position, buf);
      if (read <= 0) {
        buf.limit(0);
        return false; // EOF
      }
      buf.position(0);
      buf.limit(read);
      position += read;
      return true;
    }

    @Override
    public synchronized int read(long pos, byte[] b, int off, int len) throws IOException {
      if (b == null || off < 0 || len < 0 || b.length - off < len) {
        throw new IllegalArgumentException("arguments: " + off + " " + len);
      }
      int got = read(pos, ByteBuffer.wrap(b, off, len));
      statistics.incrementBytesRead(got);
      return got;
    }

    @Override
    public synchronized int read(ByteBuffer b) throws IOException {
      if (!b.hasRemaining())
        return 0;
      if (buf == null)
        throw new IOException("stream was closed");
      if (!buf.hasRemaining() && b.remaining() <= buf.capacity() && !refill()) {
        return -1;
      }
      ByteBuffer srcBuf = buf.duplicate();
      int got = Math.min(b.remaining(), srcBuf.remaining());
      if (got > 0) {
        srcBuf.limit(srcBuf.position() + got);
        b.put(srcBuf);
        buf.position(srcBuf.position());
        statistics.incrementBytesRead(got);
      }
      int more = read(position, b);
      if (more <= 0)
        return got > 0 ? got : -1;
      position += more;
      statistics.incrementBytesRead(more);
      buf.position(0);
      buf.limit(0);
      return got + more;
    }

    private synchronized int read(long pos, ByteBuffer b) throws IOException {
      if (pos < 0)
        throw new EOFException("position is negative");
      if (!b.hasRemaining())
        return 0;
      int got;
      int startPos = b.position();
      got = lib.jfs_pread(Thread.currentThread().getId(), fd, b, b.remaining(), pos);
      if (got == EINVAL)
        throw new IOException("stream was closed");
      if (got < 0)
        throw error(got, path);
      if (got == 0)
        return -1;
      b.position(startPos + got);
      return got;
    }

    @Override
    public synchronized void seek(long p) throws IOException {
      if (p < 0) {
        throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK);
      }
      if (buf == null)
        throw new IOException("stream was closed");
      if (p < position && p >= position - buf.limit()) {
        buf.position((int) (p - (position - buf.limit())));
      } else {
        buf.position(0);
        buf.limit(0);
        position = p;
      }
    }

    public synchronized void skipNBytes(long n) throws IOException {
      if (buf == null) {
        throw new IOException("stream was closed");
      }

      if (n <= 0) {
        return;
      }

      long np = position + n;
      if (np > fileLen) {
        throw new EOFException(String.format("Unable to skip %s bytes (position=%s, fileSize=%s): %s", n, position, fileLen, np));
      }
      position = np;
    }
    @Override
    public synchronized long skip(long n) throws IOException {
      if (n < 0)
        return -1;
      if (buf == null)
        throw new IOException("stream was closed");
      long pos = getPos();
      if (pos + n > fileLen) {
        n = fileLen - pos;
      }
      seek(pos + n);
      return n;
    }

    @Override
    public synchronized void close() throws IOException {
      if (buf == null) {
        return; // already closed
      }
      directBufferPool.returnBuffer(buf);
      buf = null;
      int r = lib.jfs_close(Thread.currentThread().getId(), fd);
      fd = 0;
      if (r < 0)
        throw error(r, path);
    }
  }

  @Override
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.READ, "open")) {
      return superGroupFileSystem.open(f, bufferSize);
    }
    statistics.incrementReadOps(1);
    ByteBuffer fileLen = ByteBuffer.allocate(8);
    fileLen.order(ByteOrder.nativeOrder());
    int fd = lib.jfs_open(Thread.currentThread().getId(), handle, normalizePath(f), fileLen, MODE_MASK_R);
    if (fd < 0) {
      throw error(fd, f);
    }
    long len = fileLen.getLong();
    return new FSDataInputStream(new FileInputStream(f, fd, checkBufferSize(bufferSize), len));
  }

  @Override
  public void access(Path path, FsAction mode) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, mode, "access")) {
      superGroupFileSystem.access(path, mode);
      return;
    }
    int r = lib.jfs_access(Thread.currentThread().getId(), handle, normalizePath(path), mode.ordinal());
    if (r < 0)
      throw error(r, path);
  }

  /*********************************************************
   * For create()'s FSOutputStream.
   *********************************************************/
  class FSOutputStream extends OutputStream {
    private int fd;
    private Path path;

    private FSOutputStream(int fd, Path p) throws IOException {
      this.fd = fd;
      this.path = p;
    }

    @Override
    public void close() throws IOException {
      int r = lib.jfs_close(Thread.currentThread().getId(), fd);
      if (r < 0)
        throw error(r, path);
    }

    @Override
    public void flush() throws IOException {
    }

    public void hflush() throws IOException {
      int r = lib.jfs_flush(Thread.currentThread().getId(), fd);
      if (r == EINVAL)
        throw new IOException("stream was closed");
      if (r < 0)
        throw error(r, path);
    }

    public void fsync() throws IOException {
      int r = lib.jfs_fsync(Thread.currentThread().getId(), fd);
      if (r == EINVAL)
        throw new IOException("stream was closed");
      if (r < 0)
        throw error(r, path);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      if (b.length - off < len) {
        throw new IndexOutOfBoundsException();
      }
      int done = lib.jfs_write(Thread.currentThread().getId(), fd, ByteBuffer.wrap(b, off, len), len);
      if (done == EINVAL)
        throw new IOException("stream was closed");
      if (done < 0)
        throw error(done, path);
      if (done < len) {
        throw new IOException("write");
      }
    }

    @Override
    public void write(int b) throws IOException {
      int done = lib.jfs_write(Thread.currentThread().getId(), fd, ByteBuffer.wrap(new byte[]{(byte) b}), 1);
      if (done == EINVAL)
        throw new IOException("stream was closed");
      if (done < 0)
        throw error(done, path);
      if (done < 1)
        throw new IOException("write");
    }
  }

  static class BufferedFSOutputStream extends BufferedOutputStream implements Syncable {
    private String hflushMethod;
    private boolean closed;

    public BufferedFSOutputStream(OutputStream out) {
      super(out);
      hflushMethod = "writeback";
    }

    public BufferedFSOutputStream(OutputStream out, int size, String hflushMethod) {
      super(out, size);
      this.hflushMethod = hflushMethod;
    }

    public void sync() throws IOException {
      hflush();
    }

    @Override
    public synchronized void write(int b) throws IOException {
      if (closed) {
        throw new IOException("stream was closed");
      }
      super.write(b);
    }

    @Override
    public synchronized void write(byte[] b, int off, int len) throws IOException {
      if (closed) {
        throw new IOException("stream was closed");
      }
      super.write(b, off, len);
    }

    @Override
    public synchronized void flush() throws IOException {
      if (closed) {
        throw new IOException("stream was closed");
      }
      super.flush();
    }

    @Override
    public synchronized void hflush() throws IOException {
      if (closed) {
        throw new IOException("stream was closed");
      }
      flush();
      if (hflushMethod.equals("writeback")) {
        ((FSOutputStream) out).hflush();
      } else if (hflushMethod.equals("sync") || hflushMethod.equals("fsync")) {
        ((FSOutputStream) out).fsync();
      } else {
        // nothing
      }
    }

    @Override
    public synchronized void hsync() throws IOException {
      if (closed) {
        throw new IOException("stream was closed");
      }
      flush();
      ((FSOutputStream) out).fsync();
    }

    @Override
    public synchronized void close() throws IOException {
      if (closed) {
        return;
      }
      super.close();
      closed = true;
    }

    public OutputStream getOutputStream() {
      return out;
    }
  }

  static class BufferedFSOutputStreamWithStreamCapabilities extends BufferedFSOutputStream
          implements StreamCapabilities {
    public BufferedFSOutputStreamWithStreamCapabilities(OutputStream out) {
      super(out);
    }

    public BufferedFSOutputStreamWithStreamCapabilities(OutputStream out, int size, String hflushMethod) {
      super(out, size, hflushMethod);
    }

    @Override
    public boolean hasCapability(String capability) {
      return capability.equalsIgnoreCase("hsync") || capability.equalsIgnoreCase(("hflush"));
    }
  }

  @Override
  public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.WRITE, "append")) {
      return superGroupFileSystem.append(f, bufferSize, progress);
    }
    statistics.incrementWriteOps(1);
    int fd = lib.jfs_open(Thread.currentThread().getId(), handle, normalizePath(f), null, MODE_MASK_W);
    if (fd < 0)
      throw error(fd, f);
    long r = lib.jfs_lseek(Thread.currentThread().getId(), fd, 0, 2);
    if (r < 0)
      throw error((int) r, f);
    return createFsDataOutputStream(f, bufferSize, fd, r);
  }

  @Override
  public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
                                   short replication, long blockSize, Progressable progress) throws IOException {
    if (needCheckPermission() && !checkAncestorAccess(f, FsAction.WRITE, "create")) {
      if (!overwrite || !superGroupFileSystem.exists(f)) {
        return superGroupFileSystem.create(f, permission, overwrite, bufferSize, replication, blockSize, progress);
      } else if (!checkPathAccess(f, FsAction.WRITE, "create")) {
        return superGroupFileSystem.create(f, permission, overwrite, bufferSize, replication, blockSize, progress);
      }
    }
    statistics.incrementWriteOps(1);
    while (true) {
      int fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort(), uMask.toShort());
      if (fd == ENOENT) {
        Path parent = makeQualified(f).getParent();
        try {
          mkdirs(parent, FsPermission.getDirDefault());
        } catch (FileAlreadyExistsException e) {
        }
        continue;
      }
      if (fd == EEXIST) {
        if (!overwrite || isDirectory(f)) {
          throw new FileAlreadyExistsException("Path already exists: " + f);
        }
        delete(f, false);
        continue;
      }
      if (fd < 0) {
        throw error(fd, makeQualified(f).getParent());
      }
      return createFsDataOutputStream(f, bufferSize, fd, 0L);
    }
  }

  private int checkBufferSize(int size) {
    if (size < minBufferSize) {
      size = minBufferSize;
    }
    return size;
  }

  @Override
  public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet<CreateFlag> flag,
                                               int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
    if (needCheckPermission() && !checkAncestorAccess(f, FsAction.WRITE, "createNonRecursive")) {
      if (!flag.contains(CreateFlag.OVERWRITE) || !superGroupFileSystem.exists(f)) {
        return superGroupFileSystem.createNonRecursive(f, permission, flag, bufferSize, replication, blockSize, progress);
      } else if (!checkPathAccess(f, FsAction.WRITE, "createNonRecursive")) {
        return superGroupFileSystem.createNonRecursive(f, permission, flag, bufferSize, replication, blockSize, progress);
      }
    }
    statistics.incrementWriteOps(1);
    int fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort(), uMask.toShort());
    while (fd == EEXIST) {
      if (!flag.contains(CreateFlag.OVERWRITE) || isDirectory(f)) {
        throw new FileAlreadyExistsException("File already exists: " + f);
      }
      delete(f, false);
      fd = lib.jfs_create(Thread.currentThread().getId(), handle, normalizePath(f), permission.toShort(), uMask.toShort());
    }
    if (fd < 0) {
      throw error(fd, makeQualified(f).getParent());
    }
    return createFsDataOutputStream(f, bufferSize, fd, 0L);
  }

  private FSDataOutputStream createFsDataOutputStream(Path f, int bufferSize, int fd, long startPosition) throws IOException {
    FSOutputStream out = new FSOutputStream(fd, f);
    if (withStreamCapability) {
      try {
        return new FSDataOutputStream(
                (OutputStream) constructor.newInstance(out, checkBufferSize(bufferSize), hflushMethod), statistics, startPosition);
      } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException(e);
      }
    } else {
      return new FSDataOutputStream(new BufferedFSOutputStream(out, checkBufferSize(bufferSize), hflushMethod),
              statistics, startPosition);
    }
  }

  @Override
  public FileChecksum getFileChecksum(Path f, long length) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.READ, "getFileChecksum")) {
      return superGroupFileSystem.getFileChecksum(f, length);
    }
    statistics.incrementReadOps(1);
    if (!fileChecksumEnabled)
      return null;
    String combineMode = getConf().get("dfs.checksum.combine.mode", "MD5MD5CRC");
    if (!combineMode.equals("MD5MD5CRC"))
      return null;
    DataChecksum.Type ctype = DataChecksum.Type.valueOf(getConf().get("dfs.checksum.type", "CRC32C"));
    if (ctype.size != 4)
      return null;

    int bytesPerCrc = getConf().getInt("io.bytes.per.checksum", 512);
    DataChecksum summer = DataChecksum.newDataChecksum(ctype, bytesPerCrc);

    DataOutputBuffer checksumBuf = new DataOutputBuffer();
    DataOutputBuffer crcBuf = new DataOutputBuffer();
    byte[] buf = new byte[bytesPerCrc];
    FSDataInputStream in = open(f, 1 << 20);
    boolean eof = false;
    long got = 0;
    while (got < length && !eof) {
      for (int i = 0; i < blocksize / bytesPerCrc && got < length; i++) {
        int n;
        if (length < bytesPerCrc) {
          n = in.read(buf, 0, (int) length);
        } else {
          n = in.read(buf);
        }
        if (n <= 0) {
          eof = true;
          break;
        } else {
          summer.update(buf, 0, n);
          summer.writeValue(crcBuf, true);
          got += n;
        }
      }
      if (crcBuf.getLength() > 0) {
        MD5Hash blockMd5 = MD5Hash.digest(crcBuf.getData(), 0, crcBuf.getLength());
        blockMd5.write(checksumBuf);
        crcBuf.reset();
      }
    }
    in.close();
    if (checksumBuf.getLength() == 0) { // empty file
      return new MD5MD5CRC32GzipFileChecksum(0, 0, MD5Hash.digest(new byte[32]));
    }
    MD5Hash md5 = MD5Hash.digest(checksumBuf.getData());
    long crcPerBlock = 0;
    if (got > blocksize) { // more than one block
      crcPerBlock = blocksize / bytesPerCrc;
    }
    if (ctype == DataChecksum.Type.CRC32C) {
      return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCrc, crcPerBlock, md5);
    } else {
      return new MD5MD5CRC32GzipFileChecksum(bytesPerCrc, crcPerBlock, md5);
    }
  }

  @Override
  public void concat(final Path dst, final Path[] srcs) throws IOException {
    if (needCheckPermission()) {
      access(dst.getParent(), FsAction.WRITE);
      access(dst, FsAction.WRITE);
      for (Path src : srcs) {
        access(src, FsAction.READ);
      }
      superGroupFileSystem.concat(dst, srcs);
      return;
    }
    statistics.incrementWriteOps(1);
    if (srcs.length == 0) {
      throw new IllegalArgumentException("No sources given");
    }
    Path dp = makeQualified(dst).getParent();
    for (Path src : srcs) {
      if (!makeQualified(src).getParent().equals(dp)) {
        throw new HadoopIllegalArgumentException("Source file " + normalizePath(src)
                + " is not in the same directory with the target "
                + normalizePath(dst));
      }
    }
    byte[][] srcbytes = new byte[srcs.length][];
    int bufsize = 0;
    for (int i = 0; i < srcs.length; i++) {
      srcbytes[i] = normalizePath(srcs[i]).getBytes("UTF-8");
      bufsize += srcbytes[i].length + 1;
    }
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), bufsize);
    long offset = 0;
    for (int i = 0; i < srcs.length; i++) {
      buf.put(offset, srcbytes[i], 0, srcbytes[i].length);
      buf.putByte(offset + srcbytes[i].length, (byte) 0);
      offset += srcbytes[i].length + 1;
    }
    int r = lib.jfs_concat(Thread.currentThread().getId(), handle, normalizePath(dst), buf, bufsize);
    if (r < 0) {
      if (r == ENOENT) {
        if (!exists(dst)) {
          throw error(r, dst);
        } else {
          throw new FileNotFoundException("one of srcs is missing");
        }
      }
      throw error(r, dst);
    }
  }

  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    if (needCheckPermission()) {
      if (!superGroupFileSystem.exists(src)) {
        return false;
      }
      access(src.getParent(), FsAction.WRITE);
      Path dstAncestor = rangerPermissionChecker.getAncestor(dst).getPath();
      access(dstAncestor, FsAction.WRITE);
      return superGroupFileSystem.rename(src, dst);
    }
    statistics.incrementWriteOps(1);
    String srcStr = makeQualified(src).toUri().getPath();
    String dstStr = makeQualified(dst).toUri().getPath();
    if (src.equals(dst)) {
      FileStatus st = getFileStatus(src);
      return st.isFile();
    }
    if (dstStr.startsWith(srcStr) && (dstStr.charAt(srcStr.length()) == Path.SEPARATOR_CHAR)) {
      return false;
    }
    int r = lib.jfs_rename(Thread.currentThread().getId(), handle, normalizePath(src), normalizePath(dst));
    if (r == EEXIST) {
      try {
        FileStatus st = getFileStatus(dst);
        if (st.isDirectory()) {
          dst = new Path(dst, src.getName());
          r = lib.jfs_rename(Thread.currentThread().getId(), handle, normalizePath(src), normalizePath(dst));
        } else {
          return false;
        }
      } catch (FileNotFoundException ignored) {
      }
    }
    if (r == ENOENT || r == EEXIST)
      return false;
    if (r == EACCESS) {
      this.access(makeQualified(src).getParent(), FsAction.WRITE.or(FsAction.EXECUTE));
      this.access(makeQualified(dst).getParent(), FsAction.WRITE.or(FsAction.EXECUTE));
    }
    if (r < 0)
      throw error(r, src);
    return true;
  }

  @Override
  public boolean truncate(Path f, long newLength) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.WRITE, "truncate")) {
      return superGroupFileSystem.truncate(f, newLength);
    }
    int r = lib.jfs_truncate(Thread.currentThread().getId(), handle, normalizePath(f), newLength);
    if (r < 0)
      throw error(r, f);
    return true;
  }

  private boolean rmr(Path p) throws IOException {
    int r = lib.jfs_rmr(Thread.currentThread().getId(), handle, normalizePath(p));
    if (r == ENOENT) {
      return false;
    }
    if (r < 0) {
      throw error(r, p);
    }
    return true;
  }

  @Override
  public boolean delete(Path p, boolean recursive) throws IOException {
    if (needCheckPermission()) {
      try {
        if (!checkParentPathAccess(p, FsAction.WRITE_EXECUTE, "delete")) {
          return superGroupFileSystem.delete(p, recursive);
        }
      } catch (Exception e) {
        if (!checkPathAccess(p, FsAction.WRITE_EXECUTE, "delete")) {
          return superGroupFileSystem.delete(p, recursive);
        }
      }
    }
    statistics.incrementWriteOps(1);
    if (recursive)
      return rmr(p);
    int r = lib.jfs_delete(Thread.currentThread().getId(), handle, normalizePath(p));
    if (r == ENOENT) {
      return false;
    }
    if (r < 0) {
      throw error(r, p);
    }
    return true;
  }

  @Override
  public ContentSummary getContentSummary(Path f) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.READ_EXECUTE, "getContentSummary")) {
      return superGroupFileSystem.getContentSummary(f);
    }
    statistics.incrementReadOps(1);
    String path = normalizePath(f);
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 40);
    int r = lib.jfs_summary(Thread.currentThread().getId(), handle, path, buf);
    if (r < 0) {
      throw error(r, f);
    }
    long size = buf.getLongLong(0);
    long files = buf.getLongLong(8);
    long dirs = buf.getLongLong(16);
    long quota = buf.getLongLong(24);
    long spaceQuota = buf.getLongLong(32);
    quota = quota == 0 ? -1L : quota;
    spaceQuota = spaceQuota == 0 ? -1L : spaceQuota;
    return new ContentSummary(size, files, dirs, quota, size, spaceQuota);
  }

  private FileStatus newFileStatus(Path p, Pointer buf, int size, boolean readlink) throws IOException {
    int mode = buf.getInt(0);
    boolean isdir = ((mode >>> 31) & 1) == 1; // Go
    int stickybit = (mode >>> 20) & 1;
    boolean hasAcl = (mode >> 18 & 1) == 1;
    FsPermission perm = new FsPermission((short) ((mode & 0777) | (stickybit << 9)));
    perm = new FsPermissionExtension(perm, hasAcl, false);
    long length = buf.getLongLong(4);
    long mtime = buf.getLongLong(12);
    long atime = buf.getLongLong(20);
    String user = buf.getString(28);
    String group = buf.getString(28 + user.length() + 1);
    assert (30 + user.length() + group.length() == size);

    if (fileStatusConstructor == null) {
      return new FileStatus(length, isdir, 1, blocksize, mtime, atime, perm, user, group, p);
    } else {
      try {
        return fileStatusConstructor.newInstance(length, isdir, 1, blocksize, mtime, atime, perm, user, group, null, p, hasAcl, false, false);
      } catch (Exception e) {
        throw new IOException("construct fileStatus failed", e);
      }
    }
  }

  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    if (needCheckPermission() && !checkPathAccess(f, FsAction.READ_EXECUTE, "listStatus")) {
      return superGroupFileSystem.listStatus(f);
    }
    statistics.incrementReadOps(1);
    int bufsize = 32 << 10;
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), bufsize); // TODO: smaller buff
    String path = normalizePath(f);
    int r = lib.jfs_listdir(Thread.currentThread().getId(), handle, path, 0, buf, bufsize);
    if (r == ENOENT) {
      throw new FileNotFoundException(f.toString());
    }
    if (r == ENOTDIR) {
      return new FileStatus[]{getFileStatus(f)};
    }

    FileStatus[] results;
    results = new FileStatus[1024];
    int j = 0;
    while (r > 0) {
      long offset = 0;
      while (offset < r) {
        int len = buf.getByte(offset) & 0xff;
        byte[] name = new byte[len];
        buf.get(offset + 1, name, 0, len);
        offset += 1 + len;
        int size = buf.getByte(offset) & 0xff;
        if (j == results.length) {
          FileStatus[] rs = new FileStatus[results.length * 2];
          System.arraycopy(results, 0, rs, 0, j);
          results = rs;
        }
        Path p = makeQualified(new Path(f, new String(name)));
        FileStatus st = newFileStatus(p, buf.slice(offset + 1), size, false);
        results[j] = st;
        offset += 1 + size;
        j++;
      }
      int left = buf.getInt(offset);
      if (left == 0)
        break;
      int fd = buf.getInt(offset + 4);
      r = lib.jfs_listdir(Thread.currentThread().getId(), fd, path, j, buf, bufsize);
    }
    if (r < 0) {
      throw error(r, f);
    }
    statistics.incrementReadOps(j);

    FileStatus[] sorted = Arrays.copyOf(results, j);
    Arrays.sort(sorted, (p1, p2) -> p1.getPath().compareTo(p2.getPath()));
    return sorted;
  }

  @Override
  public void setWorkingDirectory(Path newDir) {
    workingDir = fixRelativePart(newDir);
    checkPath(workingDir);
  }

  @Override
  public Path getWorkingDirectory() {
    return workingDir;
  }

  @Override
  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
    if (needCheckPermission() && !checkAncestorAccess(f, FsAction.WRITE, "mkdirs")) {
      return superGroupFileSystem.mkdirs(f, permission);
    }
    statistics.incrementWriteOps(1);
    if (f == null) {
      throw new IllegalArgumentException("mkdirs path arg is null");
    }
    String path = normalizePath(f);
    if ("/".equals(path))
      return true;
    int r = lib.jfs_mkdir(Thread.currentThread().getId(), handle, path, permission.toShort(), uMask.toShort());
    if (r == 0 || r == EEXIST && !isFile(f)) {
      return true;
    } else if (r == ENOENT) {
      Path parent = makeQualified(f).getParent();
      if (parent != null) {
        return mkdirs(parent, permission) && mkdirs(f, permission);
      }
    }
    throw error(r, makeQualified(f).getParent());
  }

  @Override
  public FileStatus getFileStatus(Path f) throws IOException {
    if (needCheckPermission() && !checkParentPathAccess(f, FsAction.EXECUTE, "getFileStatus")) {
      return superGroupFileSystem.getFileStatus(f);
    }
    statistics.incrementReadOps(1);
    try {
      return getFileStatusInternal(f, true);
    } catch (ParentNotDirectoryException e) {
      throw new FileNotFoundException(f.toString());
    }
  }

  private FileStatus getFileStatusInternal(final Path f, boolean dereference) throws IOException {
    String path = normalizePath(f);
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 130);
    int r;
    if (dereference) {
      r = lib.jfs_stat1(Thread.currentThread().getId(), handle, path, buf);
    } else {
      r = lib.jfs_lstat1(Thread.currentThread().getId(), handle, path, buf);
    }
    if (r < 0) {
      throw error(r, f);
    }
    return newFileStatus(makeQualified(f), buf, r, !dereference);
  }

  private FileStatus getFileStatusInternalNoException(final Path f) throws IOException {
    String path = normalizePath(f);
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 130);
    int r = lib.jfs_lstat1(Thread.currentThread().getId(), handle, path, buf);
    if (r < 0) {
      return null;
    }
    return newFileStatus(makeQualified(f), buf, r, false);
  }

  @Override
  public boolean supportsSymlinks() {
    return false;
  }

  @Override
  public FsStatus getStatus(Path p) throws IOException {
    if (needCheckPermission() && !checkParentPathAccess(p, FsAction.EXECUTE, "getStatus")) {
      return superGroupFileSystem.getStatus(p);
    }
    statistics.incrementReadOps(1);
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 16);
    int r = lib.jfs_statvfs(Thread.currentThread().getId(), handle, buf);
    if (r != 0)
      throw error(r, p);
    long capacity = buf.getLongLong(0);
    long remaining = buf.getLongLong(8);
    return new FsStatus(capacity, capacity - remaining, remaining);
  }

  @Override
  public void setPermission(Path p, FsPermission permission) throws IOException {
    if (needCheckPermission() && !checkOwner(p, "setPermission")) {
      superGroupFileSystem.setPermission(p, permission);
      return;
    }
    statistics.incrementWriteOps(1);
    int r = lib.jfs_chmod(Thread.currentThread().getId(), handle, normalizePath(p), permission.toShort());
    if (r != 0)
      throw error(r, p);
  }

  @Override
  public void setOwner(Path p, String username, String groupname) throws IOException {
    if (needCheckPermission()) {
      if (username == null) {
        throw new AccessControlException(
            "User can not be null");
      }
      if (!superuser.equals(username)) {
        throw new AccessControlException(
            "Only SuperUser can do setOwner Action, the current user is " + username);
      }
      superGroupFileSystem.setOwner(p, username, groupname);
      return;
    }
    statistics.incrementWriteOps(1);
    int r = lib.jfs_setOwner(Thread.currentThread().getId(), handle, normalizePath(p), username, groupname);
    if (r != 0)
      throw error(r, p);
  }

  @Override
  public void setTimes(Path p, long mtime, long atime) throws IOException {
    if (needCheckPermission() && !checkPathAccess(p, FsAction.WRITE, "setTimes")) {
      superGroupFileSystem.setTimes(p, mtime, atime);
      return;
    }
    statistics.incrementWriteOps(1);
    int r = lib.jfs_utime(Thread.currentThread().getId(), handle, normalizePath(p), mtime >= 0 ? mtime : -1,
        atime >= 0 ? atime : -1);
    if (r != 0)
      throw error(r, p);
  }

  @Override
  public void close() throws IOException {
    super.close();
    RangerPermissionChecker.release(name, handle);
    BgTaskUtil.unregister(name, handle, () -> {
      cachedHostsForName.clear();
      hashForName.clear();
      lastFileStatus.clear();
    });
    LOG.debug("close {}({})", name, handle);
    lib.jfs_term(Thread.currentThread().getId(), handle);
    if (metricsEnable) {
      JuiceFSInstrumentation.close();
    }
  }

  @Override
  public void setXAttr(Path path, String name, byte[] value, EnumSet<XAttrSetFlag> flag) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, FsAction.WRITE, "setXAttr")) {
      superGroupFileSystem.setXAttr(path, name, value, flag);
      return;
    }
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), value.length);
    buf.put(0, value, 0, value.length);
    int mode = 0; // create or replace
    if (flag.contains(XAttrSetFlag.CREATE) && flag.contains(XAttrSetFlag.REPLACE)) {
      mode = 0;
    } else if (flag.contains(XAttrSetFlag.CREATE)) {
      mode = 1;
    } else if (flag.contains(XAttrSetFlag.REPLACE)) {
      mode = 2;
    }
    int r = lib.jfs_setXattr(Thread.currentThread().getId(), handle, normalizePath(path), name, buf, value.length,
        mode);
    if (r < 0)
      throw error(r, path);
  }

  @Override
  public byte[] getXAttr(Path path, String name) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, FsAction.READ, "getXAttr")) {
      return superGroupFileSystem.getXAttr(path, name);
    }
    Pointer buf;
    int bufsize = 16 << 10;
    int r;
    do {
      bufsize *= 2;
      buf = Memory.allocate(Runtime.getRuntime(lib), bufsize);
      r = lib.jfs_getXattr(Thread.currentThread().getId(), handle, normalizePath(path), name, buf, bufsize);
    } while (r == bufsize);
    if (r == ENOATTR || r == ENODATA)
      return null; // attr not found
    if (r < 0)
      throw error(r, path);
    byte[] value = new byte[r];
    buf.get(0, value, 0, r);
    return value;
  }

  @Override
  public Map<String, byte[]> getXAttrs(Path path) throws IOException {
    return getXAttrs(path, listXAttrs(path));
  }

  @Override
  public Map<String, byte[]> getXAttrs(Path path, List<String> names) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, FsAction.READ, "getXAttrs")) {
      return superGroupFileSystem.getXAttrs(path, names);
    }
    Map<String, byte[]> result = new HashMap<String, byte[]>();
    for (String n : names) {
      byte[] value = getXAttr(path, n);
      if (value != null) {
        result.put(n, value);
      }
    }
    return result;
  }

  @Override
  public List<String> listXAttrs(Path path) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, FsAction.READ, "listXAttrs")) {
      return superGroupFileSystem.listXAttrs(path);
    }
    Pointer buf;
    int bufsize = 1024;
    int r;
    do {
      bufsize *= 2;
      buf = Memory.allocate(Runtime.getRuntime(lib), bufsize);
      r = lib.jfs_listXattr(Thread.currentThread().getId(), handle, normalizePath(path), buf, bufsize);
    } while (r == bufsize);
    if (r < 0)
      throw error(r, path);
    List<String> result = new ArrayList<String>();
    int off = 0, last = 0;
    while (off < r) {
      if (buf.getByte(off) == 0) {
        byte[] arr = new byte[off - last];
        buf.get(last, arr, 0, arr.length);
        result.add(new String(arr));
        last = off + 1;
      }
      off++;
    }
    return result;
  }

  @Override
  public void removeXAttr(Path path, String name) throws IOException {
    if (needCheckPermission() && !checkPathAccess(path, FsAction.WRITE, "removeXAttr")) {
      superGroupFileSystem.removeXAttr(path, name);
      return;
    }
    int r = lib.jfs_removeXattr(Thread.currentThread().getId(), handle, normalizePath(path), name);
    if (r == ENOATTR || r == ENODATA) {
      throw new IOException("No matching attributes found for remove operation");
    }
    if (r < 0)
      throw error(r, path);
  }

  @Override
  public void modifyAclEntries(Path path, List<AclEntry> aclSpec) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "modifyAclEntries")) {
      superGroupFileSystem.modifyAclEntries(path, aclSpec);
      return;
    }
    List<AclEntry> existingEntries = getAllAclEntries(path);
    List<AclEntry> newAcl = AclTransformation.mergeAclEntries(existingEntries, aclSpec);
    setAclInternal(path, newAcl);
  }

  @Override
  public void removeAclEntries(Path path, List<AclEntry> aclSpec) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "removeAclEntries")) {
      superGroupFileSystem.removeAclEntries(path, aclSpec);
      return;
    }
    List<AclEntry> existingEntries = getAllAclEntries(path);
    List<AclEntry> newAcl = AclTransformation.filterAclEntriesByAclSpec(existingEntries, aclSpec);
    setAclInternal(path, newAcl);
  }

  @Override
  public void setAcl(Path path, List<AclEntry> aclSpec) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "setAcl")) {
      superGroupFileSystem.setAcl(path, aclSpec);
      return;
    }
    List<AclEntry> existingEntries = getAllAclEntries(path);
    List<AclEntry> newAcl = AclTransformation.replaceAclEntries(existingEntries, aclSpec);
    setAclInternal(path, newAcl);
  }

  private void setAclInternal(Path path, List<AclEntry> aclSpec) throws IOException {
    List<AclEntry> aclEntries = AclTransformation.buildAndValidateAcl(Lists.newArrayList(aclSpec));
    ScopedAclEntries scoped = new ScopedAclEntries(aclEntries);
    setAclInternal(path, AclEntryScope.ACCESS, scoped.getAccessEntries());
    setAclInternal(path, AclEntryScope.DEFAULT, scoped.getDefaultEntries());
  }

  private void removeAclInternal(Path path, AclEntryScope scope) throws IOException {
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 6 * 2);
    buf.putShort(0, (short) -1);
    buf.putShort(2, (short) -1);
    buf.putShort(4, (short) -1);
    buf.putShort(6, (short) -1);
    buf.putShort(8, (short) 0);
    buf.putShort(10, (short) 0);
    int r = lib.jfs_setfacl(Thread.currentThread().getId(), handle, normalizePath(path), scope.ordinal() + 1, buf,
        6 * 2);
    if (r == ENOATTR || r == ENODATA)
      return;
    if (r < 0)
      throw error(r, path);
  }

  @Override
  public void removeDefaultAcl(Path path) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "removeDefaultAcl")) {
      superGroupFileSystem.removeDefaultAcl(path);
      return;
    }
    removeAclInternal(path, AclEntryScope.DEFAULT);
  }

  @Override
  public void removeAcl(Path path) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "removeAcl")) {
      superGroupFileSystem.removeAcl(path);
      return;
    }
    removeAclInternal(path, AclEntryScope.ACCESS);
    removeAclInternal(path, AclEntryScope.DEFAULT);
  }

  private void setAclInternal(Path path, AclEntryScope scope, List<AclEntry> aclSpec) throws IOException {
    if (aclSpec.size() == 0)
      return;
    short userperm = -1, groupperm = -1, otherperm = -1, maskperm = -1;
    short namedusers = 0, namedgroups = 0;
    int namedaclsize = 0;
    for (AclEntry e : aclSpec) {
      if (e.getName() != null) {
        if (e.getType() == AclEntryType.USER) {
          namedusers++;
        } else {
          namedgroups++;
        }
        namedaclsize += e.getName().getBytes("utf8").length + 2;
      } else {
        short perm = (short) e.getPermission().ordinal();
        switch (e.getType()) {
          case USER:
            userperm = perm;
            break;
          case GROUP:
            groupperm = perm;
            break;
          case OTHER:
            otherperm = perm;
            break;
          case MASK:
            maskperm = perm;
            break;
        }
      }
    }
    Pointer buf = Memory.allocate(Runtime.getRuntime(lib), 12 + namedaclsize);
    buf.putShort(0, userperm);
    buf.putShort(2, groupperm);
    buf.putShort(4, otherperm);
    buf.putShort(6, maskperm);
    buf.putShort(8, namedusers);
    buf.putShort(10, namedgroups);
    int off = 12;
    for (AclEntry e : aclSpec) {
      String name = e.getName();
      if (name != null && e.getType() == AclEntryType.USER) {
        byte[] nb = name.getBytes("utf8");
        buf.putByte(off, (byte) nb.length);
        buf.put(off + 1, nb, 0, nb.length);
        off += 1 + nb.length;
        buf.putByte(off, (byte) e.getPermission().ordinal());
        off += 1;
      }
    }
    for (AclEntry e : aclSpec) {
      String name = e.getName();
      if (name != null && e.getType() == AclEntryType.GROUP) {
        byte[] nb = name.getBytes("utf8");
        buf.putByte(off, (byte) nb.length);
        buf.put(off + 1, nb, 0, nb.length);
        off += 1 + nb.length;
        buf.putByte(off, (byte) e.getPermission().ordinal());
        off += 1;
      }
    }
    int r = lib.jfs_setfacl(Thread.currentThread().getId(), handle, normalizePath(path), scope.ordinal() + 1, buf,
        12 + namedaclsize);
    if (r == ENOTSUP) {
      throw new IOException("Invalid ACL: only directories may have a default ACL");
    }
    if (r < 0)
      throw error(r, path);
  }

  private List<AclEntry> getAclEntries(Path path, AclEntryScope scope) throws IOException {
    int bufsize = 1024;
    int r;
    Pointer buf;
    do {
      bufsize *= 2;
      buf = Memory.allocate(Runtime.getRuntime(lib), bufsize);
      r = lib.jfs_getfacl(Thread.currentThread().getId(), handle, normalizePath(path), scope.ordinal() + 1, buf,
          bufsize);
    } while (r == -100);
    if (r == ENOATTR || r == ENODATA) {
      return Lists.newArrayList();
    }
    if (r < 0)
      throw error(r, path);

    int off = 0;
    short userperm = buf.getShort(0);
    short groupperm = buf.getShort(2);
    short otherperm = buf.getShort(4);
    short maskperm = buf.getShort(6);
    short namedusers = buf.getShort(8);
    short namedgroups = buf.getShort(10);
    off += 12;

    List<AclEntry> entries = new ArrayList<>();
    AclEntry.Builder builder = new AclEntry.Builder().setScope(scope);
    if (userperm != -1) {
      entries.add(builder.setType(AclEntryType.USER).setPermission(FsAction.values()[userperm]).build());
    }
    if (groupperm != -1) {
      entries.add(builder.setType(AclEntryType.GROUP).setPermission(FsAction.values()[groupperm]).build());
    }
    if (otherperm != -1) {
      entries.add(builder.setType(AclEntryType.OTHER).setPermission(FsAction.values()[otherperm]).build());
    }
    if (maskperm != -1) {
      entries.add(builder.setType(AclEntryType.MASK).setPermission(FsAction.values()[maskperm]).build());
    }

    for (int i = 0; i < namedusers + namedgroups; i++) {
      String name = buf.getString(off);
      off += name.length() + 1;
      short perm = buf.getShort(off);
      off += 2;
      entries.add(builder.setType(i < namedusers ? AclEntryType.USER : AclEntryType.GROUP).setName(name)
          .setPermission(FsAction.values()[perm]).build());
    }
    Collections.sort(entries, AclTransformation.ACL_ENTRY_COMPARATOR);
    return entries;
  }

  /**
   * include acl entries from permission
   */
  private List<AclEntry> getAllAclEntries(Path path) throws IOException {
    List<AclEntry> entries = getAclEntries(path, AclEntryScope.ACCESS);
    if (entries.size() == 0) {
      FsPermission perm = getFileStatus(path).getPermission();
      entries = AclUtil.getAclFromPermAndEntries(perm, entries);
    }
    entries.addAll(getAclEntries(path, AclEntryScope.DEFAULT));
    return entries;
  }

  /**
   * exclude acl entries from permission
   */
  private List<AclEntry> getAclEntries(Path path) throws IOException {
    List<AclEntry> res = new ArrayList<>();
    List<AclEntry> accessEntries = getAclEntries(path, AclEntryScope.ACCESS);
    // minimal 3 acls for ugo
    if (accessEntries.size() != 0 && accessEntries.size() != 3) {
      res.addAll(accessEntries.subList(1, accessEntries.size() - 2));
    }
    res.addAll(getAclEntries(path, AclEntryScope.DEFAULT));
    return res;
  }

  @Override
  public AclStatus getAclStatus(Path path) throws IOException {
    if (needCheckPermission() && !checkOwner(path, "getAclStatus")) {
      return superGroupFileSystem.getAclStatus(path);
    }
    FileStatus st = getFileStatus(path);
    List<AclEntry> entries = getAclEntries(path);
    AclStatus.Builder builder = new AclStatus.Builder().owner(st.getOwner()).group(st.getGroup())
        .stickyBit(st.getPermission().getStickyBit()).addEntries(entries);
    try {
      Class<AclStatus.Builder> ab = AclStatus.Builder.class;
      Method abm = ab.getDeclaredMethod("setPermission", FsPermission.class);
      abm.setAccessible(true);
      abm.invoke(builder, st.getPermission());
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException ignored) {
    }
    return builder.build();
  }

  public AuthCredential buildAuthCredential(String spn) throws IOException {
    // auth use kerberos
    if (UserGroupInformation.getLoginUser().hasKerberosCredentials()) {
      dtEnabled = true;
      byte[] cred;
      try {
        cred = KerberosUtil.genApReq(spn);
      } catch (InterruptedException e) {
        throw new IOException("generate kerberos  AP-REQ failed", e);
      }
      return new AuthCredential("kerberos", cred);
    }

    // auth use delegation token
    for (Token<? extends TokenIdentifier> token : ugi.getCredentials().getAllTokens()) {
      if (token.getKind().equals(JuiceFSDelegationTokenIdentifier.TOKEN_KIND) &&
          buildServiceName().equals(token.getService().toString())) {
        dtEnabled = true;

        AbstractDelegationTokenIdentifier identifier = (AbstractDelegationTokenIdentifier) token.decodeIdentifier();
        int id = identifier.getMasterKeyId();
        byte[] password = token.getPassword();
        ByteBuffer buf = ByteBuffer.allocate(8 + password.length);
        buf.putInt(id);
        buf.putInt(password.length);
        buf.put(password);

        return new AuthCredential("token", buf.array());
      }
    }

    return null;
  }

  private String buildServiceName() {
    return getScheme() + "://" + (name == null ? "/" : name);
  }

  @Override
  public String getCanonicalServiceName() {
    return dtEnabled ? buildServiceName() : null;
  }

  @Override
  public Token<?> getDelegationToken(String renewer) throws IOException {
    if (!dtEnabled) {
      return null;
    }
    String owner = ugi.getShortUserName();
    String realUser = ugi.getRealUser() != null ? ugi.getRealUser().getShortUserName() : null;
    int tokenSize = 0, r = 8<<10;
    Pointer tokenBuf = null;
    while (r > tokenSize) {
      tokenSize = r;
      tokenBuf = Memory.allocate(Runtime.getRuntime(lib), tokenSize);
      r = lib.jfs_get_token(handle, name, tokenBuf, tokenSize, (new HadoopKerberosName(renewer)).getShortName());
    }
    if (r < 0) {
      throw new IOException(String.format("get delegation token failed, return code %d", r));
    }
    int id = tokenBuf.getInt(0);
    long issueDate = tokenBuf.getLongLong(4);
    long maxDate = tokenBuf.getLongLong(12);
    int pwdLen = r - 20;
    byte[] pwd = new byte[pwdLen];
    tokenBuf.get(20, pwd, 0, pwdLen);

    JuiceFSDelegationTokenIdentifier identifier =
        new JuiceFSDelegationTokenIdentifier(
            owner,
            renewer,
            realUser);
    identifier.setIssueDate(issueDate);
    identifier.setMaxDate(maxDate);
    identifier.setMasterKeyId(id);

    return new Token<>(
        identifier.getBytes(),
        pwd,
        identifier.getKind(),
        new Text(getCanonicalServiceName()));
  }

  public long renewToken(Token<?> token) throws IOException {
    AbstractDelegationTokenIdentifier identifier = (AbstractDelegationTokenIdentifier) token.decodeIdentifier();
    int id = identifier.getMasterKeyId();
    String pwd = new String(token.getPassword(), StandardCharsets.UTF_8);
    long r = lib.jfs_renew_token(handle, id, pwd);
    if (r == EACCESS) {
      throw new IOException("permission denied");
    }
    if (r < 0) {
      throw new IOException(String.format("renew token failed, return code %d", r));
    }
    return r * 1000;
  }

  public void cancelToken(Token<?> token) throws IOException {
    AbstractDelegationTokenIdentifier identifier = (AbstractDelegationTokenIdentifier) token.decodeIdentifier();
    int id = identifier.getMasterKeyId();
    String pwd = new String(token.getPassword(), StandardCharsets.UTF_8);
    int r = lib.jfs_cancel_token(handle, id, pwd);
    if (r == EACCESS) {
      throw new IOException("permission denied");
    }
    if (r < 0) {
      throw new IOException(String.format("cancel token failed, return code %d", r));
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/KiteDataLoader.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetOperationException;
import org.kitesdk.data.spi.*;
import org.kitesdk.data.spi.filesystem.FileSystemDatasetRepository;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;

public class KiteDataLoader implements Loadable {
  private static class URIBuilder implements OptionBuilder<DatasetRepository> {

    @Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
      String path = match.get("path");
      final Path root = (path == null || path.isEmpty()) ?
              new Path("/") : new Path("/", path);

      Configuration conf = DefaultConfiguration.get();
      FileSystem fs;
      try {
        fs = FileSystem.get(fileSystemURI(match), conf);
      } catch (IOException e) {
        throw new DatasetIOException("Could not get a FileSystem", e);
      }
      return new FileSystemDatasetRepository.Builder()
              .configuration(new Configuration(conf)) // make a modifiable copy
              .rootDirectory(fs.makeQualified(root))
              .build();
    }
  }

  @Override
  public void load() {
    try {
      // load hdfs-site.xml by loading HdfsConfiguration
      FileSystem.getLocal(DefaultConfiguration.get());
    } catch (IOException e) {
      throw new DatasetIOException("Cannot load default config", e);
    }

    OptionBuilder<DatasetRepository> builder = new URIBuilder();
    Registration.register(
            new URIPattern("jfs:/*path"),
            new URIPattern("jfs:/*path/:namespace/:dataset"),
            builder);
  }

  private static URI fileSystemURI(Map<String, String> match) {
    try {
      return new URI(match.get(URIPattern.SCHEME), null,
              match.get(URIPattern.HOST), -1, "/", null, null);
    } catch (URISyntaxException ex) {
      throw new DatasetOperationException("[BUG] Could not build FS URI", ex);
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/Main.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import io.juicefs.bench.NNBench;
import io.juicefs.bench.TestDFSIO;
import io.juicefs.tools.RangerDownloader;
import org.apache.commons.cli.ParseException;

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class Main {
  private static final Map<String, Command> COMMAND = new HashMap<>();

  @Parameter(names = {"--help", "-h", "-help"}, help = true)
  private boolean help = false;

  public abstract static class Command implements Closeable {
    @Parameter(names = {"--help", "-h", "-help"}, help = true)
    public boolean help;

    public Command() {
      COMMAND.put(getCommand(), this);
    }

    public abstract void init() throws IOException;

    public abstract void run() throws IOException;

    public abstract String getCommand();

  }

  public static void main(String[] args) throws ParseException, IOException {
    Main main = new Main();
    Command dfsio = new TestDFSIO();
    Command nnbench = new NNBench();
    Command ranger = new RangerDownloader();
    JCommander jc = JCommander.newBuilder()
        .addObject(main)
        .addCommand(dfsio.getCommand(), dfsio)
        .addCommand(nnbench.getCommand(), nnbench)
        .addCommand(ranger.getCommand(), ranger)
        .build();
    jc.parse(args);

    if (main.help) {
      jc.usage();
      return;
    }

    Command command = COMMAND.get(jc.getParsedCommand());
    if (command.help) {
      jc.getCommands().get(jc.getParsedCommand()).usage();
      return;
    }
    command.init();
    command.run();
    command.close();
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/bench/AccumulatingReducer.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.bench;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import java.io.IOException;
import java.util.Iterator;

/**
 * Reducer that accumulates values based on their type.
 * <p>
 * The type is specified in the key part of the key-value pair
 * as a prefix to the key in the following way
 * <p>
 * <tt>type:key</tt>
 * <p>
 * The values are accumulated according to the types:
 * <ul>
 * <li><tt>s:</tt> - string, concatenate</li>
 * <li><tt>f:</tt> - float, summ</li>
 * <li><tt>l:</tt> - long, summ</li>
 * </ul>
 */
@SuppressWarnings("deprecation")
public class AccumulatingReducer extends MapReduceBase
        implements Reducer<Text, Text, Text, Text> {
  static final String VALUE_TYPE_LONG = "l:";
  static final String VALUE_TYPE_FLOAT = "f:";
  static final String VALUE_TYPE_STRING = "s:";
  private static final Log LOG = LogFactory.getLog(AccumulatingReducer.class);

  protected String hostName;

  public AccumulatingReducer() {
    try {
      hostName = java.net.InetAddress.getLocalHost().getHostName();
    } catch (Exception e) {
      hostName = "localhost";
    }
    LOG.info("Starting AccumulatingReducer on " + hostName);
  }

  @Override
  public void reduce(Text key,
                     Iterator<Text> values,
                     OutputCollector<Text, Text> output,
                     Reporter reporter
  ) throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + hostName);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
      StringBuffer sSum = new StringBuffer();
      while (values.hasNext())
        sSum.append(values.next().toString()).append(";");
      output.collect(key, new Text(sSum.toString()));
      reporter.setStatus("finished " + field + " ::host = " + hostName);
      return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
      float fSum = 0;
      while (values.hasNext())
        fSum += Float.parseFloat(values.next().toString());
      output.collect(key, new Text(String.valueOf(fSum)));
      reporter.setStatus("finished " + field + " ::host = " + hostName);
      return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
      long lSum = 0;
      while (values.hasNext()) {
        lSum += Long.parseLong(values.next().toString());
      }
      output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + hostName);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/bench/IOMapperBase.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.bench;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import java.io.Closeable;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;

public abstract class IOMapperBase extends Configured
        implements Mapper<Text, LongWritable, Text, Text> {
  private static final Log LOG = LogFactory.getLog(IOMapperBase.class);

  protected String hostName;
  protected Closeable stream;
  protected int threadsPerMap;
  protected int filesPerThread;
  protected ExecutorService pool;

  public IOMapperBase() {
  }

  @Override
  public void configure(JobConf conf) {
    setConf(conf);

    try {
      hostName = InetAddress.getLocalHost().getHostName();
    } catch (Exception e) {
      hostName = "localhost";
    }
    threadsPerMap = conf.getInt("test.threadsPerMap", 1);
    filesPerThread = conf.getInt("test.filesPerThread", 1);
    pool = Executors.newFixedThreadPool(threadsPerMap, r -> {
      Thread t = new Thread(r);
      t.setDaemon(true);
      return t;
    });
  }

  @Override
  public void close() throws IOException {
    pool.shutdown();
  }

  abstract Long doIO(Reporter reporter,
                     String name,
                     long value,  Closeable stream) throws IOException;


  public Closeable getIOStream(String name) throws IOException {
    return null;
  }

  abstract void collectStats(OutputCollector<Text, Text> output,
                             String name,
                             long execTime,
                             Long doIOReturnValue) throws IOException;

  @Override
  public void map(Text key,
                  LongWritable value,
                  OutputCollector<Text, Text> output,
                  Reporter reporter) throws IOException {
    String name = key.toString();
    long longValue = value.get();

    reporter.setStatus("starting " + name + " ::host = " + hostName);
    AtomicLong execTime = new AtomicLong(0L);
    List<Future<Long>> futures = new ArrayList<>(threadsPerMap);
    for (int i = 0; i < threadsPerMap; i++) {
      int id = i;
      Future<Long> future = pool.submit(() -> {
        long res = 0;
        for (int j = 0; j < filesPerThread; j++) {
          String filePath = String.format("%s/thread-%s/file-%s", name, id, j);
          try (Closeable stream = getIOStream(filePath)) {
            long tStart = System.currentTimeMillis();
            res += doIO(reporter, name, longValue, stream);
            long tEnd = System.currentTimeMillis();
            execTime.addAndGet(tEnd - tStart);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
        return res;
      });
      futures.add(future);
    }

    Long result = 0L;
    try {
      for (Future<Long> future : futures) {
        result += future.get();
      }
    } catch (InterruptedException | ExecutionException e) {
      throw new RuntimeException(e);
    }

    collectStats(output, name, execTime.get(), result);

    reporter.setStatus("finished " + name + " ::host = " + hostName);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/bench/NNBench.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.bench;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import io.juicefs.Main;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

@Parameters(commandDescription = "Distributed create/open/rename/delete meta benchmark")
public class NNBench extends Main.Command {
  private static final Log LOG = LogFactory.getLog(
          NNBench.class);

  protected static String CONTROL_DIR_NAME = "control";
  protected static String OUTPUT_DIR_NAME = "output";
  protected static String DATA_DIR_NAME = "data";

  @Parameter(description = "[create | open | rename | delete]", required = true)
  public static String operation;
  @Parameter(names = {"-maps"}, description = "number of maps")
  public long numberOfMaps = 1l; // default is 1
  @Parameter(names = {"-files"}, description = "number of files per thread")
  public long numberOfFiles = 1l; // default is 1
  @Parameter(names = {"-threads"}, description = "threads per map")
  public int threadsPerMap = 1;
  public long numberOfReduces = 1l; // default is 1
  @Parameter(names = {"-baseDir"}, description = "full path of dir on FileSystem", required = true)
  public String baseDir = "/benchmarks/NNBench";  // default
  @Parameter(names = {"-deleteBeforeRename"}, description = "delete files before or after rename operation")
  public static boolean deleteBeforeRename;
  @Parameter(names = {"-local"}, description = "run in local single process")
  private boolean local;
  @Parameter(names = {"-startTime"}, description = "start time in milliseconds")
  public long startTime = System.currentTimeMillis() + (30 * 1000);

  // Supported operations
  private static final String OP_CREATE = "create";
  private static final String OP_OPEN = "open";
  private static final String OP_RENAME = "rename";
  private static final String OP_DELETE = "delete";

  // To display in the format that matches the NN and DN log format
  // Example: 2007-10-26 00:01:19,853
  static SimpleDateFormat sdf =
          new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss','S");

  private static Configuration config = new Configuration();

  /**
   * Clean up the files before a test run
   *
   * @throws IOException on error
   */
  private void cleanupBeforeTestrun() throws IOException {
    FileSystem tempFS = new Path(baseDir).getFileSystem(config);

    // Delete the data directory only if it is the create/write operation
    if (operation.equals(OP_CREATE)) {
      LOG.info("Deleting data directory");
      tempFS.delete(new Path(baseDir, DATA_DIR_NAME), true);
    }
    tempFS.delete(new Path(baseDir, CONTROL_DIR_NAME), true);
    tempFS.delete(new Path(baseDir, OUTPUT_DIR_NAME), true);
  }

  /**
   * Create control files before a test run.
   * Number of files created is equal to the number of maps specified
   *
   * @throws IOException on error
   */
  private void createControlFiles() throws IOException {
    FileSystem tempFS = new Path(baseDir).getFileSystem(config);
    LOG.info("Creating " + numberOfMaps + " control files");

    for (int i = 0; i < numberOfMaps; i++) {
      String strFileName = "NNBench_Controlfile_" + i;
      Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
              strFileName);

      SequenceFile.Writer writer = null;
      try {
        writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class,
                LongWritable.class, CompressionType.NONE);
        writer.append(new Text(strFileName), new LongWritable(i));
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
  }

  /**
   * Analyze the results
   *
   * @throws IOException on error
   */
  private void analyzeResults() throws IOException {
    final FileSystem fs = new Path(baseDir).getFileSystem(config);
    Path reduceFile = new Path(new Path(baseDir, OUTPUT_DIR_NAME),
            "part-00000");

    DataInputStream in;
    in = new DataInputStream(fs.open(reduceFile));

    BufferedReader lines;
    lines = new BufferedReader(new InputStreamReader(in));

    long totalTime = 0l;
    long lateMaps = 0l;
    long numOfExceptions = 0l;
    long successfulFileOps = 0l;

    long mapStartTimeTPmS = 0l;
    long mapEndTimeTPmS = 0l;

    String resultTPSLine1 = null;
    String resultALLine1 = null;

    String line;
    while ((line = lines.readLine()) != null) {
      StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%;");
      String attr = tokens.nextToken();
      if (attr.endsWith(":totalTime")) {
        totalTime = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":latemaps")) {
        lateMaps = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":numOfExceptions")) {
        numOfExceptions = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":successfulFileOps")) {
        successfulFileOps = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":mapStartTimeTPmS")) {
        mapStartTimeTPmS = Long.parseLong(tokens.nextToken());
      } else if (attr.endsWith(":mapEndTimeTPmS")) {
        mapEndTimeTPmS = Long.parseLong(tokens.nextToken());
      }
    }

    // Average latency is the average time to perform 'n' number of
    // operations, n being the number of files
    double avgLatency = (double) totalTime / successfulFileOps;

    double totalTimeTPS =
            (double) (1000 * successfulFileOps) / (mapEndTimeTPmS - mapStartTimeTPmS);

    if (operation.equals(OP_CREATE)) {
      resultTPSLine1 = "                           TPS: Create: " +
              (int) (totalTimeTPS);
      resultALLine1 = "                  Avg Lat (ms): Create: " + avgLatency;
    } else if (operation.equals(OP_OPEN)) {
      resultTPSLine1 = "                             TPS: Open: " +
              (int) totalTimeTPS;
      resultALLine1 = "                     Avg Lat (ms): Open: " + avgLatency;
    } else if (operation.equals(OP_RENAME)) {
      resultTPSLine1 = "                           TPS: Rename: " +
              (int) totalTimeTPS;
      resultALLine1 = "                   Avg Lat (ms): Rename: " + avgLatency;
    } else if (operation.equals(OP_DELETE)) {
      resultTPSLine1 = "                           TPS: Delete: " +
              (int) totalTimeTPS;
      resultALLine1 = "                   Avg Lat (ms): Delete: " + avgLatency;
    }

    String resultLines[] = {
            "-------------- NNBench -------------- : ",
            "                           Date & time: " + sdf.format(new Date(
                    System.currentTimeMillis())),
            "",
            "                        Test Operation: " + operation,
            "                            Start time: " +
                    sdf.format(new Date(startTime)),
            "                           Maps to run: " + numberOfMaps,
            "                       Threads per map: " + threadsPerMap,
            "                      Files per thread: " + numberOfFiles,
            "            Successful file operations: " + successfulFileOps,
            "",
            "        # maps that missed the barrier: " + lateMaps,
            "                          # exceptions: " + numOfExceptions,
            "",
            resultTPSLine1,
            resultALLine1,
            "",
            "              RAW DATA: TPS Total (ms): " + totalTime,
            "           RAW DATA: Job Duration (ms): " + (mapEndTimeTPmS - mapStartTimeTPmS),
            "                   RAW DATA: Late maps: " + lateMaps,
            "             RAW DATA: # of exceptions: " + numOfExceptions,
            ""};

    // Write to a file and also dump to log
    for (int i = 0; i < resultLines.length; i++) {
      LOG.info(resultLines[i]);
    }
  }

  /**
   * Run the test
   *
   * @throws IOException on error
   */
  public void runTests() throws IOException {

    JobConf job = new JobConf(config, NNBench.class);

    job.setJobName("NNBench-" + operation);
    FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    // Explicitly set number of max map attempts to 1.
    job.setMaxMapAttempts(1);

    // Explicitly turn off speculative execution
    job.setSpeculativeExecution(false);

    job.setMapperClass(NNBenchMapper.class);
    job.setReducerClass(NNBenchReducer.class);

    FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks((int) numberOfReduces);
    JobClient.runJob(job);
  }

  /**
   * Validate the inputs
   */
  public void validateInputs() {
    // If it is not one of the four operations, then fail
    if (!operation.equals(OP_CREATE) &&
            !operation.equals(OP_OPEN) &&
            !operation.equals(OP_RENAME) &&
            !operation.equals(OP_DELETE)) {
      System.err.println("Error: Unknown operation: " + operation);
      System.exit(-1);
    }

    // If number of maps is a negative number, then fail
    // Hadoop allows the number of maps to be 0
    if (numberOfMaps < 0) {
      System.err.println("Error: Number of maps must be a positive number");
      System.exit(-1);
    }

    // If number of reduces is a negative number or 0, then fail
    if (numberOfReduces <= 0) {
      System.err.println("Error: Number of reduces must be a positive number");
      System.exit(-1);
    }

    // If number of files is a negative number, then fail
    if (numberOfFiles < 0) {
      System.err.println("Error: Number of files must be a positive number");
      System.exit(-1);
    }
  }

  @Override
  public void init() throws IOException {
    LOG.info("Test Inputs: ");
    LOG.info("           Test Operation: " + operation);
    LOG.info("               Start time: " + sdf.format(new Date(startTime)));
    if (!local) {
      LOG.info("           Number of maps: " + numberOfMaps);
    }
    LOG.info("Number of threads per map: " + threadsPerMap);
    LOG.info("          Number of files: " + numberOfFiles);
    LOG.info("                 Base dir: " + baseDir);

    // Set user-defined parameters, so the map method can access the values
    config.set("test.nnbench.operation", operation);
    config.setLong("test.nnbench.maps", numberOfMaps);
    config.setLong("test.nnbench.reduces", numberOfReduces);
    config.setLong("test.nnbench.starttime", startTime);
    config.setLong("test.nnbench.numberoffiles", numberOfFiles);
    config.set("test.nnbench.basedir", baseDir);
    config.setInt("test.nnbench.threadsPerMap", threadsPerMap);
    config.setBoolean("test.nnbench.deleteBeforeRename", deleteBeforeRename);
    config.setBoolean("test.nnbench.local", local);

    config.set("test.nnbench.datadir.name", DATA_DIR_NAME);
    config.set("test.nnbench.outputdir.name", OUTPUT_DIR_NAME);
    config.set("test.nnbench.controldir.name", CONTROL_DIR_NAME);
  }

  @Override
  public void run() throws IOException {
    validateInputs();
    cleanupBeforeTestrun();
    if (local) {
      localRun();
      return;
    }
    createControlFiles();
    runTests();
    analyzeResults();
  }

  private void localRun() {
    NNBenchMapper mapper = new NNBenchMapper();
    mapper.configure(new JobConf(config));

    ExecutorService pool = Executors.newFixedThreadPool(threadsPerMap, r -> {
      Thread t = new Thread(r);
      t.setDaemon(true);
      return t;
    });

    long start = System.currentTimeMillis();
    for (int i = 0; i < threadsPerMap; i++) {
      int threadNum = i;
      pool.submit(() -> {
        try {
          mapper.doMap(Collections.synchronizedList(new ArrayList<>()), 0, threadNum);
        } catch (IOException e) {
          e.printStackTrace();
          System.exit(1);
          throw new RuntimeException(e);
        }
      });
    }
    pool.shutdown();
    try {
      pool.awaitTermination(1, TimeUnit.DAYS);
    } catch (InterruptedException ignored) {
    }
    long end = System.currentTimeMillis();
    double totalTimeTPS =
            (double) (1000 * threadsPerMap * numberOfFiles) / (end - start);
    String[] resultLines = {
            "-------------- NNBench -------------- : ",
            "                           Date & time: " + sdf.format(new Date(
                    System.currentTimeMillis())),
            "",
            "                        Test Operation: " + operation,
            "                            Start time: " +
                    sdf.format(new Date(startTime)),
            "                               Threads: " + threadsPerMap,
            "                      Files per thread: " + numberOfFiles,
            "            Successful file operations: " + threadsPerMap * numberOfFiles,
            "",
            "                                   TPS: " + (int) (totalTimeTPS),
            "                          Avg Lat (ms): " + String.format("%.2f", (double) (end - start) / (threadsPerMap * numberOfFiles)),
            "",
            "           RAW DATA: Job Duration (ms): " + (end - start),
            ""};

    for (int i = 0; i < resultLines.length; i++) {
      LOG.info(resultLines[i]);
    }
  }

  @Override
  public String getCommand() {
    return "nnbench";
  }

  @Override
  public void close() throws IOException {

  }

  /**
   * Mapper class
   */
  static class NNBenchMapper extends Configured
          implements Mapper<Text, LongWritable, Text, Text> {
    FileSystem filesystem = null;

    long numberOfFiles = 1l;
    boolean beforeRename = false;
    String baseDir = null;
    String dataDirName = null;
    String op = null;
    final int MAX_OPERATION_EXCEPTIONS = 1000;
    int threadsPerMap = 1;
    boolean local;

    ExecutorService executorService;

    // Data to collect from the operation

    /**
     * Constructor
     */
    public NNBenchMapper() {
    }


    /**
     * Mapper base implementation
     */
    public void configure(JobConf conf) {
      setConf(conf);
      local = conf.getBoolean("test.nnbench.local", false);
      try {
        baseDir = conf.get("test.nnbench.basedir");
        filesystem = new Path(baseDir).getFileSystem(conf);
      } catch (Exception e) {
        throw new RuntimeException("Cannot get file system.", e);
      }

      numberOfFiles = conf.getLong("test.nnbench.numberoffiles", 1l);
      dataDirName = conf.get("test.nnbench.datadir.name");
      op = conf.get("test.nnbench.operation");
      beforeRename = conf.getBoolean("test.nnbench.deleteBeforeRename", false);

      threadsPerMap = conf.getInt("test.nnbench.threadsPerMap", 1);
      executorService = Executors.newFixedThreadPool(threadsPerMap, r -> {
        Thread t = new Thread(r);
        t.setDaemon(true);
        return t;
      });
    }

    /**
     * Mapper base implementation
     */
    public void close() throws IOException {
    }

    /**
     * Returns when the current number of seconds from the epoch equals
     * the command line argument given by <code>-startTime</code>.
     * This allows multiple instances of this program, running on clock
     * synchronized nodes, to start at roughly the same time.
     *
     * @return true if the method was able to sleep for <code>-startTime</code>
     * without interruption; false otherwise
     */
    private boolean barrier() {
      if (local) {
        return true;
      }
      long startTime = getConf().getLong("test.nnbench.starttime", 0l);
      long currentTime = System.currentTimeMillis();
      long sleepTime = startTime - currentTime;
      boolean retVal = false;

      // If the sleep time is greater than 0, then sleep and return
      if (sleepTime > 0) {
        LOG.info("Waiting in barrier for: " + sleepTime + " ms");

        try {
          Thread.sleep(sleepTime);
          retVal = true;
        } catch (Exception e) {
          retVal = false;
        }
      }

      return retVal;
    }

    /**
     * Map method
     */
    public void map(Text key,
                    LongWritable value,
                    OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {


      List<Entry> res = Collections.synchronizedList(new ArrayList<>());

      for (int i = 0; i < threadsPerMap; i++) {
        int threadNum = i;
        executorService.submit(() -> {
          try {
            doMap(res, value.get(), threadNum);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        });
      }

      executorService.shutdown();
      try {
        executorService.awaitTermination(1, TimeUnit.DAYS);
      } catch (InterruptedException e) {
        throw new RuntimeException(e);
      }

      long successOps = 0L;
      for (Entry entry : res) {
        if (entry.key.toString().contains("successfulFileOps")) {
          successOps += Long.parseLong(entry.value.toString());
        }
        output.collect(entry.key, entry.value);
      }
      reporter.setStatus("Finish " + successOps + " files");
    }

    static class Entry {
      Text key;
      Text value;

      Entry(Text key, Text value) {
        this.key = key;
        this.value = value;
      }
    }

    private void doMap(List<Entry> res, long mapId, int threadNum) throws IOException {
      long startTimeTPmS = 0l;
      long endTimeTPms = 0l;

      AtomicLong successfulFileOps = new AtomicLong(0L);
      AtomicInteger numOfExceptions = new AtomicInteger(0);
      AtomicLong totalTime = new AtomicLong(0L);

      if (barrier()) {
        startTimeTPmS = System.currentTimeMillis();
        if (op.equals(OP_CREATE)) {
          doCreate(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum);
        } else if (op.equals(OP_OPEN)) {
          doOpen(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum);
        } else if (op.equals(OP_RENAME)) {
          doRenameOp(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum);
        } else if (op.equals(OP_DELETE)) {
          doDeleteOp(mapId, successfulFileOps, numOfExceptions, totalTime, threadNum);
        }

        endTimeTPms = System.currentTimeMillis();
      } else {
        res.add(new Entry(new Text("l:latemaps"), new Text("1")));
      }

      // collect after the map end time is measured
      res.add(new Entry(new Text("l:totalTime"),
              new Text(String.valueOf(totalTime.get()))));
      res.add(new Entry(new Text("l:numOfExceptions"),
              new Text(String.valueOf(numOfExceptions.get()))));
      res.add(new Entry(new Text("l:successfulFileOps"),
              new Text(String.valueOf(successfulFileOps.get()))));
      res.add(new Entry(new Text("min:mapStartTimeTPmS"),
              new Text(String.valueOf(startTimeTPmS))));
      res.add(new Entry(new Text("max:mapEndTimeTPmS"),
              new Text(String.valueOf(endTimeTPms))));
    }

    /**
     * Create operation.
     */
    private void doCreate(long mapId,
                          AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException {
      FSDataOutputStream out;

      for (long l = 0L; l < numberOfFiles; l++) {
        Path filePath = new Path(new Path(baseDir, dataDirName),
                new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l)));
        boolean successfulOp = false;
        while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) {
          try {
            // Set up timer for measuring AL (transaction #1)
            long startTime = System.currentTimeMillis();
            // Create the file
            out = filesystem.create(filePath, false);
            out.close();
            totalTime.addAndGet(System.currentTimeMillis() - startTime);
            successfulFileOps.getAndIncrement();
            successfulOp = true;
          } catch (IOException e) {
            LOG.info("Exception recorded in op: " +
                    "Create", e);
            numOfExceptions.getAndIncrement();
            throw e;
          }
        }
      }
    }

    /**
     * Open operation
     */
    private void doOpen(long mapId,
                        AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException {
      FSDataInputStream input;

      for (long l = 0L; l < numberOfFiles; l++) {
        Path filePath = new Path(new Path(baseDir, dataDirName),
                new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l)));

        boolean successfulOp = false;
        while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) {
          try {
            // Set up timer for measuring AL
            long startTime = System.currentTimeMillis();
            input = filesystem.open(filePath);
            input.close();
            totalTime.addAndGet(System.currentTimeMillis() - startTime);
            successfulFileOps.getAndIncrement();
            successfulOp = true;
          } catch (IOException e) {
            LOG.info("Exception recorded in op: OpenRead " + e);
            numOfExceptions.getAndIncrement();
            throw e;
          }
        }
      }
    }

    /**
     * Rename operation
     */
    private void doRenameOp(long mapId,
                            AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException {
      for (long l = 0L; l < numberOfFiles; l++) {
        Path filePath = new Path(new Path(baseDir, dataDirName),
                new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l)));
        Path filePathR = new Path(new Path(baseDir, dataDirName),
                new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_r_" + l)));

        boolean successfulOp = false;
        while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) {
          try {
            // Set up timer for measuring AL
            long startTime = System.currentTimeMillis();
            filesystem.rename(filePath, filePathR);
            totalTime.addAndGet(System.currentTimeMillis() - startTime);
            successfulFileOps.getAndIncrement();
            successfulOp = true;
          } catch (IOException e) {
            LOG.info("Exception recorded in op: Rename");
            numOfExceptions.getAndIncrement();
            throw e;
          }
        }
      }
    }

    /**
     * Delete operation
     */
    private void doDeleteOp(long mapId,
                            AtomicLong successfulFileOps, AtomicInteger numOfExceptions, AtomicLong totalTime, int threadNum) throws IOException {
      for (long l = 0L; l < numberOfFiles; l++) {
        Path filePath;
        if (beforeRename) {
          filePath = new Path(new Path(baseDir, dataDirName),
                  new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_" + l)));
        } else {
          filePath = new Path(new Path(baseDir, dataDirName),
                  new Path(String.valueOf(mapId), new Path(String.valueOf(threadNum), "file_r_" + l)));
        }

        boolean successfulOp = false;
        while (!successfulOp && numOfExceptions.get() < MAX_OPERATION_EXCEPTIONS) {
          try {
            // Set up timer for measuring AL
            long startTime = System.currentTimeMillis();
            filesystem.delete(filePath, false);
            totalTime.addAndGet(System.currentTimeMillis() - startTime);
            successfulFileOps.getAndIncrement();
            successfulOp = true;
          } catch (IOException e) {
            LOG.info("Exception in recorded op: Delete");
            numOfExceptions.getAndIncrement();
            throw e;
          }
        }
      }
    }
  }

  /**
   * Reducer class
   */
  static class NNBenchReducer extends MapReduceBase
          implements Reducer<Text, Text, Text, Text> {

    protected String hostName;

    public NNBenchReducer() {
      LOG.info("Starting NNBenchReducer !!!");
      try {
        hostName = java.net.InetAddress.getLocalHost().getHostName();
      } catch (Exception e) {
        hostName = "localhost";
      }
      LOG.info("Starting NNBenchReducer on " + hostName);
    }

    /**
     * Reduce method
     */
    public void reduce(Text key,
                       Iterator<Text> values,
                       OutputCollector<Text, Text> output,
                       Reporter reporter
    ) throws IOException {
      String field = key.toString();

      reporter.setStatus("starting " + field + " ::host = " + hostName);

      // sum long values
      if (field.startsWith("l:")) {
        long lSum = 0;
        while (values.hasNext()) {
          lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
      }

      if (field.startsWith("min:")) {
        long minVal = -1;
        while (values.hasNext()) {
          long value = Long.parseLong(values.next().toString());

          if (minVal == -1) {
            minVal = value;
          } else {
            if (value != 0 && value < minVal) {
              minVal = value;
            }
          }
        }
        output.collect(key, new Text(String.valueOf(minVal)));
      }

      if (field.startsWith("max:")) {
        long maxVal = -1;
        while (values.hasNext()) {
          long value = Long.parseLong(values.next().toString());

          if (maxVal == -1) {
            maxVal = value;
          } else {
            if (value > maxVal) {
              maxVal = value;
            }
          }
        }
        output.collect(key, new Text(String.valueOf(maxVal)));
      }

      reporter.setStatus("finished " + field + " ::host = " + hostName);
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/bench/TestDFSIO.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.bench;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import io.juicefs.Main;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.*;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;


@Parameters(commandDescription = "Distributed i/o benchmark")
public class TestDFSIO extends Main.Command {
  // Constants
  private static final Log LOG = LogFactory.getLog(TestDFSIO.class);
  private static final String BASE_FILE_NAME = "test_io_";
  private static final long MEGA = ByteMultiple.MB.value();

  @Parameter(description = "[-read | -write]", required = true)
  private String testType;
  @Parameter(names = {"-random"}, description = "random read")
  private boolean random;
  @Parameter(names = {"-backward"}, description = "backward read")
  private boolean backward;
  @Parameter(names = {"-skip"}, description = "skip read")
  private boolean skip;
  @Parameter(names = {"-local"}, description = "run in local single process")
  private boolean local;

  @Parameter(names = {"-baseDir"}, description = "full path of dir on FileSystem", required = true)
  private String baseDir = "/benchmarks/DFSIO";

  @Parameter(names = {"-bufferSize"}, description = "bufferSize[B|KB|MB|GB|TB]")
  private String bufferSize = "1MB";
  @Parameter(names = {"-size"}, description = "per file size[B|KB|MB|GB|TB]")
  private String size = "1GB";
  @Parameter(names = {"-maps"}, description = "number of maps")
  private int maps = 1;
  @Parameter(names = {"-threads"}, description = "threads per map")
  private int threadsPerMap = 1;
  @Parameter(names = {"-files"}, description = "number of files per thread")
  private int filesPerThread = 1;
  @Parameter(names = {"-skipSize"}, description = "skipSize[B|KB|MB|GB|TB]")
  private String skipSize;
  @Parameter(names = {"-compression"}, description = "codecClassName")
  String compression = null;
  @Parameter(names = {"-randomBytes"}, description = "generate randomBytes")
  boolean randomBytes = false;

  private FileSystem fs;
  private TestType type;
  private Configuration config;

  @Override
  public void close() throws IOException {
    this.fs.close();
  }

  private enum TestType {
    TEST_TYPE_READ("read"),
    TEST_TYPE_WRITE("write"),
    TEST_TYPE_CLEANUP("cleanup"),
    TEST_TYPE_APPEND("append"),
    TEST_TYPE_READ_RANDOM("random read"),
    TEST_TYPE_READ_BACKWARD("backward read"),
    TEST_TYPE_READ_SKIP("skip read"),
    TEST_TYPE_TRUNCATE("truncate");

    private String type;

    TestType(String t) {
      type = t;
    }

    @Override // String
    public String toString() {
      return type;
    }
  }

  static enum ByteMultiple {
    B(1L),
    KB(0x400L),
    MB(0x100000L),
    GB(0x40000000L),
    TB(0x10000000000L);

    private long multiplier;

    private ByteMultiple(long mult) {
      multiplier = mult;
    }

    long value() {
      return multiplier;
    }

    static ByteMultiple parseString(String sMultiple) {
      if (sMultiple == null || sMultiple.isEmpty()) // MB by default
      {
        return MB;
      }
      String sMU = sMultiple.toUpperCase(Locale.ENGLISH);
      if (B.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) {
        return B;
      }
      if (KB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) {
        return KB;
      }
      if (MB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) {
        return MB;
      }
      if (GB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) {
        return GB;
      }
      if (TB.name().toUpperCase(Locale.ENGLISH).endsWith(sMU)) {
        return TB;
      }
      throw new IllegalArgumentException("Unsupported ByteMultiple " + sMultiple);
    }
  }

  public TestDFSIO() {
    this.config = new Configuration();
  }

  @Override
  public void init() throws IOException {
    this.config = new Configuration();
    config.setBoolean("dfs.support.append", true);
    this.fs = new Path(baseDir).getFileSystem(config);

    checkArgs();
    switch (testType) {
      case "-read":
        type = TestType.TEST_TYPE_READ;
        break;
      case "-write":
        type = TestType.TEST_TYPE_WRITE;
        break;
      case "-append":
        type = TestType.TEST_TYPE_APPEND;
        break;
      case "-truncate":
        type = TestType.TEST_TYPE_TRUNCATE;
        break;
      case "-clean":
        type = TestType.TEST_TYPE_CLEANUP;
        break;
      default:
        throw new IllegalArgumentException("wrong type");
    }
    if (random) {
      type = TestType.TEST_TYPE_READ_RANDOM;
    } else if (backward) {
      type = TestType.TEST_TYPE_READ_BACKWARD;
    } else if (skip) {
      type = TestType.TEST_TYPE_READ_SKIP;
    }
    int bufferSizeBytes = (int) parseSize(bufferSize);
    long sizeInBytes = parseSize(size);
    long skipSizeInBytes = skipSize == null ? 0 : parseSize(skipSize);
    if (type == TestType.TEST_TYPE_READ_BACKWARD) {
      skipSizeInBytes = -bufferSizeBytes;
    } else if (type == TestType.TEST_TYPE_READ_SKIP && skipSizeInBytes == 0) {
      skipSizeInBytes = bufferSizeBytes;
    }

    config.setInt("test.io.file.buffer.size", bufferSizeBytes);
    config.setLong("test.io.skip.size", skipSizeInBytes);
    config.setBoolean("dfs.support.append", true);
    config.setInt("test.threadsPerMap", threadsPerMap);
    config.setInt("test.filesPerThread", filesPerThread);
    config.set("test.basedir", baseDir);
    config.setBoolean("test.randomBytes", randomBytes);

    LOG.info("type = " + type);
    if (!local) {
      LOG.info("maps = " + maps);
    }
    LOG.info("threads = " + threadsPerMap);
    LOG.info("files = " + filesPerThread);
    LOG.info("randomBytes = " + randomBytes);
    LOG.info("fileSize (MB) = " + TestDFSIO.toMB(sizeInBytes));
    LOG.info("bufferSize = " + bufferSize);
    if (skipSizeInBytes > 0)
      LOG.info("skipSize = " + skipSize);
    LOG.info("baseDir = " + baseDir);

    createControlFile(fs, sizeInBytes, maps);
    if (compression != null) {
      LOG.info("compressionClass = " + compression);
    }
  }

  private void checkArgs() {
    if (!testType.equals("-read")) {
      if (random || backward || skip) {
        throw new IllegalArgumentException("random, backward, skip are only valid under read");
      }
    } else {
      boolean[] conds = {random, backward, skip};
      int trueCount = 0;
      for (boolean cond : conds) {
        if (cond) {
          trueCount++;
          if (trueCount > 1) {
            throw new IllegalArgumentException("random, backward, skip are mutually exclusive");
          }
        }
      }
    }
  }

  private void localRun(TestType testType) throws IOException {
    IOStatMapper ioer;
    switch (testType) {
      case TEST_TYPE_READ:
        ioer = new ReadMapper();
        break;
      case TEST_TYPE_WRITE:
        ioer = new WriteMapper();
        fs.delete(getDataDir(config), true);
        break;
      case TEST_TYPE_APPEND:
        ioer = new AppendMapper();
        break;
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        ioer = new RandomReadMapper();
        break;
      case TEST_TYPE_TRUNCATE:
        ioer = new TruncateMapper();
        break;
      default:
        return;
    }
    ExecutorService pool = Executors.newFixedThreadPool(threadsPerMap, r -> {
      Thread t = new Thread(r);
      t.setDaemon(true);
      return t;
    });

    ioer.configure(new JobConf(config));
    AtomicLong sizeProcessed = new AtomicLong();
    long start = System.currentTimeMillis();
    for (int i = 0; i < threadsPerMap; i++) {
      int id = i;
      pool.execute(() -> {
        for (int j = 0; j < filesPerThread; j++) {
          String name = String.format("%s/thread-%s/file-%s", getFileName(0), id, j);
          try {
            Long res = ioer.doIO(Reporter.NULL, name, parseSize(size), ioer.getIOStream(name));
            sizeProcessed.addAndGet(res);
          } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
          }
        }
      });

    }
    pool.shutdown();
    try {
      pool.awaitTermination(1, TimeUnit.DAYS);
    } catch (InterruptedException ignored) {
    }
    long end = System.currentTimeMillis();

    DecimalFormat df = new DecimalFormat("#.##");
    String resultLines[] = {
            "----- TestClient ----- : " + testType,
            "            Date & time: " + new Date(System.currentTimeMillis()),
            "      Number of threads: " + threadsPerMap,
            "Number files per thread: " + filesPerThread,
            "            Total files: " + threadsPerMap * filesPerThread,
            " Total MBytes processed: " + df.format(toMB(sizeProcessed.get())),
            "Total Throughput MB/sec: " + df.format(toMB(sizeProcessed.get()) / msToSecs(end - start)),
            "     Test exec time sec: " + df.format(msToSecs(end - start)),
            ""};

    for (String resultLine : resultLines) {
      LOG.info(resultLine);
    }
  }

  @Override
  public void run() throws IOException {
    if (type == TestType.TEST_TYPE_CLEANUP) {
      cleanup(fs);
      return;
    }
    if (local) {
      localRun(type);
      return;
    }
    long tStart = System.currentTimeMillis();
    switch (type) {
      case TEST_TYPE_WRITE:
        writeTest(fs);
        break;
      case TEST_TYPE_READ:
        readTest(fs);
        break;
      case TEST_TYPE_APPEND:
        appendTest(fs);
        break;
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        randomReadTest(fs);
        break;
      case TEST_TYPE_TRUNCATE:
        truncateTest(fs);
        break;
      default:
    }
    long execTime = System.currentTimeMillis() - tStart;

    analyzeResult(fs, type, execTime);
  }

  @Override
  public String getCommand() {
    return "dfsio";
  }

  private String getBaseDir(Configuration conf) {
    return baseDir;
  }

  private Path getControlDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_control");
  }

  private Path getWriteDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_write");
  }

  private Path getReadDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_read");
  }

  private Path getAppendDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_append");
  }

  private Path getRandomReadDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_random_read");
  }

  private Path getTruncateDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_truncate");
  }

  private Path getDataDir(Configuration conf) {
    return new Path(getBaseDir(conf), "io_data");
  }


  @SuppressWarnings("deprecation")
  private void createControlFile(FileSystem fs,
                                 long nrBytes, // in bytes
                                 int maps
  ) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + maps + " files");
    final int maxDirItems = config.getInt("dfs.namenode.fs-limits.max-directory-items", 1024 * 1024);
    Path controlDir = getControlDir(config);

    if (maps > maxDirItems) {
      final String message = "The directory item limit of " + controlDir +
              " is exceeded: limit=" + maxDirItems + " items=" + maps;
      throw new IOException(message);
    }

    fs.delete(controlDir, true);

    for (int i = 0; i < maps; i++) {
      String name = getFileName(i);
      Path controlFile = new Path(controlDir, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer = SequenceFile.createWriter(fs, config, controlFile,
                Text.class, LongWritable.class,
                CompressionType.NONE);
        writer.append(new Text(name), new LongWritable(nrBytes));
      } catch (Exception e) {
        throw new IOException(e.getLocalizedMessage());
      } finally {
        if (writer != null) {
          writer.close();
        }
      }
    }
    LOG.info("created control files for: " + maps + " files");
  }

  private static String getFileName(int fIdx) {
    return BASE_FILE_NAME + fIdx;
  }

  /**
   * Write/Read mapper base class.
   * <p>
   * Collects the following statistics per task:
   * <ul>
   * <li>number of tasks completed</li>
   * <li>number of bytes written/read</li>
   * <li>execution time</li>
   * <li>i/o rate</li>
   * <li>i/o rate squared</li>
   * </ul>
   */
  private abstract static class IOStatMapper extends IOMapperBase {
    protected CompressionCodec compressionCodec;
    private static final ThreadLocalRandom random = ThreadLocalRandom.current();
    private boolean randomBytes;
    protected FileSystem fs;
    protected String baseDir;
    protected ThreadLocal<byte[]> buffer;
    protected int bufferSize;

    IOStatMapper() {
    }

    public byte[] getBuffer() {
      if (randomBytes) {
        random.nextBytes(buffer.get());
      }
      return buffer.get();
    }

    @Override // Mapper
    public void configure(JobConf conf) {
      super.configure(conf);
      bufferSize = conf.getInt("test.io.file.buffer.size", 4096);
      buffer = ThreadLocal.withInitial(() -> new byte[bufferSize]);
      try {
        baseDir = conf.get("test.basedir");
        fs = new Path(baseDir).getFileSystem(conf);
      } catch (IOException e) {
        throw new RuntimeException("Cannot create file system.", e);
      }
      randomBytes = conf.getBoolean("test.randomBytes", false);

      // grab compression
      String compression = getConf().get("test.io.compression.class", null);
      Class<? extends CompressionCodec> codec;

      // try to initialize codec
      try {
        codec = (compression == null) ? null :
                Class.forName(compression).asSubclass(CompressionCodec.class);
      } catch (Exception e) {
        throw new RuntimeException("Compression codec not found: ", e);
      }

      if (codec != null) {
        compressionCodec = (CompressionCodec)
                ReflectionUtils.newInstance(codec, getConf());
      }

    }

    Path getDataDir() {
      return new Path(baseDir, "io_data");
    }

    @Override
      // IOMapperBase
    void collectStats(OutputCollector<Text, Text> output,
                      String name,
                      long execTime,
                      Long objSize) throws IOException {
      long totalSize = objSize;
      float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + totalSize);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + ioRateMbSec);

      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
              new Text(String.valueOf(threadsPerMap * filesPerThread)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
              new Text(String.valueOf(totalSize)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
              new Text(String.valueOf(execTime)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
              new Text(String.valueOf(ioRateMbSec * 1000 * threadsPerMap)));
      output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
              new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000 * threadsPerMap)));
    }
  }

  /**
   * Write mapper class.
   */
  public static class WriteMapper extends IOStatMapper {

    public WriteMapper() {
    }

    @Override // IOMapperBase
    public Closeable getIOStream(String name) throws IOException {
      // create file
      Path f = new Path(getDataDir(), name);
      fs.mkdirs(f.getParent());
      OutputStream out =
              fs.create(f, false, bufferSize);
      if (compressionCodec != null) {
        out = compressionCodec.createOutputStream(out);
      }
      LOG.info("out = " + out.getClass().getName());
      return out;
    }

    @Override // IOMapperBase
    public Long doIO(Reporter reporter,
                     String name,
                     long totalSize, // in bytes
                     Closeable stream) throws IOException {
      OutputStream out = (OutputStream) stream;

      // write to the file
      long nrRemaining;
      for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
        int curSize = (bufferSize < nrRemaining) ? bufferSize : (int) nrRemaining;
        out.write(getBuffer(), 0, curSize);
        reporter.setStatus("writing " + name + "@" +
                (totalSize - nrRemaining) + "/" + totalSize
                + " ::host = " + hostName);
      }
      return Long.valueOf(totalSize);
    }
  }

  private long writeTest(FileSystem fs) throws IOException {
    Path writeDir = getWriteDir(config);
    fs.delete(getDataDir(config), true);
    fs.delete(writeDir, true);
    long tStart = System.currentTimeMillis();
    runIOTest(WriteMapper.class, writeDir);
    long execTime = System.currentTimeMillis() - tStart;
    return execTime;
  }

  private void runIOTest(
          Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass,
          Path outputDir) throws IOException {
    JobConf job = new JobConf(config, TestDFSIO.class);
    job.setBoolean("mapreduce.output.fileoutputformat.compress", false);

    FileInputFormat.setInputPaths(job, getControlDir(config));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    JobClient.runJob(job);
  }

  /**
   * Append mapper class.
   */
  public static class AppendMapper extends IOStatMapper {

    public AppendMapper() {
    }

    @Override // IOMapperBase
    public Closeable getIOStream(String name) throws IOException {
      // open file for append
      OutputStream out =
              fs.append(new Path(getDataDir(), name), bufferSize);
      if (compressionCodec != null)
        out = compressionCodec.createOutputStream(out);
      LOG.info("out = " + out.getClass().getName());
      return out;
    }

    @Override // IOMapperBase
    public Long doIO(Reporter reporter,
                     String name,
                     long totalSize, // in bytes
                     Closeable stream) throws IOException {
      OutputStream out = (OutputStream) stream;
      // write to the file
      long nrRemaining;
      for (nrRemaining = totalSize; nrRemaining > 0; nrRemaining -= bufferSize) {
        int curSize = (bufferSize < nrRemaining) ? bufferSize : (int) nrRemaining;
        out.write(getBuffer(), 0, curSize);
        reporter.setStatus("writing " + name + "@" +
                (totalSize - nrRemaining) + "/" + totalSize
                + " ::host = " + hostName);
      }
      return totalSize;
    }


  }

  private long appendTest(FileSystem fs) throws IOException {
    Path appendDir = getAppendDir(config);
    fs.delete(appendDir, true);
    long tStart = System.currentTimeMillis();
    runIOTest(AppendMapper.class, appendDir);
    return System.currentTimeMillis() - tStart;
  }

  /**
   * Read mapper class.
   */
  public static class ReadMapper extends IOStatMapper {

    public ReadMapper() {
    }

    @Override // IOMapperBase
    public Closeable getIOStream(String name) throws IOException {
      // open file
      InputStream in = fs.open(new Path(getDataDir(), name));
      if (compressionCodec != null) {
        in = compressionCodec.createInputStream(in);
      }
      LOG.info("in = " + in.getClass().getName());
      return in;
    }

    @Override // IOMapperBase
    public Long doIO(Reporter reporter,
                     String name,
                     long totalSize, // in bytes
                     Closeable stream) throws IOException {
      InputStream in = (InputStream) stream;
      long actualSize = 0;
      while (actualSize < totalSize) {
        int curSize = in.read(buffer.get(), 0, bufferSize);
        if (curSize < 0) {
          break;
        }
        actualSize += curSize;
        reporter.setStatus("reading " + name + "@" +
                actualSize + "/" + totalSize
                + " ::host = " + hostName);
      }
      return actualSize;
    }
  }

  private long readTest(FileSystem fs) throws IOException {
    Path readDir = getReadDir(config);
    fs.delete(readDir, true);
    long tStart = System.currentTimeMillis();
    runIOTest(ReadMapper.class, readDir);
    return System.currentTimeMillis() - tStart;
  }

  public static class RandomReadMapper extends IOStatMapper {
    private ThreadLocalRandom rnd;
    private long fileSize;
    private long skipSize;

    @Override // Mapper
    public void configure(JobConf conf) {
      super.configure(conf);
      skipSize = conf.getLong("test.io.skip.size", 0);
    }

    public RandomReadMapper() {
      rnd = ThreadLocalRandom.current();
    }

    @Override // IOMapperBase
    public Closeable getIOStream(String name) throws IOException {
      Path filePath = new Path(getDataDir(), name);
      this.fileSize = fs.getFileStatus(filePath).getLen();
      InputStream in = fs.open(filePath);
      if (compressionCodec != null)
        in = new FSDataInputStream(compressionCodec.createInputStream(in));
      LOG.info("in = " + in.getClass().getName());
      LOG.info("skipSize = " + skipSize);
      return in;
    }

    @Override // IOMapperBase
    public Long doIO(Reporter reporter,
                     String name,
                     long totalSize, // in bytes
                     Closeable stream) throws IOException {
      PositionedReadable in = (PositionedReadable) stream;
      long actualSize = 0;
      for (long pos = nextOffset(-1);
           actualSize < totalSize; pos = nextOffset(pos)) {
        int curSize = in.read(pos, buffer.get(), 0, bufferSize);
        if (curSize < 0) break;
        actualSize += curSize;
        reporter.setStatus("reading " + name + "@" +
                actualSize + "/" + totalSize
                + " ::host = " + hostName);
      }
      return actualSize;
    }

    /**
     * Get next offset for reading.
     * If current < 0 then choose initial offset according to the read type.
     *
     * @param current offset
     * @return
     */
    private long nextOffset(long current) {
      if (skipSize == 0)
        return rnd.nextLong(fileSize);
      if (skipSize > 0)
        return (current < 0) ? 0 : (current + bufferSize + skipSize);
      // skipSize < 0
      return (current < 0) ? Math.max(0, fileSize - bufferSize) :
              Math.max(0, current + skipSize);
    }
  }

  private long randomReadTest(FileSystem fs) throws IOException {
    Path readDir = getRandomReadDir(config);
    fs.delete(readDir, true);
    long tStart = System.currentTimeMillis();
    runIOTest(RandomReadMapper.class, readDir);
    return System.currentTimeMillis() - tStart;
  }

  /**
   * Truncate mapper class.
   * The mapper truncates given file to the newLength, specified by -size.
   */
  public static class TruncateMapper extends IOStatMapper {
    private static final long DELAY = 100L;

    private Path filePath;
    private long fileSize;

    @Override // IOMapperBase
    public Closeable getIOStream(String name) throws IOException {
      filePath = new Path(getDataDir(), name);
      fileSize = fs.getFileStatus(filePath).getLen();
      return null;
    }

    @Override // IOMapperBase
    public Long doIO(Reporter reporter,
                     String name,
                     long newLength, // in bytes
                     Closeable stream) throws IOException {
      boolean isClosed = fs.truncate(filePath, newLength);
      reporter.setStatus("truncating " + name + " to newLength " +
              newLength + " ::host = " + hostName);
      for (int i = 0; !isClosed; i++) {
        try {
          Thread.sleep(DELAY);
        } catch (InterruptedException ignored) {
        }
        FileStatus status = fs.getFileStatus(filePath);
        assert status != null : "status is null";
        isClosed = (status.getLen() == newLength);
        reporter.setStatus("truncate recover for " + name + " to newLength " +
                newLength + " attempt " + i + " ::host = " + hostName);
      }
      return fileSize - newLength;
    }
  }

  private long truncateTest(FileSystem fs) throws IOException {
    Path TruncateDir = getTruncateDir(config);
    fs.delete(TruncateDir, true);
    long tStart = System.currentTimeMillis();
    runIOTest(TruncateMapper.class, TruncateDir);
    return System.currentTimeMillis() - tStart;
  }

  /**
   * Returns size in bytes.
   *
   * @param arg = {d}[B|KB|MB|GB|TB]
   * @return
   */
  static long parseSize(String arg) {
    String[] args = arg.split("\\D", 2);  // get digits
    assert args.length <= 2;
    long nrBytes = Long.parseLong(args[0]);
    String bytesMult = arg.substring(args[0].length()); // get byte multiple
    return nrBytes * ByteMultiple.parseString(bytesMult).value();
  }

  static float toMB(long bytes) {
    return ((float) bytes) / MEGA;
  }

  static float msToSecs(long timeMillis) {
    return timeMillis / 1000.0f;
  }

  private void analyzeResult(FileSystem fs,
                             TestType testType,
                             long execTime
  ) throws IOException {
    Path reduceFile = getReduceFilePath(testType);
    long tasks = 0;
    long size = 0;
    long time = 0;
    float rate = 0;
    float sqrate = 0;
    DataInputStream in = null;
    BufferedReader lines = null;
    try {
      in = new DataInputStream(fs.open(reduceFile));
      lines = new BufferedReader(new InputStreamReader(in));
      String line;
      while ((line = lines.readLine()) != null) {
        StringTokenizer tokens = new StringTokenizer(line, " \t\n\r\f%");
        String attr = tokens.nextToken();
        if (attr.endsWith(":tasks"))
          tasks = Long.parseLong(tokens.nextToken());
        else if (attr.endsWith(":size"))
          size = Long.parseLong(tokens.nextToken());
        else if (attr.endsWith(":time"))
          time = Long.parseLong(tokens.nextToken());
        else if (attr.endsWith(":rate"))
          rate = Float.parseFloat(tokens.nextToken());
        else if (attr.endsWith(":sqrate"))
          sqrate = Float.parseFloat(tokens.nextToken());
      }
    } finally {
      if (in != null) in.close();
      if (lines != null) lines.close();
    }

    double med = rate / 1000 / tasks;
    double stdDev = Math.sqrt(Math.abs(sqrate / 1000 / tasks - med * med));
    DecimalFormat df = new DecimalFormat("#.##");
    String resultLines[] = {
            "----- TestDFSIO ----- : " + testType,
            "            Date & time: " + new Date(System.currentTimeMillis()),
            "        Number of files: " + tasks,
            " Total MBytes processed: " + df.format(toMB(size)),
            "Total Throughput MB/sec: " + df.format(toMB(size) / msToSecs(time) * tasks),
            " Average IO rate MB/sec: " + df.format(med),
            "  IO rate std deviation: " + df.format(stdDev),
            "     Test exec time sec: " + df.format(msToSecs(execTime)),
            ""};
    for (String resultLine : resultLines) {
      LOG.info(resultLine);
    }
  }

  private Path getReduceFilePath(TestType testType) {
    switch (testType) {
      case TEST_TYPE_WRITE:
        return new Path(getWriteDir(config), "part-00000");
      case TEST_TYPE_APPEND:
        return new Path(getAppendDir(config), "part-00000");
      case TEST_TYPE_READ:
        return new Path(getReadDir(config), "part-00000");
      case TEST_TYPE_READ_RANDOM:
      case TEST_TYPE_READ_BACKWARD:
      case TEST_TYPE_READ_SKIP:
        return new Path(getRandomReadDir(config), "part-00000");
      case TEST_TYPE_TRUNCATE:
        return new Path(getTruncateDir(config), "part-00000");
      default:
    }
    return null;
  }

  private void cleanup(FileSystem fs)
          throws IOException {
    LOG.info("Cleaning up test files");
    fs.delete(new Path(getBaseDir(config)), true);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/exception/QuotaExceededException.java
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.exception;

import java.io.IOException;

public class QuotaExceededException extends IOException {
  protected static final long serialVersionUID = 1L;

  public QuotaExceededException() {
  }

  public QuotaExceededException(String msg) {
    super(msg);
  }

  @Override
  public String getMessage() {
    return super.getMessage();
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/kerberos/AuthCredential.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.kerberos;

public class AuthCredential {
  private String method;
  private byte[] credential;

  public AuthCredential(String method, byte[] credential) {
    this.method = method;
    this.credential = credential;
  }

  public String getMethod() {
    return method;
  }

  public void setMethod(String method) {
    this.method = method;
  }

  public byte[] getCredential() {
    return credential;
  }

  public void setCredential(byte[] credential) {
    this.credential = credential;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/kerberos/JuiceFSDelegationTokenIdentifier.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.kerberos;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;

public class JuiceFSDelegationTokenIdentifier extends AbstractDelegationTokenIdentifier {
  public static final Text TOKEN_KIND = new Text("JUICEFS_DELEGATION_TOKEN");

  public JuiceFSDelegationTokenIdentifier() {
  }

  public JuiceFSDelegationTokenIdentifier(String owner, String renewer, String realUser) {
    super(new Text(owner), new Text(renewer), realUser == null ? null : new Text(realUser));
  }

  @Override
  public Text getKind() {
    return TOKEN_KIND;
  }

  @Override
  public String toString() {
    return "token for " + getUser().getShortUserName() +
        ": " + super.toString();
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/kerberos/JuiceFSTokenRenewer.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.kerberos;

import io.juicefs.JuiceFileSystem;
import io.juicefs.JuiceFileSystemImpl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FilterFileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenRenewer;

import java.io.IOException;
import java.net.URI;

public class JuiceFSTokenRenewer extends TokenRenewer {

  @Override
  public boolean handleKind(Text kind) {
    return JuiceFSDelegationTokenIdentifier.TOKEN_KIND.equals(kind);
  }

  @Override
  public boolean isManaged(Token<?> token) throws IOException {
    return true;
  }

  @Override
  public long renew(Token<?> token, Configuration configuration) throws IOException, InterruptedException {
    String service = token.getService().toString();
    FileSystem fs = FileSystem.get(URI.create(service), configuration);
    if (fs instanceof JuiceFileSystem) {
      return ((JuiceFileSystemImpl) ((FilterFileSystem) fs).getRawFileSystem()).renewToken(token);
    }
    throw new IOException("renew token failed");
  }

  @Override
  public void cancel(Token<?> token, Configuration configuration) throws IOException, InterruptedException {
    String service = token.getService().toString();
    FileSystem fs = FileSystem.get(URI.create(service), configuration);
    if (fs instanceof JuiceFileSystem) {
      ((JuiceFileSystemImpl) ((FilterFileSystem) fs).getRawFileSystem()).cancelToken(token);
      return;
    }
    throw new IOException("cancel token failed");
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/kerberos/KerberosUtil.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.kerberos;

import org.apache.hadoop.security.UserGroupInformation;
import org.ietf.jgss.GSSContext;
import org.ietf.jgss.GSSManager;
import org.ietf.jgss.GSSName;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

public class KerberosUtil {
  public static byte[] genApReq(String spn) throws IOException, InterruptedException {
    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
    if (UserGroupInformation.isLoginKeytabBased()) {
      loginUser.checkTGTAndReloginFromKeytab();
    } else if (UserGroupInformation.isLoginTicketBased()) {
      loginUser.reloginFromTicketCache();
    }
    return loginUser.doAs((PrivilegedExceptionAction<byte[]>) () -> {
      GSSManager manager = GSSManager.getInstance();
      GSSName serverName = manager.createName(spn, GSSName.NT_USER_NAME, org.apache.hadoop.security.authentication.util.KerberosUtil.GSS_KRB5_MECH_OID);
      GSSContext context = manager.createContext(serverName, org.apache.hadoop.security.authentication.util.KerberosUtil.GSS_KRB5_MECH_OID, null, GSSContext.DEFAULT_LIFETIME);
      byte[] token = new byte[0];
      return context.initSecContext(token, 0, token.length);
    });
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/metrics/JuiceFSInstrumentation.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.metrics;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;


@Metrics(context = "JuiceFileSystem", name = "client")
public final class JuiceFSInstrumentation {
  private static MetricsSystem system;
  private static final String METRIC_NAME = "JuiceFSMetrics";

  private static int numFileSystems;

  private final Map<String, Long> valueState = new HashMap<>();
  private final Map<String, Long> timeState = new HashMap<>();

  static {
    system = DefaultMetricsSystem.initialize("juicefs");
  }

  private final FileSystem fs;
  private final FileSystem.Statistics statistics;

  @Metric("number of bytes read from JuiceFS")
  public long getBytesRead() {
    return statistics.getBytesRead();
  }

  @Metric("number of bytes write to JuiceFS")
  public double getBytesWrite() {
    return statistics.getBytesWritten();
  }

  @Metric("write speed")
  public synchronized double getBytesWritePerSec() {
    return getSpeedPerSec("writeSpeed", statistics.getBytesWritten());
  }


  @Metric("read speed")
  public synchronized double getBytesReadPerSec() {
    return getSpeedPerSec("readSpeed", statistics.getBytesRead());
  }

  @Metric("JuiceFS client num")
  public synchronized int getNumFileSystems() {
    return 1;
  }

  @Metric("JuiceFS used size")
  public synchronized long getUsedSize() {
    try {
      return fs.getStatus(new Path("/")).getUsed();
    } catch (IOException e) {
      return 0;
    }
  }

  @Metric("JuiceFS files")
  public synchronized long getFiles() {
    try {
      return fs.getContentSummary(new Path("/")).getFileCount();
    } catch (IOException e) {
      return 0;
    }
  }

  @Metric("JuiceFS dirs")
  public synchronized long getDirs() {
    try {
      return fs.getContentSummary(new Path("/")).getDirectoryCount();
    } catch (IOException e) {
      return 0;
    }
  }

  public double getSpeedPerSec(String name, long currentValue) {
    double speed = 0;
    long current = System.currentTimeMillis();
    long delta = current - timeState.getOrDefault(name, current);
    if (delta > 0) {
      speed = (currentValue - valueState.getOrDefault(name, currentValue)) / (delta / 1000.0);
    }
    valueState.put(name, currentValue);
    timeState.put(name, current);
    return speed;
  }

  public static synchronized void init(FileSystem fs, FileSystem.Statistics statistics) {
    if (numFileSystems == 0) {
      DefaultMetricsSystem.instance().register(METRIC_NAME, "JuiceFS client metrics",
              new JuiceFSInstrumentation(fs, statistics));
    }
    numFileSystems++;
  }

  private JuiceFSInstrumentation(FileSystem fs, FileSystem.Statistics statistics) {
    this.fs = fs;
    this.statistics = statistics;
  }

  public static synchronized void close() throws IOException {
    if (numFileSystems == 1) {
      system.publishMetricsNow();
      system.unregisterSource(METRIC_NAME);
    }
    numFileSystems--;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerAdminRefresher.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.permission;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.apache.hadoop.fs.*;
import org.apache.ranger.admin.client.RangerAdminClient;
import org.apache.ranger.plugin.contextenricher.RangerTagEnricher;
import org.apache.ranger.plugin.service.RangerBasePlugin;
import org.apache.ranger.plugin.util.RangerRoles;
import org.apache.ranger.plugin.util.RangerServiceNotFoundException;
import org.apache.ranger.plugin.util.ServicePolicies;
import org.apache.ranger.plugin.util.ServiceTags;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;


public class RangerAdminRefresher {
  private static final Logger LOG = LoggerFactory.getLogger(RangerAdminRefresher.class);

  private static final String JFS_RANGER_DIR = "/.sys/ranger";

  private RangerBasePlugin plugIn;
  private Path rangerDir;
  private Path rangerRulePath;
  private long lastMtime;
  private final long pollingIntervalMs;

  private final RangerAdminClient rangerAdmin;
  private final Gson gson = new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").create();
  private long lastKnownPolicyVersion = -1L;
  private long lastPolicyActivationTimeInMillis;
  private long lastKnownRoleVersion = -1L;
  private long lastRoleActivationTimeInMillis;
  private long lastKnownTagVersion = -1L;
  private long lastTagActivationTimeInMillis;

  private final FileSystem fs;
  private final ScheduledExecutorService refreshThread;

  public RangerAdminRefresher(RangerBasePlugin plugIn, RangerAdminClient rangerAdmin, FileSystem fs, String rangerUrl, long pollingIntervalMs) {

    this.plugIn = plugIn;
    this.rangerAdmin = rangerAdmin;
    this.fs = fs;
    String serviceName = plugIn.getServiceName();
    URI uri = URI.create(rangerUrl);
    String rangerDirName = uri.getHost().replace(".", "_") + "_" + uri.getPort() + "_" + serviceName;
    this.rangerDir = new Path(JFS_RANGER_DIR, rangerDirName);
    this.rangerRulePath = new Path(rangerDir, "rules");
    this.refreshThread = Executors.newScheduledThreadPool(1, r -> {
      Thread t = new Thread(r, "JuiceFS Ranger Refresher");
      t.setDaemon(true);
      return t;
    });
    this.pollingIntervalMs = pollingIntervalMs;
  }

  public void start() {
    loadRangerItem();
    refreshThread.scheduleAtFixedRate(this::loadRangerItem, pollingIntervalMs, pollingIntervalMs, TimeUnit.MILLISECONDS);
  }

  /**
   * 1. read rules from jfs
   * 2. choose one client to check ranger admin, if updated, download and save rules to jfs
   */
  public void loadRangerItem() {
    RangerRules rangerRules = null;
    // try to load rules from jfs
    try {
      rangerRules = loadRangerRules();
    } catch (IOException e) {
      LOG.debug("Load ranger rules failed", e);
    }

    if (rangerRules != null) {
      if (updateRules(rangerRules.getPolicies(), rangerRules.getTags(), rangerRules.getRoles())) {
        LOG.info("Ranger rules has been updated, use new rules from juicefs");
      }
    }

    boolean checkUpdate = checkUpdate(pollingIntervalMs);
    // load rules from ranger admin
    if (rangerRules == null || checkUpdate) {
      ServicePolicies policiesFromRanger = null;
      ServiceTags tagsFromRanger = null;
      RangerRoles rolesFromRanger = null;
      try {
        policiesFromRanger = rangerAdmin.getServicePoliciesIfUpdated(lastKnownPolicyVersion, lastPolicyActivationTimeInMillis);
        tagsFromRanger = rangerAdmin.getServiceTagsIfUpdated(lastKnownTagVersion, lastTagActivationTimeInMillis);
        rolesFromRanger = rangerAdmin.getRolesIfUpdated(lastKnownRoleVersion, lastRoleActivationTimeInMillis);
      } catch (RangerServiceNotFoundException e) {
        LOG.warn("Ranger service not found", e);
      } catch (Exception e) {
        LOG.warn("Load policies from ranger failed", e);
      }
      if (updateRules(policiesFromRanger, tagsFromRanger, rolesFromRanger)) {
        if (checkUpdate) {
          try {
            ServicePolicies p = rangerRules != null ? rangerRules.getPolicies() : null;
            ServiceTags t = rangerRules != null ? rangerRules.getTags() : null;
            RangerRoles r = rangerRules != null ? rangerRules.getRoles() : null;
            if (policiesFromRanger != null) {
              LOG.info("ServicePolicies updated from Ranger Admin");
              p = policiesFromRanger;
            }
            if (tagsFromRanger != null) {
              LOG.info("ServiceTags updated from Ranger Admin");
              t = tagsFromRanger;
            }
            if (rolesFromRanger != null) {
              LOG.info("RangerRoles updated from Ranger Admin");
              r = rolesFromRanger;
            }
            saveRangerRules(new RangerRules(p, t, r));
          } catch (IOException e) {
            LOG.warn("Save rules to juicefs failed", e);
          }
        }
      }
    }
  }

  private boolean checkUpdate(long pollingIntervalMs) {
    try {
      boolean exists = fs.exists(rangerDir);
      if (!exists) {
        fs.mkdirs(rangerDir);
      }
      FileStatus[] lockFiles = fs.listStatus(rangerDir, path -> {
        String name = path.getName();
        return name.endsWith(".lock");
      });
      String prefix = String.valueOf((System.currentTimeMillis() / pollingIntervalMs) * pollingIntervalMs);
      Path lockPath = new Path(rangerDir, prefix + ".lock");
      if (lockFiles == null || lockFiles.length == 0) {
        try (FSDataOutputStream ignore = fs.create(lockPath, false)) {
          return true;
        }
      } else {
        if (lockFiles.length > 1) {
          Arrays.sort(lockFiles, Comparator.comparing(o -> o.getPath().getName()));
        }
        if (lockFiles[lockFiles.length - 1].getPath().getName().compareTo(lockPath.getName()) >= 0) {
          return false;
        }
        try (FSDataOutputStream ignore = fs.create(lockPath, false)) {
          for (FileStatus lockFile : lockFiles) {
            fs.delete(lockFile.getPath(), false);
          }
          return true;
        }
      }
    } catch (FileAlreadyExistsException ignored) {
      return false;
    }
    catch (IOException e) {
      LOG.warn("Check update failed", e);
      return false;
    }
  }

  private void saveRangerRules(RangerRules rules) throws IOException {
    String rulesJson = gson.toJson(rules, RangerRules.class);
    byte[] bytes = rulesJson.getBytes();
    try (FSDataOutputStream out = fs.create(rangerRulePath)) {
      out.write(bytes);
    } catch (FileNotFoundException e) {
      fs.mkdirs(rangerRulePath.getParent());
      try (FSDataOutputStream out = fs.create(rangerRulePath)) {
        out.write(bytes);
      }
    }
  }

  private RangerRules loadRangerRules() throws IOException {
    FileStatus fileStatus = fs.getFileStatus(rangerRulePath);
    long mtime = fileStatus.getModificationTime();
    if (lastMtime == mtime) {
      return null;
    }
    try (FSDataInputStream in = fs.open(rangerRulePath)) {
      byte[] bytes = new byte[(int) fileStatus.getLen()];
      in.readFully(bytes);
      String rulesJson = new String(bytes);
      RangerRules rangerRules = gson.fromJson(rulesJson, RangerRules.class);
      lastMtime = mtime;
      return rangerRules;
    }
  }

  private boolean updateRules(ServicePolicies newSvcPolicies, ServiceTags newTags, RangerRoles newRangerRoles) {
    boolean updated = false;
    if (newSvcPolicies != null) {
      long policyVersion = newSvcPolicies.getPolicyVersion() == null ? -1 : newSvcPolicies.getPolicyVersion();
      if (lastKnownPolicyVersion != policyVersion) {
        plugIn.setPolicies(newSvcPolicies);
        lastKnownPolicyVersion = policyVersion;
        lastPolicyActivationTimeInMillis = System.currentTimeMillis();
        updated = true;
      }
    }
    if (newTags != null) {
      long tagVersion = newTags.getTagVersion() == null ? -1 : newTags.getTagVersion();
      if (lastKnownTagVersion != tagVersion) {
        RangerTagEnricher tagEnricher = plugIn.getTagEnricher();
        if (tagEnricher != null) {
          tagEnricher.setServiceTags(newTags);
        }
        lastKnownTagVersion = tagVersion;
        lastTagActivationTimeInMillis = System.currentTimeMillis();
        updated = true;
      }
    }
    if (newRangerRoles != null) {
      long roleVersion = newRangerRoles.getRoleVersion() == null ? -1 : newRangerRoles.getRoleVersion();
      if (lastKnownRoleVersion != roleVersion) {
        plugIn.setRoles(newRangerRoles);
        lastKnownRoleVersion = roleVersion;
        lastRoleActivationTimeInMillis = System.currentTimeMillis();
        updated = true;
      }
    }
    return updated;
  }

  public void stop() {
    refreshThread.shutdownNow();
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerConfig.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

public class RangerConfig {

  public RangerConfig(String rangerRestUrl, String serviceName, long pollIntervalMs) {
    this.rangerRestUrl = rangerRestUrl;
    this.serviceName = serviceName;
    this.pollIntervalMs = pollIntervalMs;
  }

  private String rangerRestUrl;

  private String serviceName;

  private long pollIntervalMs;

  private String impl;

  public String getRangerRestUrl() {
    return rangerRestUrl;
  }

  public void setRangerRestUrl(String rangerRestUrl) {
    this.rangerRestUrl = rangerRestUrl;
  }

  public String getServiceName() {
    return serviceName;
  }

  public void setServiceName(String serviceName) {
    this.serviceName = serviceName;
  }

  public long getPollIntervalMs() {
    return pollIntervalMs;
  }

  public void setPollIntervalMs(long pollIntervalMs) {
    this.pollIntervalMs = pollIntervalMs;
  }

  public void setImpl(String impl) {
    this.impl = impl;
  }

  public String getImpl() {
    return impl;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerJfsAccessRequest.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl;

import java.util.Date;
import java.util.Set;

class RangerJfsAccessRequest extends RangerAccessRequestImpl {

  RangerJfsAccessRequest(String path, String pathOwner, String accessType, String action, String user,
                         Set<String> groups) {
    setResource(new RangerJfsResource(path, pathOwner));
    setAccessType(accessType);
    setUser(user);
    setUserGroups(groups);
    setAccessTime(new Date());
    setAction(action);
    setForwardedAddresses(null);
  }

}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerJfsPlugin.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.permission;

import io.juicefs.utils.ReflectionUtil;
import org.apache.hadoop.fs.FileSystem;
import org.apache.ranger.admin.client.RangerAdminClient;
import org.apache.ranger.authorization.hadoop.config.RangerPluginConfig;
import org.apache.ranger.plugin.service.RangerBasePlugin;
import org.apache.ranger.plugin.service.RangerChainedPlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

public class RangerJfsPlugin extends RangerBasePlugin {
  private static final Logger LOG = LoggerFactory.getLogger(RangerJfsPlugin.class);

  private FileSystem fs;
  private String rangerUrl;
  private RangerAdminRefresher refresher;
  private long pollingIntervalMs;

  public RangerJfsPlugin(FileSystem fs, String serviceName, String rangerUrl, long pollingIntervalMs) {
    super(new RangerPluginCfg("hdfs", serviceName, "jfs", null, null, null));
    this.fs = fs;
    this.rangerUrl = rangerUrl;
    RangerPluginConfig config = getConfig();
    config.addResource(fs.getConf());
    this.pollingIntervalMs = pollingIntervalMs;
  }

  @Override
  public void init() {
    cleanup();
    RangerAdminClient admin = createAdminClient(getConfig());
    refresher = new RangerAdminRefresher(this, admin, fs, rangerUrl, pollingIntervalMs);
    refresher.start();
    List<RangerChainedPlugin> chainedPlugins = null;
    try {
      chainedPlugins = (List<RangerChainedPlugin>) ReflectionUtil.getField(RangerBasePlugin.class.getName(), "chainedPlugins", this);
    } catch (Exception e) {
      LOG.warn("Get field \"chainedPlugins\" failed", e);
    }
    if (chainedPlugins != null) {
      for (RangerChainedPlugin plugin : chainedPlugins) {
        plugin.init();
      }
    }
  }

  @Override
  public void cleanup() {
    super.cleanup();
    if (refresher != null) {
      refresher.stop();
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerJfsResource.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl;

class RangerJfsResource extends RangerAccessResourceImpl {
  RangerJfsResource(String path, String owner) {
    setValue("path", path);
    setOwnerUser(owner);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerPermissionChecker.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.security.AccessControlException;
import org.apache.ranger.plugin.policyengine.RangerAccessResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * for auth checker
 *
 * @author ming.li2
 **/
public class RangerPermissionChecker {

  private static final Logger LOG = LoggerFactory.getLogger(RangerPermissionChecker.class);

  private static final Map<String, RangerPermissionChecker> pcs = new ConcurrentHashMap<>();
  private static final Map<String, Set<Long>> runningInstance = new HashMap<>();

  private final HashMap<FsAction, Set<String>> fsAction2ActionMapper = new HashMap<FsAction, Set<String>>() {
    {
      put(FsAction.NONE, new HashSet<>());
      put(FsAction.ALL, Sets.newHashSet("read", "write", "execute"));
      put(FsAction.READ, Sets.newHashSet("read"));
      put(FsAction.READ_WRITE, Sets.newHashSet("read", "write"));
      put(FsAction.READ_EXECUTE, Sets.newHashSet("read", "execute"));
      put(FsAction.WRITE, Sets.newHashSet("write"));
      put(FsAction.WRITE_EXECUTE, Sets.newHashSet("write", "execute"));
      put(FsAction.EXECUTE, Sets.newHashSet("execute"));
    }
  };

  private final FileSystem superGroupFileSystem;
  private final RangerJfsPlugin rangerPlugin;

  public RangerPermissionChecker(FileSystem superGroupFileSystem, RangerConfig config) {
    this.superGroupFileSystem = superGroupFileSystem;
    rangerPlugin = new RangerJfsPlugin(superGroupFileSystem, config.getServiceName(), config.getRangerRestUrl(), config.getPollIntervalMs());
    rangerPlugin.getConfig().set("ranger.plugin.hdfs.service.name", config.getServiceName());
    rangerPlugin.getConfig().set("ranger.plugin.hdfs.policy.rest.url", config.getRangerRestUrl());
    // for test use
    if (config.getImpl() != null) {
      rangerPlugin.getConfig().set("ranger.plugin.hdfs.policy.source.impl", config.getImpl());
    }
    rangerPlugin.getConfig().setIsFallbackSupported(true);
    rangerPlugin.init();
  }

  public static RangerPermissionChecker acquire(String volName, long handle, FileSystem superGroupFileSystem, RangerConfig config) throws IOException {
    synchronized (runningInstance) {
      if (!runningInstance.containsKey(volName)) {
        if (pcs.containsKey(volName)) {
          throw new IOException("RangerPermissionChecker for volume: " + volName + " is already created, but no running instance found.");
        }
        RangerPermissionChecker pc = new RangerPermissionChecker(superGroupFileSystem, config);
        pcs.put(volName, pc);
        Set<Long> handles = new HashSet<>();
        handles.add(handle);
        runningInstance.put(volName, handles);
        return pc;
      } else {
        RangerPermissionChecker pc = pcs.get(volName);
        if (pc == null) {
          throw new IOException("RangerPermissionChecker for volume: " + volName + " is already created, but no instance found.");
        }
        runningInstance.get(volName).add(handle);
        return pc;
      }
    }
  }

  public static void release(String volName, long handle) {
    if (handle <= 0) {
      return;
    }
    synchronized (runningInstance) {
      if (!runningInstance.containsKey(volName)) {
        return;
      }
      Set<Long> handles = runningInstance.get(volName);
      boolean removed = handles.remove(handle);
      if (!removed) {
        return;
      }
      if (handles.size() == 0) {
        RangerPermissionChecker pc = pcs.remove(volName);
        pc.cleanUp();
        runningInstance.remove(volName);
      }
    }
  }

  public boolean checkPermission(Path path, boolean checkOwner, FsAction ancestorAccess, FsAction parentAccess,
                                 FsAction access, String operationName, String user, Set<String> groups) throws IOException {
    RangerPermissionContext context = new RangerPermissionContext(user, groups, operationName);
    PathObj obj = path2Obj(path);

    boolean fallback = true;
    AuthzStatus authzStatus = AuthzStatus.ALLOW;

    if (access != null && parentAccess != null
        && parentAccess.implies(FsAction.WRITE) && obj.parent != null && obj.current != null && obj.parent.getPermission().getStickyBit()) {
      if (!StringUtils.equals(obj.parent.getOwner(), user) && !StringUtils.equals(obj.current.getOwner(), user)) {
        authzStatus = AuthzStatus.NOT_DETERMINED;
      }
    }

    if (authzStatus == AuthzStatus.ALLOW && ancestorAccess != null && obj.ancestor != null) {
      authzStatus = isAccessAllowed(obj.ancestor, ancestorAccess, context);
      if (checkResult(authzStatus, user, ancestorAccess.toString(), toPathString(obj.ancestor.getPath()))) {
        return fallback;
      }
    }

    if (authzStatus == AuthzStatus.ALLOW && parentAccess != null && obj.parent != null) {
      authzStatus = isAccessAllowed(obj.parent, parentAccess, context);
      if (checkResult(authzStatus, user, parentAccess.toString(), toPathString(obj.parent.getPath()))) {
        return fallback;
      }
    }

    if (authzStatus == AuthzStatus.ALLOW && access != null && obj.current != null) {
      authzStatus = isAccessAllowed(obj.current, access, context);
      if (checkResult(authzStatus, user, access.toString(), toPathString(obj.current.getPath()))) {
        return fallback;
      }
    }

    if (checkOwner) {
      String owner = null;
      if (obj.current != null) {
        owner = obj.current.getOwner();
      }
      if (!user.equals(owner)) {
        throw new AccessControlException(
            assembleExceptionMessage(user, getFirstNonNullAccess(ancestorAccess, parentAccess, access),
                toPathString(obj.current.getPath())));
      }
    }
    // check access by ranger success
    return !fallback;
  }

  public void cleanUp() {
    try {
      rangerPlugin.cleanup();
    } catch (Exception e) {
      LOG.warn("Error when clean up ranger plugin threads.", e);
    }
    try {
      superGroupFileSystem.close();
    } catch (Exception e) {
      LOG.warn("Error when close super group file system.", e);
    }
  }

  private static boolean checkResult(AuthzStatus authzStatus, String user, String action, String path) throws AccessControlException {
    if (authzStatus == AuthzStatus.DENY) {
      throw new AccessControlException(assembleExceptionMessage(user, action, path));
    } else {
      return authzStatus == AuthzStatus.NOT_DETERMINED;
    }
  }

  private static String assembleExceptionMessage(String user, String action, String path) {
    return "Permission denied: user=" + user + ", access=" + action + ", path=\"" + path + "\"";
  }

  private static String getFirstNonNullAccess(FsAction ancestorAccess, FsAction parentAccess, FsAction access) {
    if (access != null) {
      return access.toString();
    }
    if (parentAccess != null) {
      return parentAccess.toString();
    }
    if (ancestorAccess != null) {
      return ancestorAccess.toString();
    }
    return FsAction.EXECUTE.toString();
  }

  private AuthzStatus isAccessAllowed(FileStatus file, FsAction access, RangerPermissionContext context) {
    String path = toPathString(file.getPath());
    Set<String> accessTypes = fsAction2ActionMapper.getOrDefault(access, new HashSet<>());
    String pathOwner = file.getOwner();
    AuthzStatus authzStatus = null;
    for (String accessType : accessTypes) {
      RangerJfsAccessRequest request = new RangerJfsAccessRequest(path, pathOwner, accessType, context.operationName, context.user, context.userGroups);
      LOG.debug(request.toString());
      RangerAccessResult result = rangerPlugin.isAccessAllowed(request);
      if (result != null) {
        LOG.debug(result.toString());
      }
      if (result == null || !result.getIsAccessDetermined()) {
        authzStatus = AuthzStatus.NOT_DETERMINED;
      } else if (!result.getIsAllowed()) {
        authzStatus = AuthzStatus.DENY;
        break;
      } else {
        if (!AuthzStatus.NOT_DETERMINED.equals(authzStatus)) {
          authzStatus = AuthzStatus.ALLOW;
        }
      }

    }
    if (authzStatus == null) {
      authzStatus = AuthzStatus.NOT_DETERMINED;
    }
    return authzStatus;
  }

  private enum AuthzStatus {ALLOW, DENY, NOT_DETERMINED}

  ;

  private static String toPathString(Path path) {
    return path.toUri().getPath();
  }

  private PathObj path2Obj(Path path) throws IOException {

    FileStatus current = getIfExist(path);
    FileStatus parent = getIfExist(path.getParent());
    FileStatus ancestor = getAncestor(path);

    return new PathObj(ancestor, parent, current);
  }

  private FileStatus getIfExist(Path path) throws IOException {
    try {
      if (path != null) {
        return superGroupFileSystem.getFileStatus(path);
      }
    } catch (FileNotFoundException ignored) {
    }
    return null;
  }

  public FileStatus getAncestor(Path path) throws IOException {
    if (path.getParent() != null) {
      return getIfExist(path.getParent());
    }
    path = path.getParent();
    FileStatus tmp = null;
    while (path != null && tmp == null) {
      tmp = getIfExist(path);
      path = path.getParent();
    }
    return tmp;
  }

  public static class PathObj {

    FileStatus ancestor = null;

    FileStatus parent = null;

    FileStatus current = null;

    public PathObj(FileStatus ancestor, FileStatus parent, FileStatus current) {
      this.ancestor = ancestor;
      this.parent = parent;
      this.current = current;
    }
  }

}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerPermissionContext.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

import java.util.Set;

public class RangerPermissionContext {

  public final String user;

  public final Set<String> userGroups;

  public final String operationName;

  public RangerPermissionContext(String user, Set<String> groups, String operationName) {
    this.user = user;
    this.userGroups = groups;
    this.operationName = operationName;
  }

}


================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerPluginCfg.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.permission;

import org.apache.ranger.authorization.hadoop.config.RangerConfiguration;
import org.apache.ranger.authorization.hadoop.config.RangerPluginConfig;
import org.apache.ranger.plugin.policyengine.RangerPolicyEngineOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;

public class RangerPluginCfg extends RangerPluginConfig {
  private static final Logger LOG = LoggerFactory.getLogger(RangerPluginCfg.class);

  @Override
  public boolean addResourceIfReadable(String aResourceName) {
    URL fUrl = this.getFileLocation(aResourceName);
    if (fUrl != null) {
      try {
        this.addResource(fUrl);
      } catch (Exception e) {
        LOG.error("Unable to load the resource name [" + aResourceName + "]. Ignoring the resource:" + fUrl);
      }
    }
    return true;
  }

  public static boolean isEmpty(String str) {
    return str == null || str.length() == 0;
  }

  private URL getFileLocation(String fileName) {
    URL lurl = null;
    if (!isEmpty(fileName)) {
      lurl = RangerConfiguration.class.getClassLoader().getResource(fileName);

      if (lurl == null ) {
        lurl = RangerConfiguration.class.getClassLoader().getResource("/" + fileName);
      }

      if (lurl == null ) {
        File f = new File(fileName);
        if (f.exists()) {
          try {
            lurl=f.toURI().toURL();
          } catch (MalformedURLException e) {
            LOG.error("Unable to load the resource name [" + fileName + "]. Ignoring the resource:" + f.getPath());
          }
        } else {
          if(LOG.isDebugEnabled()) {
            LOG.debug("Conf file path " + fileName + " does not exists");
          }
        }
      }
    }
    return lurl;
  }

  public RangerPluginCfg(String serviceType, String serviceName, String appId, String clusterName, String clusterType, RangerPolicyEngineOptions policyEngineOptions) {
    super(serviceType, serviceName, appId, clusterName, clusterType, policyEngineOptions);
  }
}

================================================
FILE: sdk/java/src/main/java/io/juicefs/permission/RangerRules.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.permission;

import org.apache.ranger.plugin.util.RangerRoles;
import org.apache.ranger.plugin.util.ServicePolicies;
import org.apache.ranger.plugin.util.ServiceTags;

import java.io.Serializable;

public class RangerRules implements Serializable {
  private ServicePolicies policies;
  private ServiceTags tags;
  private RangerRoles roles;

  public RangerRules() {
  }

  public RangerRules(ServicePolicies policies, ServiceTags tags, RangerRoles roles) {
    this.policies = policies;
    this.tags = tags;
    this.roles = roles;
  }

  public ServicePolicies getPolicies() {
    return policies;
  }

  public void setPolicies(ServicePolicies policies) {
    this.policies = policies;
  }

  public ServiceTags getTags() {
    return tags;
  }

  public void setTags(ServiceTags tags) {
    this.tags = tags;
  }

  public RangerRoles getRoles() {
    return roles;
  }

  public void setRoles(RangerRoles roles) {
    this.roles = roles;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/tools/RangerDownloader.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.tools;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import io.juicefs.JuiceFileSystemImpl;
import io.juicefs.Main;
import io.juicefs.permission.RangerPermissionChecker;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.IOException;
import java.net.URI;

@Parameters(commandDescription = "Download policies from ranger and save to JuiceFS")
public class RangerDownloader extends Main.Command {

  @Parameter(names = {"--fs"}, description = "JuiceFileSystem: jfs://{JFS_VOL_NAME}", required = true)
  private String fs;

  @Parameter(names = {"--keytab"}, description = "local keytab file location")
  private String keytab;

  @Parameter(names = {"--principal"}, description = "principal allowed access ranger admin")
  private String principal;

  @Override
  public void init() throws IOException {

  }

  @Override
  public void run() throws IOException {
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    if (!ugi.hasKerberosCredentials() && (keytab == null || principal == null)) {
      throw new IllegalArgumentException("No kerberos credential was found! Parameter \"--keytab\" and \"--principal\" must be provided.");
    }
    if (keytab != null) {
      UserGroupInformation.loginUserFromKeytab(principal, keytab);
    }
    Configuration cfg = new Configuration();
    JuiceFileSystemImpl jfs = new JuiceFileSystemImpl(true);
    jfs.initialize(URI.create(fs), cfg);
    new RangerPermissionChecker(jfs, jfs.checkAndGetRangerParams(cfg));
  }

  @Override
  public void close() throws IOException {

  }

  @Override
  public String getCommand() {
    return "ranger";
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/AclTransformation.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.utils;

import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.apache.hadoop.fs.permission.*;

import java.io.IOException;
import java.util.*;

import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS;
import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT;
import static org.apache.hadoop.fs.permission.AclEntryType.*;

/**
 * AclTransformation defines the operations that can modify an ACL.  All ACL
 * modifications take as input an existing ACL and apply logic to add new
 * entries, modify existing entries or remove old entries.  Some operations also
 * accept an ACL spec: a list of entries that further describes the requested
 * change.  Different operations interpret the ACL spec differently.  In the
 * case of adding an ACL to an inode that previously did not have one, the
 * existing ACL can be a "minimal ACL" containing exactly 3 entries for owner,
 * group and other, all derived from the {@link FsPermission} bits.
 * <p>
 * The algorithms implemented here require sorted lists of ACL entries.  For any
 * existing ACL, it is assumed that the entries are sorted.  This is because all
 * ACL creation and modification is intended to go through these methods, and
 * they all guarantee correct sort order in their outputs.  However, an ACL spec
 * is considered untrusted user input, so all operations pre-sort the ACL spec as
 * the first step.
 */
public final class AclTransformation {
  private static final int MAX_ENTRIES = 32;

  public static List<AclEntry> filterAclEntriesByAclSpec(List<AclEntry> existingAcl, List<AclEntry> inAclSpec) throws AclException {
    ValidatedAclSpec aclSpec = new ValidatedAclSpec(inAclSpec);
    ArrayList<AclEntry> aclBuilder = Lists.newArrayListWithCapacity(MAX_ENTRIES);
    EnumMap<AclEntryScope, AclEntry> providedMask = Maps.newEnumMap(AclEntryScope.class);
    EnumSet<AclEntryScope> maskDirty = EnumSet.noneOf(AclEntryScope.class);
    EnumSet<AclEntryScope> scopeDirty = EnumSet.noneOf(AclEntryScope.class);
    for (AclEntry existingEntry : existingAcl) {
      if (aclSpec.containsKey(existingEntry)) {
        scopeDirty.add(existingEntry.getScope());
        if (existingEntry.getType() == MASK) {
          maskDirty.add(existingEntry.getScope());
        }
      } else {
        if (existingEntry.getType() == MASK) {
          providedMask.put(existingEntry.getScope(), existingEntry);
        } else {
          aclBuilder.add(existingEntry);
        }
      }
    }
    copyDefaultsIfNeeded(aclBuilder);
    calculateMasks(aclBuilder, providedMask, maskDirty, scopeDirty);
    return buildAndValidateAcl(aclBuilder);
  }

  public static List<AclEntry> mergeAclEntries(List<AclEntry> existingAcl, List<AclEntry> inAclSpec) throws AclException {
    ValidatedAclSpec aclSpec = new ValidatedAclSpec(inAclSpec);
    ArrayList<AclEntry> aclBuilder = Lists.newArrayListWithCapacity(MAX_ENTRIES);
    List<AclEntry> foundAclSpecEntries = Lists.newArrayListWithCapacity(MAX_ENTRIES);
    EnumMap<AclEntryScope, AclEntry> providedMask = Maps.newEnumMap(AclEntryScope.class);
    EnumSet<AclEntryScope> maskDirty = EnumSet.noneOf(AclEntryScope.class);
    EnumSet<AclEntryScope> scopeDirty = EnumSet.noneOf(AclEntryScope.class);
    for (AclEntry existingEntry : existingAcl) {
      AclEntry aclSpecEntry = aclSpec.findByKey(existingEntry);
      if (aclSpecEntry != null) {
        foundAclSpecEntries.add(aclSpecEntry);
        scopeDirty.add(aclSpecEntry.getScope());
        if (aclSpecEntry.getType() == MASK) {
          providedMask.put(aclSpecEntry.getScope(), aclSpecEntry);
          maskDirty.add(aclSpecEntry.getScope());
        } else {
          aclBuilder.add(aclSpecEntry);
        }
      } else {
        if (existingEntry.getType() == MASK) {
          providedMask.put(existingEntry.getScope(), existingEntry);
        } else {
          aclBuilder.add(existingEntry);
        }
      }
    }
    // ACL spec entries that were not replacements are new additions.
    for (AclEntry newEntry : aclSpec) {
      if (Collections.binarySearch(foundAclSpecEntries, newEntry, ACL_ENTRY_COMPARATOR) < 0) {
        scopeDirty.add(newEntry.getScope());
        if (newEntry.getType() == MASK) {
          providedMask.put(newEntry.getScope(), newEntry);
          maskDirty.add(newEntry.getScope());
        } else {
          aclBuilder.add(newEntry);
        }
      }
    }
    copyDefaultsIfNeeded(aclBuilder);
    calculateMasks(aclBuilder, providedMask, maskDirty, scopeDirty);
    return buildAndValidateAcl(aclBuilder);
  }

  public static List<AclEntry> replaceAclEntries(List<AclEntry> existingAcl, List<AclEntry> inAclSpec) throws AclException {
    ValidatedAclSpec aclSpec = new ValidatedAclSpec(inAclSpec);
    ArrayList<AclEntry> aclBuilder = Lists.newArrayListWithCapacity(MAX_ENTRIES);
    // Replacement is done separately for each scope: access and default.
    EnumMap<AclEntryScope, AclEntry> providedMask = Maps.newEnumMap(AclEntryScope.class);
    EnumSet<AclEntryScope> maskDirty = EnumSet.noneOf(AclEntryScope.class);
    EnumSet<AclEntryScope> scopeDirty = EnumSet.noneOf(AclEntryScope.class);
    for (AclEntry aclSpecEntry : aclSpec) {
      scopeDirty.add(aclSpecEntry.getScope());
      if (aclSpecEntry.getType() == MASK) {
        providedMask.put(aclSpecEntry.getScope(), aclSpecEntry);
        maskDirty.add(aclSpecEntry.getScope());
      } else {
        aclBuilder.add(aclSpecEntry);
      }
    }
    // Copy existing entries if the scope was not replaced.
    for (AclEntry existingEntry : existingAcl) {
      if (!scopeDirty.contains(existingEntry.getScope())) {
        if (existingEntry.getType() == MASK) {
          providedMask.put(existingEntry.getScope(), existingEntry);
        } else {
          aclBuilder.add(existingEntry);
        }
      }
    }
    copyDefaultsIfNeeded(aclBuilder);
    calculateMasks(aclBuilder, providedMask, maskDirty, scopeDirty);
    return buildAndValidateAcl(aclBuilder);
  }

  private AclTransformation() {
  }

  public static final Comparator<AclEntry> ACL_ENTRY_COMPARATOR = new Comparator<AclEntry>() {
    @Override
    public int compare(AclEntry entry1, AclEntry entry2) {
      return ComparisonChain.start().compare(entry1.getScope(), entry2.getScope(), Ordering.explicit(ACCESS, DEFAULT)).compare(entry1.getType(), entry2.getType(), Ordering.explicit(USER, GROUP, MASK, OTHER)).compare(entry1.getName(), entry2.getName(), Ordering.natural().nullsFirst()).result();
    }
  };

  public static List<AclEntry> buildAndValidateAcl(ArrayList<AclEntry> aclBuilder) throws AclException {
    aclBuilder.trimToSize();
    Collections.sort(aclBuilder, ACL_ENTRY_COMPARATOR);
    // Full iteration to check for duplicates and invalid named entries.
    AclEntry prevEntry = null;
    for (AclEntry entry : aclBuilder) {
      if (prevEntry != null && ACL_ENTRY_COMPARATOR.compare(prevEntry, entry) == 0) {
        throw new AclException("Invalid ACL: multiple entries with same scope, type and name.");
      }
      if (entry.getName() != null && (entry.getType() == MASK || entry.getType() == OTHER)) {
        throw new AclException("Invalid ACL: this entry type must not have a name: " + entry + ".");
      }
      prevEntry = entry;
    }

    ScopedAclEntries scopedEntries = new ScopedAclEntries(aclBuilder);
    checkMaxEntries(scopedEntries);

    // Search for the required base access entries.  If there is a default ACL,
    // then do the same check on the default entries.
    for (AclEntryType type : EnumSet.of(USER, GROUP, OTHER)) {
      AclEntry accessEntryKey = new AclEntry.Builder().setScope(ACCESS).setType(type).build();
      if (Collections.binarySearch(scopedEntries.getAccessEntries(), accessEntryKey, ACL_ENTRY_COMPARATOR) < 0) {
        throw new AclException("Invalid ACL: the user, group and other entries are required.");
      }
      if (!scopedEntries.getDefaultEntries().isEmpty()) {
        AclEntry defaultEntryKey = new AclEntry.Builder().setScope(DEFAULT).setType(type).build();
        if (Collections.binarySearch(scopedEntries.getDefaultEntries(), defaultEntryKey, ACL_ENTRY_COMPARATOR) < 0) {
          throw new AclException("Invalid default ACL: the user, group and other entries are required.");
        }
      }
    }
    return Collections.unmodifiableList(aclBuilder);
  }

  private static void checkMaxEntries(ScopedAclEntries scopedEntries) throws AclException {
    List<AclEntry> accessEntries = scopedEntries.getAccessEntries();
    List<AclEntry> defaultEntries = scopedEntries.getDefaultEntries();
    if (accessEntries.size() > MAX_ENTRIES) {
      throw new AclException("Invalid ACL: ACL has " + accessEntries.size() + " access entries, which exceeds maximum of " + MAX_ENTRIES + ".");
    }
    if (defaultEntries.size() > MAX_ENTRIES) {
      throw new AclException("Invalid ACL: ACL has " + defaultEntries.size() + " default entries, which exceeds maximum of " + MAX_ENTRIES + ".");
    }
  }

  private static void calculateMasks(List<AclEntry> aclBuilder, EnumMap<AclEntryScope, AclEntry> providedMask, EnumSet<AclEntryScope> maskDirty, EnumSet<AclEntryScope> scopeDirty) throws AclException {
    EnumSet<AclEntryScope> scopeFound = EnumSet.noneOf(AclEntryScope.class);
    EnumMap<AclEntryScope, FsAction> unionPerms = Maps.newEnumMap(AclEntryScope.class);
    EnumSet<AclEntryScope> maskNeeded = EnumSet.noneOf(AclEntryScope.class);
    // Determine which scopes are present, which scopes need a mask, and the
    // union of group class permissions in each scope.
    for (AclEntry entry : aclBuilder) {
      scopeFound.add(entry.getScope());
      if (entry.getType() == GROUP || entry.getName() != null) {
        FsAction scopeUnionPerms = unionPerms.get(entry.getScope());
        if (scopeUnionPerms == null) {
          scopeUnionPerms = FsAction.NONE;
        }
        unionPerms.put(entry.getScope(), scopeUnionPerms.or(entry.getPermission()));
      }
      if (entry.getName() != null) {
        maskNeeded.add(entry.getScope());
      }
    }
    // Add mask entry if needed in each scope.
    for (AclEntryScope scope : scopeFound) {
      if (!providedMask.containsKey(scope) && maskNeeded.contains(scope) && maskDirty.contains(scope)) {
        // Caller explicitly removed mask entry, but it's required.
        throw new AclException("Invalid ACL: mask is required and cannot be deleted.");
      } else if (providedMask.containsKey(scope) && (!scopeDirty.contains(scope) || maskDirty.contains(scope))) {
        // Caller explicitly provided new mask, or we are preserving the existing
        // mask in an unchanged scope.
        aclBuilder.add(providedMask.get(scope));
      } else if (maskNeeded.contains(scope) || providedMask.containsKey(scope)) {
        // Otherwise, if there are maskable entries present, or the ACL
        // previously had a mask, then recalculate a mask automatically.
        aclBuilder.add(new AclEntry.Builder().setScope(scope).setType(MASK).setPermission(unionPerms.get(scope)).build());
      }
    }
  }

  private static void copyDefaultsIfNeeded(List<AclEntry> aclBuilder) {
    Collections.sort(aclBuilder, ACL_ENTRY_COMPARATOR);
    ScopedAclEntries scopedEntries = new ScopedAclEntries(aclBuilder);
    if (!scopedEntries.getDefaultEntries().isEmpty()) {
      List<AclEntry> accessEntries = scopedEntries.getAccessEntries();
      List<AclEntry> defaultEntries = scopedEntries.getDefaultEntries();
      List<AclEntry> copiedEntries = Lists.newArrayListWithCapacity(3);
      for (AclEntryType type : EnumSet.of(USER, GROUP, OTHER)) {
        AclEntry defaultEntryKey = new AclEntry.Builder().setScope(DEFAULT).setType(type).build();
        int defaultEntryIndex = Collections.binarySearch(defaultEntries, defaultEntryKey, ACL_ENTRY_COMPARATOR);
        if (defaultEntryIndex < 0) {
          AclEntry accessEntryKey = new AclEntry.Builder().setScope(ACCESS).setType(type).build();
          int accessEntryIndex = Collections.binarySearch(accessEntries, accessEntryKey, ACL_ENTRY_COMPARATOR);
          if (accessEntryIndex >= 0) {
            copiedEntries.add(new AclEntry.Builder().setScope(DEFAULT).setType(type).setPermission(accessEntries.get(accessEntryIndex).getPermission()).build());
          }
        }
      }
      // Add all copied entries when done to prevent potential issues with binary
      // search on a modified aclBulider during the main loop.
      aclBuilder.addAll(copiedEntries);
    }
  }

  private static final class ValidatedAclSpec implements Iterable<AclEntry> {
    private final List<AclEntry> aclSpec;

    /**
     * Creates a ValidatedAclSpec by pre-validating and sorting the given ACL
     * entries.  Pre-validation checks that it does not exceed the maximum
     * entries.  This check is performed before modifying the ACL, and it's
     * actually insufficient for enforcing the maximum number of entries.
     * Transformation logic can create additional entries automatically,such as
     * the mask and some of the default entries, so we also need additional
     * checks during transformation.  The up-front check is still valuable here
     * so that we don't run a lot of expensive transformation logic while
     * holding the namesystem lock for an attacker who intentionally sent a huge
     * ACL spec.
     *
     * @param aclSpec List<AclEntry> containing unvalidated input ACL spec
     * @throws AclException if validation fails
     */
    public ValidatedAclSpec(List<AclEntry> aclSpec) throws AclException {
      Collections.sort(aclSpec, ACL_ENTRY_COMPARATOR);
      checkMaxEntries(new ScopedAclEntries(aclSpec));
      this.aclSpec = aclSpec;
    }

    /**
     * Returns true if this contains an entry matching the given key.  An ACL
     * entry's key consists of scope, type and name (but not permission).
     *
     * @param key AclEntry search key
     * @return boolean true if found
     */
    public boolean containsKey(AclEntry key) {
      return Collections.binarySearch(aclSpec, key, ACL_ENTRY_COMPARATOR) >= 0;
    }

    /**
     * Returns the entry matching the given key or null if not found.  An ACL
     * entry's key consists of scope, type and name (but not permission).
     *
     * @param key AclEntry search key
     * @return AclEntry entry matching the given key or null if not found
     */
    public AclEntry findByKey(AclEntry key) {
      int index = Collections.binarySearch(aclSpec, key, ACL_ENTRY_COMPARATOR);
      if (index >= 0) {
        return aclSpec.get(index);
      }
      return null;
    }

    @Override
    public Iterator<AclEntry> iterator() {
      return aclSpec.iterator();
    }
  }

  public static class AclException extends IOException {
    private static final long serialVersionUID = 1L;

    /**
     * Creates a new AclException.
     *
     * @param message String message
     */
    public AclException(String message) {
      super(message);
    }

    /**
     * Creates a new AclException.
     *
     * @param message String message
     * @param cause   The cause of the exception
     */
    public AclException(String message, Throwable cause) {
      super(message, cause);
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/BgTaskUtil.java
================================================
/*
 * JuiceFS, Copyright 2023 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.utils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class BgTaskUtil {
  private static final Logger LOG = LoggerFactory.getLogger(BgTaskUtil.class);

  private static final Map<String, ScheduledExecutorService> bgThreadForName = new HashMap<>(); // volName -> threadpool
  private static final Map<String, Object> tasks = new HashMap<>(); // volName|taskName -> running
  private static final Map<String, Set<Long>> runningInstance = new HashMap<>();

  public static void reset() {
    bgThreadForName.clear();
    tasks.clear();
    runningInstance.clear();
  }

  public static Map<String, ScheduledExecutorService> getBgThreadForName() {
    return bgThreadForName;
  }

  public static Map<String, Set<Long>> getRunningInstance() {
    return runningInstance;
  }

  public static void register(String volName, long handle) {
    if (handle <= 0) {
      return;
    }
    synchronized (runningInstance) {
      LOG.debug("register instance for {}({})", volName, handle);
      if (!runningInstance.containsKey(volName)) {
        Set<Long> handles = new HashSet<>();
        handles.add(handle);
        runningInstance.put(volName, handles);
      } else {
        runningInstance.get(volName).add(handle);
      }
    }
  }

  public static void unregister(String volName, long handle, Runnable cleanupTask) {
    if (handle <= 0) {
      return;
    }
    synchronized (runningInstance) {
      if (!runningInstance.containsKey(volName)) {
        return;
      }
      Set<Long> handles = runningInstance.get(volName);
      boolean removed = handles.remove(handle);
      if (!removed) {
        return;
      }
      LOG.debug("unregister instance for {}({})", volName, handle);
      if (handles.size() == 0) {
        LOG.debug("clean resources for {}", volName);
        ScheduledExecutorService pool = bgThreadForName.remove(volName);
        if (pool != null) {
          pool.shutdownNow();
        }
        stopTrashEmptier(volName);
        tasks.entrySet().removeIf(e -> e.getKey().startsWith(volName + "|"));
        cleanupTask.run();
        runningInstance.remove(volName);
      }
    }
  }

  public  interface Task {
    void run() throws IOException;
  }


  public static void putTask(String volName, String taskName, Task task, long delay, long period, TimeUnit unit) throws IOException {
    synchronized (tasks) {
      String key = volName + "|" + taskName;
      if (!tasks.containsKey(key)) {
        LOG.debug("start task {}", key);
        task.run();
        // build background task thread for volume name
        ScheduledExecutorService pool = bgThreadForName.computeIfAbsent(volName,
            n -> Executors.newScheduledThreadPool(1, r -> {
              Thread thread = new Thread(r, "JuiceFS Background Task");
              thread.setDaemon(true);
              return thread;
            })
        );
        pool.scheduleAtFixedRate(()->{
          try {
            task.run();
          } catch (IOException e) {
            LOG.warn("run {} failed", key, e);
          }
        }, delay, period, unit);
        tasks.put(key, new Object());
      }
    }
  }

  public static void startTrashEmptier(String name, Runnable emptierTask, long delay, TimeUnit unit) {
    synchronized (tasks) {
      String key = name + "|" + "Trash emptier";
      if (!tasks.containsKey(key)) {
        LOG.debug("run trash emptier for {}", name);
        ScheduledExecutorService thread = Executors.newScheduledThreadPool(1, r -> {
          Thread t = new Thread(r, "JuiceFS Trash Emptier");
          t.setDaemon(true);
          return t;
        });
        thread.schedule(emptierTask, delay, unit);
        tasks.put(key, thread);
      }
    }
  }

  private static void stopTrashEmptier(String name) {
    synchronized (tasks) {
      String key = name + "|" + "Trash emptier";
      Object v = tasks.remove(key);
      if (v instanceof ScheduledExecutorService) {
        LOG.debug("close trash emptier for {}", name);
        ((ScheduledExecutorService) v).shutdownNow();
      }
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/BufferPool.java
================================================
package io.juicefs.utils;

import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;

/**
 * thread safe
 */
public class BufferPool {

  private static final ConcurrentMap<Integer, Queue<WeakReference<ByteBuffer>>> buffersBySize = new ConcurrentHashMap<>();

  public static ByteBuffer getBuffer(int size) {
    Queue<WeakReference<ByteBuffer>> list = buffersBySize.get(size);
    if (list == null) {
      return ByteBuffer.allocate(size);
    }

    WeakReference<ByteBuffer> ref;
    while ((ref = list.poll()) != null) {
      ByteBuffer b = ref.get();
      if (b != null) {
        return b;
      }
    }

    return ByteBuffer.allocate(size);
  }

  public static void returnBuffer(ByteBuffer buf) {
    buf.clear();
    int size = buf.capacity();
    Queue<WeakReference<ByteBuffer>> list = buffersBySize.get(size);
    if (list == null) {
      list = new ConcurrentLinkedQueue<>();
      Queue<WeakReference<ByteBuffer>> prev = buffersBySize.putIfAbsent(size, list);
      // someone else put a queue in the map before we did
      if (prev != null) {
        list = prev;
      }
    }
    list.add(new WeakReference<>(buf));
  }
}

================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/CallerContextUtil.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.apache.hadoop.ipc.CallerContext;


public class CallerContextUtil {

  public static void setContext(String context) throws Exception {
    CallerContext current = CallerContext.getCurrent();
    CallerContext.Builder builder;
    if (current == null || !current.isContextValid()) {
      builder = new CallerContext.Builder(context);
      CallerContext.setCurrent(builder.build());
    } else if (current.getSignature() == null && !current.getContext().endsWith("_" + context)) {
      builder = new CallerContext.Builder(current.getContext() + "_" + context);
      CallerContext.setCurrent(builder.build());
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/ConsistentHash.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.utils;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import java.util.List;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentSkipListMap;

public class ConsistentHash<T> {

  private final int numberOfVirtualNodeReplicas;
  private final SortedMap<Integer, T> circle = new ConcurrentSkipListMap<>();
  private final HashFunction nodeHash = Hashing.murmur3_32();
  private final HashFunction keyHash = Hashing.murmur3_32();

  public ConsistentHash(int numberOfVirtualNodeReplicas, List<T> nodes) {
    this.numberOfVirtualNodeReplicas = numberOfVirtualNodeReplicas;
    addNode(nodes);
  }

  public void addNode(List<T> nodes) {
    for (T node : nodes) {
      addNode(node);
    }
  }

  public void addNode(T node) {
    for (int i = 0; i < numberOfVirtualNodeReplicas; i++) {
      circle.put(getKetamaHash(i + "" + node), node);
    }
  }

  public void remove(List<T> nodes) {
    for (T node : nodes) {
      remove(node);
    }
  }

  public void remove(T node) {
    for (int i = 0; i < numberOfVirtualNodeReplicas; i++) {
      circle.remove(getKetamaHash(i + "" + node));
    }
  }

  public T get(Object key) {
    if (circle.isEmpty()) {
      return null;
    }
    int hash = getKeyHash(key.toString());
    if (!circle.containsKey(hash)) {
      SortedMap<Integer, T> tailMap = circle.tailMap(hash);
      hash = tailMap.isEmpty() ? circle.firstKey() : tailMap.firstKey();
    }
    return circle.get(hash);
  }

  private int getKeyHash(final String k) {
    return keyHash.hashBytes(k.getBytes()).asInt();
  }

  private int getKetamaHash(final String k) {
    return nodeHash.hashBytes(k.getBytes()).asInt();
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/FsNodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import io.juicefs.JuiceFileSystem;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

public class FsNodesFetcher extends NodesFetcher {
  private static final Logger LOG = LoggerFactory.getLogger(FsNodesFetcher.class);

  private Configuration conf;

  public FsNodesFetcher(String jfsName) {
    super(jfsName);
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  @Override
  public List<String> fetchNodes(String uri) {
    Path path = new Path(uri);
    try (FileSystem fs = FileSystem.newInstance(path.toUri(), conf);
         FSDataInputStream inputStream = fs.open(path)) {
      return new BufferedReader(new InputStreamReader(inputStream))
          .lines().filter(l->!l.isEmpty()).collect(Collectors.toList());
    } catch (Exception e) {
      LOG.warn("fetch nodes from {} failed", uri, e);
    }
    return null;
  }

  @Override
  protected Set<String> parseNodes(String response) throws Exception {
    return null;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/FsPermissionExtension.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.utils;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.permission.FsPermission;

/**
 * HDFS permission subclass used to indicate an ACL is present and/or that the
 * underlying file/dir is encrypted. The ACL/encrypted bits are not visible
 * directly to users of {@link FsPermission} serialization.  This is
 * done for backwards compatibility in case any existing clients assume the
 * value of FsPermission is in a particular range.
 */
@InterfaceAudience.Private
public class FsPermissionExtension extends FsPermission {
  private final static short ACL_BIT = 1 << 12;
  private final static short ENCRYPTED_BIT = 1 << 13;
  private final boolean aclBit;
  private final boolean encryptedBit;

  /**
   * Constructs a new FsPermissionExtension based on the given FsPermission.
   *
   * @param perm FsPermission containing permission bits
   */
  public FsPermissionExtension(FsPermission perm, boolean hasAcl,
                               boolean isEncrypted) {
    super(perm.toShort());
    aclBit = hasAcl;
    encryptedBit = isEncrypted;
  }

  @Override
  public short toExtendedShort() {
    return (short) (toShort() |
            (aclBit ? ACL_BIT : 0) | (encryptedBit ? ENCRYPTED_BIT : 0));
  }

  public boolean getAclBit() {
    return aclBit;
  }

  @Override
  public boolean getEncryptedBit() {
    return encryptedBit;
  }
}

================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/NodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * fetch calculate nodes of the cluster
 */
public abstract class NodesFetcher {
  private static final Log LOG = LogFactory.getLog(NodesFetcher.class);

  protected File cacheFolder = new File("/tmp/.juicefs");
  protected File cacheFile;
  private String jfsName;

  public NodesFetcher(String jfsName) {
    this.jfsName = jfsName;
    if (!cacheFolder.exists()) {
      cacheFolder.mkdirs();
    }
    cacheFile = new File(cacheFolder, jfsName + ".nodes");
    cacheFolder.setWritable(true, false);
    cacheFolder.setReadable(true, false);
    cacheFolder.setExecutable(true, false);
    cacheFile.setWritable(true, false);
    cacheFile.setReadable(true, false);
    cacheFile.setExecutable(true, false);
  }

  public List<String> fetchNodes(String urls) {
    List<String> result = readCache();

    // refresh local disk cache every 10 mins
    long duration = System.currentTimeMillis() - cacheFile.lastModified();
    if (duration > 10 * 60 * 1000L || result == null) {
      Set<String> nodes = getNodes(urls.split(","));
      if (nodes == null) return result;
      result = new ArrayList<>(nodes);
      cache(result);
    }

    return result;
  }

  public List<String> readCache() {
    try {
      if (!cacheFile.exists()) return null;
      return Files.readAllLines(cacheFile.toPath());
    } catch (IOException e) {
      LOG.warn("read cache failed due to: ", e);
      return null;
    }
  }

  public void cache(List<String> hostnames) {
    File tmpFile = new File(cacheFolder, System.getProperty("user.name") + "-" + jfsName + ".nodes.tmp");
    try (RandomAccessFile writer = new RandomAccessFile(tmpFile, "rws")) {
      tmpFile.setWritable(true, false);
      tmpFile.setReadable(true, false);
      if (hostnames != null) {
        String content = String.join("\n", hostnames);
        writer.write(content.getBytes());
      }
      tmpFile.renameTo(cacheFile);
    } catch (IOException e) {
      LOG.warn("wirte cache failed due to: ", e);
    }
  }

  public Set<String> getNodes(String[] urls) {
    if (urls == null) {
      return null;
    }
    for (String url : urls) {
      try {
        String response = doGet(url);
        if (response == null) {
          continue;
        }
        return parseNodes(response);
      } catch (Throwable e) {
        LOG.warn("fetch from:" + url + " failed, switch to another url", e);
      }
    }
    return null;
  }

  protected abstract Set<String> parseNodes(String response) throws Exception;

  protected String doGet(String url) {
    int timeout = 3; // seconds

    HttpURLConnection con = null;
    try {
      con = (HttpURLConnection) new URL(url).openConnection();
      con.setConnectTimeout(timeout * 1000);
      con.setReadTimeout(timeout * 1000);

      int status = con.getResponseCode();
      if (status != 200) return null;

      BufferedReader in = new BufferedReader(
              new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8));
      String content = in.lines().collect(Collectors.joining("\n"));
      in.close();
      return content;
    } catch (IOException e) {
      LOG.warn(e);
      return null;
    } finally {
      if (con != null) {
        con.disconnect();
      }
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/NodesFetcherBuilder.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.apache.hadoop.conf.Configuration;

public class NodesFetcherBuilder {
  public static NodesFetcher buildFetcher(String urls, String jfsName, Configuration conf) {
    NodesFetcher fetcher;
    if ((urls.startsWith("http") && urls.contains("cluster/nodes"))
        || "yarn".equals(urls.toLowerCase().trim())) {
      fetcher = new YarnNodesFetcher(jfsName);
    } else if (urls.startsWith("http") && urls.contains("service/presto")) {
      fetcher = new PrestoNodesFetcher(jfsName);
    }  else if (urls.startsWith("http") && urls.contains("/json")) {
      fetcher = new SparkNodesFetcher(jfsName);
    } else if (urls.startsWith("http") && urls.contains("api/v1/applications")) {
      fetcher = new SparkThriftNodesFetcher(jfsName);
    } else {
      fetcher = new FsNodesFetcher(jfsName);
      ((FsNodesFetcher) fetcher).setConf(conf);
    }
    return fetcher;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/PatchUtil.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import javassist.ClassPool;
import javassist.CtClass;
import javassist.CtMethod;
import javassist.NotFoundException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.instrument.ClassDefinition;

public class PatchUtil {
  private static final Logger LOG = LoggerFactory.getLogger(PatchUtil.class);

  public enum PatchType {
    BODY, BEFORE, AFTER
  }

  public static class ClassMethod {
    private String method;
    private String[] params;
    private PatchType[] types;
    private String[] codes;

    public ClassMethod(String method, String[] params, String[] codes, PatchType[] types) {
      if (codes.length != types.length) {
        LOG.error("{} has {} codes, but only {} types", method, codes.length, types.length);
      }
      this.method = method;
      this.params = params;
      this.codes = codes;
      this.types = types;
    }
  }

  public static synchronized void doPatch(String className, ClassMethod[] classMethods) {

    ClassPool classPool = ClassPool.getDefault();
    try {
      CtClass cls = classPool.get(className);

      for (ClassMethod classMethod : classMethods) {
        String method = classMethod.method;

        CtMethod mtd;
        String[] params = classMethod.params;
        if (params != null) {
          CtClass[] cts = new CtClass[params.length];
          for (int i = 0; i < params.length; i++) {
            cts[i] = classPool.get(params[i]);
          }
          mtd = cls.getDeclaredMethod(method, cts);
        } else {
          mtd = cls.getDeclaredMethod(method);
        }

        String[] codes = classMethod.codes;
        PatchType[] types = classMethod.types;
        for (int i = 0; i < codes.length; i++) {
          switch (types[i]) {
            case BODY:
              mtd.setBody(codes[0]);
              break;
            case AFTER:
              mtd.insertAfter(codes[0], true);
              break;
            case BEFORE:
              mtd.insertBefore(codes[0]);
              break;
          }
        }
      }

      RedefineClassAgent.redefineClasses(new ClassDefinition(Class.forName(className), cls.toBytecode()));
      cls.detach();
    } catch (NotFoundException | NoClassDefFoundError ignored) {
    } catch (Throwable e) {
      LOG.warn(String.format("patch %s failed", className), e);
    }
  }

  public static void patchBody(String className, String method, String[] params, String code) {
    doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.BODY})});
  }

  public static void patchBefore(String className, String method, String[] params, String code) {
    doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.BEFORE})});
  }

  public static void patchAfter(String className, String method, String[] params, String code) {
    doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{code}, new PatchType[]{PatchType.AFTER})});
  }

  public static void patchBeforeAndAfter(String className, String method, String[] params, String beforeCode, String afterCode) {
    doPatch(className, new ClassMethod[]{new ClassMethod(method, params, new String[]{beforeCode, afterCode}, new PatchType[]{PatchType.BEFORE, PatchType.AFTER})});
  }

}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/PrestoNodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.json.JSONArray;
import org.json.JSONObject;

import java.net.URL;
import java.util.HashSet;
import java.util.Set;

public class PrestoNodesFetcher extends NodesFetcher {

  public PrestoNodesFetcher(String jfsName) {
    super(jfsName);
  }

  // url like "http://hadoop01:8000/v1/service/presto"
  @Override
  protected Set<String> parseNodes(String response) throws Exception {
    Set<String> result = new HashSet<>();
    JSONArray nodes = new JSONObject(response).getJSONArray("services");
    for (Object node : nodes) {
      JSONObject nodeProperties = ((JSONObject) node).getJSONObject("properties");
      if (nodeProperties.getString("coordinator").equals("false")) {
        String http = nodeProperties.getString("http");
        result.add(new URL(http).getHost());
      }
    }
    return result;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/RedefineClassAgent.java
================================================
/*
Copyright 2017 Turn Inc
All rights reserved.
The contents of this file are subject to the MIT License as provided
below. Alternatively, the contents of this file may be used under
the terms of Mozilla Public License Version 1.1,
the terms of the GNU Lesser General Public License Version 2.1 or later,
or the terms of the Apache License Version 2.0.
License:
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package io.juicefs.utils;


import com.sun.tools.attach.VirtualMachine;
import javassist.CannotCompileException;
import javassist.ClassPool;
import javassist.CtClass;
import javassist.NotFoundException;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.instrument.ClassDefinition;
import java.lang.instrument.Instrumentation;
import java.lang.instrument.UnmodifiableClassException;
import java.lang.management.ManagementFactory;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Packages everything necessary to be able to redefine a class using {@link Instrumentation} as provided by
 * Java 1.6 or later. Class redefinition is the act of replacing a class' bytecode at runtime, after that class
 * has already been loaded.
 * <p>
 * The scheme employed by this class uses an agent (defined by this class) that, when loaded into the JVM, provides
 * an instance of {@link Instrumentation} which in turn provides a method to redefine classes.
 * <p>
 * Users of this class only need to call {@link #redefineClasses(ClassDefinition...)}. The agent stuff will be done
 * automatically (and lazily).
 * <p>
 * Note that classes cannot be arbitrarily redefined. The new version must retain the same schema; methods and fields
 * cannot be added or removed. In practice this means that method bodies can be changed.
 * <p>
 * Note that this is a replacement for javassist's {@code HotSwapper}. {@code HotSwapper} depends on the debug agent
 * to perform the hotswap. That agent is available since Java 1.3, but the JVM must be started with the agent enabled,
 * and the agent often fails to perform the swap if the machine is under heavy load. This class is both cleaner and more
 * reliable.
 *
 * @author Adam Lugowski
 * @see Instrumentation#redefineClasses(ClassDefinition...)
 */
public class RedefineClassAgent {
  /**
   * Use the Java logger to avoid any references to anything not supplied by the JVM. This avoids issues with
   * classpath when compiling/loading this class as an agent.
   */
  private static final Logger LOGGER = Logger.getLogger(RedefineClassAgent.class.getSimpleName());

  /**
   * Populated when this class is loaded into the JVM as an agent (via {@link #ensureAgentLoaded()}.
   */
  private static volatile Instrumentation instrumentation = null;

  /**
   * How long to wait for the agent to load before giving up and assuming the load failed.
   */
  private static final int AGENT_LOAD_WAIT_TIME_SEC = 3;

  /**
   * Agent entry point. Do not call this directly.
   * <p>
   * This method is called by the JVM when this class is loaded as an agent.
   * <p>
   * Sets {@link #instrumentation} to {@code inst}, provided {@code inst} supports class redefinition.
   *
   * @param agentArgs ignored.
   * @param inst      This is the reason this class exists. {@link Instrumentation} has the
   *                  {@link Instrumentation#redefineClasses(ClassDefinition...)} method.
   */
  public static void agentmain(String agentArgs, Instrumentation inst) {
    if (!inst.isRedefineClassesSupported()) {
      LOGGER.severe("Class redefinition not supported. Aborting.");
      return;
    }

    instrumentation = inst;
  }

  /**
   * Attempts to redefine class bytecode.
   * <p>
   * On first call this method will attempt to load an agent into the JVM to obtain an instance of
   * {@link Instrumentation}. This agent load can introduce a pause (in practice 1 to 2 seconds).
   *
   * @param definitions classes to redefine.
   * @throws UnmodifiableClassException as thrown by {@link Instrumentation#redefineClasses(ClassDefinition...)}
   * @throws ClassNotFoundException     as thrown by {@link Instrumentation#redefineClasses(ClassDefinition...)}
   * @throws FailedToLoadAgentException if agent either failed to load or if the agent wasn't able to get an
   *                                    instance of {@link Instrumentation} that allows class redefinitions.
   * @see Instrumentation#redefineClasses(ClassDefinition...)
   */
  public static void redefineClasses(ClassDefinition... definitions)
          throws UnmodifiableClassException, ClassNotFoundException, FailedToLoadAgentException {
    ensureAgentLoaded();
    instrumentation.redefineClasses(definitions);
  }

  /**
   * Lazy loads the agent that populates {@link #instrumentation}. OK to call multiple times.
   *
   * @throws FailedToLoadAgentException if agent either failed to load or if the agent wasn't able to get an
   *                                    instance of {@link Instrumentation} that allows class redefinitions.
   */
  private static void ensureAgentLoaded() throws FailedToLoadAgentException {
    if (instrumentation != null) {
      // already loaded
      return;
    }

    // load the agent
    try {
      File agentJar = createAgentJarFile();

      // Loading an agent requires the PID of the JVM to load the agent to. Find out our PID.
      String nameOfRunningVM = ManagementFactory.getRuntimeMXBean().getName();
      String pid = nameOfRunningVM.substring(0, nameOfRunningVM.indexOf('@'));

      // load the agent
      VirtualMachine vm = VirtualMachine.attach(pid);
      vm.loadAgent(agentJar.getAbsolutePath(), "");
      vm.detach();
    } catch (Exception e) {
      throw new FailedToLoadAgentException(e);
    }

    // wait for the agent to load
    for (int sec = 0; sec < AGENT_LOAD_WAIT_TIME_SEC; sec++) {
      if (instrumentation != null) {
        // success!
        return;
      }

      try {
        LOGGER.info("Sleeping for 1 second while waiting for agent to load.");
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new FailedToLoadAgentException();
      }
    }

    // agent didn't load
    throw new FailedToLoadAgentException();
  }

  /**
   * An agent must be specified as a .jar where the manifest has an Agent-Class attribute. Additionally, in order
   * to be able to redefine classes, the Can-Redefine-Classes attribute must be true.
   * <p>
   * This method creates such an agent Jar as a temporary file. The Agent-Class is this class. If the returned Jar
   * is loaded as an agent then {@link #agentmain(String, Instrumentation)} will be called by the JVM.
   *
   * @return a temporary {@link File} that points at Jar that packages this class.
   * @throws IOException if agent Jar creation failed.
   */
  private static File createAgentJarFile() throws IOException {
    File jarFile = File.createTempFile("agent", ".jar");
    jarFile.deleteOnExit();

    // construct a manifest that allows class redefinition
    Manifest manifest = new Manifest();
    Attributes mainAttributes = manifest.getMainAttributes();
    mainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0");
    mainAttributes.put(new Attributes.Name("Agent-Class"), RedefineClassAgent.class.getName());
    mainAttributes.put(new Attributes.Name("Can-Retransform-Classes"), "true");
    mainAttributes.put(new Attributes.Name("Can-Redefine-Classes"), "true");

    try (JarOutputStream jos = new JarOutputStream(new FileOutputStream(jarFile), manifest)) {
      // add the agent .class into the .jar
      JarEntry agent = new JarEntry(RedefineClassAgent.class.getName().replace('.', '/') + ".class");
      jos.putNextEntry(agent);

      // dump the class bytecode into the entry
      ClassPool pool = ClassPool.getDefault();
      CtClass ctClass = pool.get(RedefineClassAgent.class.getName());
      jos.write(ctClass.toBytecode());
      jos.closeEntry();
    } catch (CannotCompileException | NotFoundException e) {
      // Realistically this should never happen.
      LOGGER.log(Level.SEVERE, "Exception while creating RedefineClassAgent jar.", e);
      throw new IOException(e);
    }

    return jarFile;
  }

  /**
   * Marks a failure to load the agent and get an instance of {@link Instrumentation} that is able to redefine
   * classes.
   */
  public static class FailedToLoadAgentException extends Exception {
    public FailedToLoadAgentException() {
      super();
    }

    public FailedToLoadAgentException(Throwable cause) {
      super(cause);
    }
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/ReflectionUtil.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.utils;

import java.lang.reflect.Constructor;
import java.lang.reflect.Field;

public class ReflectionUtil {
  public static boolean hasMethod(String className, String method, String[] params) {
    try {
      Class<?>[] classes = null;
      if (params != null) {
        classes = new Class[params.length];
        for (int i = 0; i < params.length; i++) {
          classes[i] = Class.forName(params[i], false, Thread.currentThread().getContextClassLoader());
        }
      }
      return hasMethod(className, method, classes);
    } catch (ClassNotFoundException e) {
      return false;
    }
  }

  public static boolean hasMethod(String className, String method, Class<?>[] params) {
    try {
      Class<?> clazz = Class.forName(className, false, Thread.currentThread().getContextClassLoader());
      clazz.getDeclaredMethod(method, params);
    } catch (ClassNotFoundException | NoSuchMethodException e) {
      return false;
    }
    return true;
  }

  public static <T> Constructor<T> getConstructor(Class<T> clazz, Class<?>... params) {
    try {
      return clazz.getConstructor(params);
    } catch (NoSuchMethodException e) {
      return null;
    }
  }

  public static Object getField(String className, String field, Object obj) throws ClassNotFoundException, NoSuchFieldException, IllegalAccessException {
    Class<?> clazz = Class.forName(className);
    Field f = clazz.getDeclaredField(field);
    f.setAccessible(true);
    return f.get(obj);
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/SparkNodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.json.JSONArray;
import org.json.JSONObject;

import java.util.HashSet;
import java.util.Set;

public class SparkNodesFetcher extends NodesFetcher {
  public SparkNodesFetcher(String jfsName) {
    super(jfsName);
  }

  // url like "http://host:8888/json/"
  @Override
  protected Set<String> parseNodes(String response) throws Exception {
    Set<String> result = new HashSet<>();
    JSONArray workers = new JSONObject(response).getJSONArray("workers");
    for (Object worker : workers) {
      if (((JSONObject) worker).getString("state").equals("ALIVE")) {
        result.add(((JSONObject) worker).getString("host"));
      }
    }
    return result;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/SparkThriftNodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.HashSet;
import java.util.Set;

// "http://hadoop01:4040/api/v1/applications/";
public class SparkThriftNodesFetcher extends NodesFetcher {
  private static final Log LOG = LogFactory.getLog(SparkThriftNodesFetcher.class);

  public SparkThriftNodesFetcher(String jfsName) {
    super(jfsName);
  }

  @Override
  public Set<String> getNodes(String[] urls) {
    if (urls == null || urls.length == 0) {
      return null;
    }
    for (String url : urls) {
      try {
        JSONArray appArrays = new JSONArray(doGet(url));
        if (appArrays.length() > 0) {
          String id = appArrays.getJSONObject(0).getString("id");
          url = url.endsWith("/") ? url : url + "/";
          return parseNodes(doGet(url + id + "/allexecutors"));
        }
      } catch (Throwable e) {
        LOG.warn("fetch from spark thrift server failed!", e);
      }
    }
    return null;
  }

  @Override
  protected Set<String> parseNodes(String response) throws Exception {
    if (response == null) {
      return null;
    }
    Set<String> res = new HashSet<>();
    for (Object item : new JSONArray(response)) {
      JSONObject obj = (JSONObject) item;
      String id = obj.getString("id");
      boolean isActive = obj.getBoolean("isActive");
      String hostPort = obj.getString("hostPort");
      boolean isBlacklisted = obj.getBoolean("isBlacklisted");
      String[] hAp = hostPort.split(":");
      if (hAp.length > 0 && !"driver".equals(id) && isActive && !isBlacklisted) {
        res.add(hAp[0]);
      }
    }
    return res;
  }
}


================================================
FILE: sdk/java/src/main/java/io/juicefs/utils/YarnNodesFetcher.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.*;

public class YarnNodesFetcher extends NodesFetcher {
  private static final Log LOG = LogFactory.getLog(YarnNodesFetcher.class);

  public YarnNodesFetcher(String jfsName) {
    super(jfsName);
  }

  @Override
  public Set<String> getNodes(String[] urls) {
    if (urls == null || urls.length == 0) {
      return null;
    }
    List<String> yarnUrls = Arrays.asList(urls);
    for (String url : urls) {
      if ("yarn".equals(url.toLowerCase().trim())) {
        Configuration conf = new Configuration();
        Map<String, String> props = conf.getValByRegex("yarn\\.resourcemanager\\.webapp\\.address.*");
        if (props.size() == 0) {
          return null;
        }
        yarnUrls = new ArrayList<>();
        for (String v : props.values()) {
          yarnUrls.add("http://" + v + "/ws/v1/cluster/nodes/");
        }
        break;
      }
    }
    return super.getNodes(yarnUrls.toArray(new String[0]));
  }

  @Override
  protected Set<String> parseNodes(String response) {
    Set<String> result = new HashSet<>();
    JSONArray allNodes = new JSONObject(response).getJSONObject("nodes").getJSONArray("node");
    for (Object obj : allNodes) {
      if (obj instanceof JSONObject) {
        JSONObject node = (JSONObject) obj;
        String state = node.getString("state");
        String hostname = node.getString("nodeHostName");
        if ("RUNNING".equals(state)) {
          result.add(hostname);
        }
      }
    }
    return result;
  }
}


================================================
FILE: sdk/java/src/main/resources/META-INF/services/org.apache.flink.core.fs.FileSystemFactory
================================================
# JuiceFS, Copyright 2020 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

io.juicefs.FlinkFileSystemFactory

================================================
FILE: sdk/java/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier
================================================
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

io.juicefs.kerberos.JuiceFSDelegationTokenIdentifier


================================================
FILE: sdk/java/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
================================================
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

io.juicefs.kerberos.JuiceFSTokenRenewer


================================================
FILE: sdk/java/src/main/resources/META-INF/services/org.kitesdk.data.spi.Loadable
================================================
# JuiceFS, Copyright 2021 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

io.juicefs.KiteDataLoader


================================================
FILE: sdk/java/src/test/java/io/juicefs/JuiceFileSystemBgTaskTest.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs;

import io.juicefs.utils.BgTaskUtil;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URI;
import java.util.Map;
import java.util.concurrent.*;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CHECKPOINT_INTERVAL_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;

public class JuiceFileSystemBgTaskTest extends TestCase {
  private static final Logger LOG = LoggerFactory.getLogger(JuiceFileSystemBgTaskTest.class);

  public void testJuiceFileSystemBgTask() throws Exception {
    FileSystem.closeAll();
    BgTaskUtil.reset();
    Configuration conf = new Configuration();
    conf.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    conf.set(FS_TRASH_INTERVAL_KEY, "6");
    conf.set(FS_TRASH_CHECKPOINT_INTERVAL_KEY, "2");
    conf.set("juicefs.users", "jfs://dev/users");
    conf.set("juicefs.groups", "jfs://dev/groups");
    conf.set("juicefs.discover-nodes-url", "jfs://dev/etc/nodes");
    int threads = 100;
    int instances = 1000;
    CountDownLatch latch = new CountDownLatch(instances);
    ExecutorService pool = Executors.newFixedThreadPool(threads);
    for (int i = 0; i < instances; i++) {
      pool.submit(() -> {
        try (JuiceFileSystem jfs = new JuiceFileSystem()) {
          jfs.initialize(URI.create("jfs://dev/"), conf);
          if (ThreadLocalRandom.current().nextInt(10) % 2 == 0) {
            jfs.getFileBlockLocations(jfs.getFileStatus(new Path("jfs://dev/users")), 0, 1000);
          }
        } catch (Exception e) {
          LOG.error("unexpected exception", e);
        } finally {
          latch.countDown();
        }
      });
    }
    latch.await();
    Map<String, ScheduledExecutorService> bgThreadForName = BgTaskUtil.getBgThreadForName();
    for (String s : bgThreadForName.keySet()) {
      System.out.println(s);
    }
    assertEquals(0, bgThreadForName.size());
    assertEquals(0, BgTaskUtil.getRunningInstance().size());
    pool.shutdown();
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/JuiceFileSystemTest.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs;

import com.google.common.collect.Lists;
import io.juicefs.utils.AclTransformation;
import junit.framework.TestCase;
import org.apache.commons.io.IOUtils;
import org.apache.flink.runtime.fs.hdfs.HadoopRecoverableWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.*;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Method;
import java.net.InetAddress;
import java.net.URI;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_CHECKPOINT_INTERVAL_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY;
import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS;
import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT;
import static org.apache.hadoop.fs.permission.AclEntryType.*;
import static org.apache.hadoop.fs.permission.FsAction.*;
import static org.junit.Assert.assertArrayEquals;

public class JuiceFileSystemTest extends TestCase {
  FsShell shell;
  FileSystem fs;
  Configuration cfg;

  public void setUp() throws Exception {
    cfg = new Configuration();
    cfg.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    cfg.set(FS_TRASH_INTERVAL_KEY, "6");
    cfg.set(FS_TRASH_CHECKPOINT_INTERVAL_KEY, "2");
    cfg.set("juicefs.access-log", "/tmp/jfs.access.log");
    cfg.set("juicefs.discover-nodes-url", "jfs:///etc/nodes");
    fs = FileSystem.newInstance(cfg);
    fs.delete(new Path("/hello"));
    FSDataOutputStream out = fs.create(new Path("/hello"), true);
    out.writeBytes("hello\n");
    out.close();

    cfg.setQuietMode(false);
    shell = new FsShell(cfg);
  }

  public void tearDown() throws Exception {
    fs.close();
    FileSystem.closeAll();
  }

  public void testFsStatus() throws IOException {
    FsStatus st = fs.getStatus();
    assertTrue("capacity", st.getCapacity() > 0);
    assertTrue("remaining", st.getRemaining() > 0);
  }

  public void testSummary() throws IOException {
    ContentSummary summary = fs.getContentSummary(new Path("/"));
    assertTrue("length", summary.getLength() > 0);
    assertTrue("fileCount", summary.getFileCount() > 0);
    summary = fs.getContentSummary(new Path("/hello"));
    assertEquals(6, summary.getLength());
    assertEquals(1, summary.getFileCount());
    assertEquals(0, summary.getDirectoryCount());
    assertEquals(-1L, summary.getQuota());
    assertEquals(-1L, summary.getSpaceQuota());
  }

  public void testLongName() throws IOException {
    Path p = new Path(
            "/longname/very_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_long_name");
    fs.mkdirs(p);
    FileStatus[] files = fs.listStatus(new Path("/longname"));
    if (files.length != 1) {
      throw new IOException("expected one file but got " + files.length);
    }
    if (!files[0].getPath().getName().equals(p.getName())) {
      throw new IOException("not equal");
    }
  }

  public void testLocation() throws IOException {
    FileStatus f = new FileStatus(3L << 30, false, 1, 128L << 20, 0, new Path("/hello"));
    BlockLocation[] locations = fs.getFileBlockLocations(f, 128L * 1024 * 1024 - 256, 5L * 64 * 1024 * 1024 - 512L);

    String[] names = locations[0].getNames();
    for (String name : names) {
      assertEquals(name.split(":").length, 2);
    }

    String[] storageIds = locations[0].getStorageIds();
    assertNotNull(storageIds);
    assertEquals(names.length, storageIds.length);

    assertEquals(InetAddress.getLocalHost().getHostName() + ":50010", names[0]);
  }

  public void testReadWrite() throws Exception {
    long l = fs.getFileStatus(new Path("/hello")).getLen();
    assertEquals(6, l);
    byte[] buf = new byte[(int) l];
    FSDataInputStream in = fs.open(new Path("/hello"));
    in.readFully(buf);
    in.close();
    assertEquals("hello\n", new String(buf));
    assertEquals(0, shell.run(new String[]{"-cat", "/hello"}));

    fs.setPermission(new Path("/hello"), new FsPermission((short) 0000));
    UserGroupInformation ugi =
            UserGroupInformation.createUserForTesting("nobody", new String[]{"nogroup"});
    FileSystem fs2 = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      @Override
      public FileSystem run() throws Exception {
        return FileSystem.get(new URI("jfs://dev"), cfg);
      }
    });
    try {
      in = fs2.open(new Path("/hello"));
      assertEquals(in, null);
    } catch (IOException e) {
      fs.setPermission(new Path("/hello"), new FsPermission((short) 0644));
    }
  }

  public void testWrite() throws Exception {
    Path f = new Path("/testWriteFile");
    FSDataOutputStream fou = fs.create(f);
    byte[] b = "hello world".getBytes();
    OutputStream ou = ((JuiceFileSystemImpl.BufferedFSOutputStream)fou.getWrappedStream()).getOutputStream();
    ou.write(b, 6, 5);
    ou.close();
    FSDataInputStream in = fs.open(f);
    String str = IOUtils.toString(in);
    assertEquals("world", str);
    in.close();

    int fileLen = 1 << 20;
    byte[] contents = new byte[fileLen];
    Random random = new Random();
    random.nextBytes(contents);
    f = new Path("/tmp/writeFile");
    FSDataOutputStream out = fs.create(f);
    int off = 0;
    int len = 256<<10;
    out.write(contents, off, len);
    out.close();

    byte[] readBytes = new byte[len];
    in = fs.open(f);
    in.read(readBytes);
    assertArrayEquals(Arrays.copyOfRange(contents, off, off + len), readBytes);
    in.close();

    out = fs.create(f);
    off = 0;
    len = fileLen;
    for (int i = off; i < len; i++) {
      out.write(contents[i]);
    }
    out.hflush();
    readBytes = new byte[len];
    in = fs.open(f);
    in.read(readBytes);
    assertArrayEquals(Arrays.copyOfRange(contents, off, off + len), readBytes);
    out.close();
    in.close();
  }

  public void testReadSkip() throws Exception {
    Path p = new Path("/test_readskip");
    fs.create(p).close();
    String content = "12345";
    writeFile(fs, p, content);
    FSDataInputStream in = fs.open(p);
    long skip = in.skip(2);
    assertEquals(2, skip);

    byte[] bytes = new byte[content.length() - (int)skip];
    in.readFully(bytes);
    assertEquals("345", new String(bytes));
  }

  public void testReadAfterClose() throws Exception {
    byte[] buf = new byte[6];
    FSDataInputStream in = fs.open(new Path("/hello"));
    in.close();
    try {
      in.read(0, buf, 0, 5);
    } catch (IOException e) {
      if (!e.getMessage().contains("closed")) {
        throw new IOException("message should be closed, but got " + e.getMessage());
      }
    }
    FSDataInputStream in2 = fs.open(new Path("/hello"));
    in.close();  // repeated close should not close other's fd
    in2.read(0, buf, 0, 5);
    in2.close();
  }

  public void testMkdirs() throws Exception {
    assertTrue(fs.mkdirs(new Path("/mkdirs")));
    assertTrue(fs.mkdirs(new Path("/mkdirs/dir")));
    assertTrue(fs.delete(new Path("/mkdirs"), true));
    assertTrue(fs.mkdirs(new Path("/mkdirs/test")));
    for (int i = 0; i < 50; i++) {
      fs.mkdirs(new Path("/mkdirs/d" + i));
    }
    assertEquals(51, fs.listStatus(new Path("/mkdirs/")).length);
    assertTrue(fs.delete(new Path("/mkdirs"), true));
    assertTrue(fs.mkdirs(new Path("parent/dir")));
    assertTrue(fs.exists(new Path(fs.getHomeDirectory(), "parent")));
  }

  public void testCreateWithoutPermission() throws Exception {
    assertTrue(fs.mkdirs(new Path("/noperm")));
    fs.setPermission(new Path("/noperm"), new FsPermission((short) 0555));
    UserGroupInformation ugi =
            UserGroupInformation.createUserForTesting("nobody", new String[]{"nogroup"});
    FileSystem fs2 = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      @Override
      public FileSystem run() throws Exception {
        return FileSystem.get(new URI("jfs://dev"), cfg);
      }
    });
    try {
      fs2.create(new Path("/noperm/a/file"));
      throw new Exception("create should fail");
    } catch (IOException e) {
    }
  }

  public void testCreateNonRecursive() throws Exception {
    Path p = new Path("/NOT_EXIST_DIR");
    p = new Path(p, "file");
    try (FSDataOutputStream ou = fs.createNonRecursive(p, false, 1 << 20, (short) 1, 128 << 20, null);) {
      fail("createNonRecursive in a not exit dir should fail");
    } catch (IOException ignored) {
    }
  }

  public void testTruncate() throws Exception {
    Path p = new Path("/test_truncate");
    fs.create(p).close();
    fs.truncate(p, 1 << 20);
    assertEquals(1 << 20, fs.getFileStatus(p).getLen());
    fs.truncate(p, 1 << 10);
    assertEquals(1 << 10, fs.getFileStatus(p).getLen());
  }

  public void testAccess() throws Exception {
    Path p1 = new Path("/test_access");
    FileSystem newFs = createNewFs(cfg, "user1", new String[]{"group1"});
    newFs.create(p1).close();
    newFs.setPermission(p1, new FsPermission((short) 0444));
    newFs.access(p1, FsAction.READ);
    try {
      newFs.access(p1, FsAction.WRITE);
      fail("The access call should have failed.");
    } catch (AccessControlException e) {
    }

    Path badPath = new Path("/bad/bad");
    try {
      newFs.access(badPath, FsAction.READ);
      fail("The access call should have failed");
    } catch (FileNotFoundException e) {
    }
    newFs.close();
  }

  public void testSetPermission() throws Exception {
    assertEquals(0, shell.run(new String[]{"-chmod", "0777", "/hello"}));
    assertEquals(0777, fs.getFileStatus(new Path("/hello")).getPermission().toShort());
    assertEquals(0, shell.run(new String[]{"-chmod", "0666", "/hello"}));
    assertEquals(0666, fs.getFileStatus(new Path("/hello")).getPermission().toShort());
  }

  public void testSetTimes() throws Exception {
    fs.setTimes(new Path("/hello"), 1000, 2000);
    assertEquals(1000, fs.getFileStatus(new Path("/hello")).getModificationTime());
    // assertEquals(2000, fs.getFileStatus(new Path("/hello")).getAccessTime());

    Path p = new Path("/test-mtime");
    fs.delete(p, true);
    FSDataOutputStream out = fs.create(p);
    Thread.sleep(1000);
    long mtime1 = fs.getFileStatus(p).getModificationTime();
    out.writeBytes("hello\n");
    out.close();
    long mtime2 = fs.getFileStatus(p).getModificationTime();
    if (mtime2 - mtime1 < 1000) {
      throw new IOException("stale mtime");
    }
    Thread.sleep(1000);
    long mtime3 = fs.getFileStatus(p).getModificationTime();
    if (mtime3 != mtime2) {
      throw new IOException("mtime was updated");
    }
  }

  public void testSetOwner() throws Exception {
    fs.create(new Path("/hello"));
    FileStatus parent = fs.getFileStatus(new Path("/"));
    FileStatus st = fs.getFileStatus(new Path("/hello"));
    if (!parent.getGroup().equals(st.getGroup())) {
      throw new Exception(
              "group of new created file should be " + parent.getGroup() + ", but got " + st.getGroup());
    }
    return; // only root can change the owner/group to others
    // fs.setOwner(new Path("/hello"), null, "nogroup");
    // assertEquals("nogroup", fs.getFileStatus(new Path("/hello")).getGroup());
  }

  public void testCloseFileSystem() throws Exception {
    Configuration conf = new Configuration();
    conf.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    for (int i = 0; i < 5; i++) {
      FileSystem fs = FileSystem.get(conf);
      fs.getFileStatus(new Path("/hello"));
      fs.close();
    }
  }

  public void testReadahead() throws Exception {
    FSDataOutputStream out = fs.create(new Path("/hello"), true);
    for (int i = 0; i < 1000000; i++) {
      out.writeBytes("hello\n");
    }
    out.close();

    // simulate reading a parquet file
    int size = 1000000 * 6;
    byte[] buf = new byte[128000];
    FSDataInputStream in = fs.open(new Path("/hello"));
    in.read(size - 8, buf, 0, 8);
    in.read(size - 5000, buf, 0, 3000);
    in.close();
    in = fs.open(new Path("/hello"));
    in.read(size - 8, buf, 0, 8);
    in.read(size - 5000, buf, 0, 3000);
    in.close();
    in = fs.open(new Path("/hello"));
    in.read(2000000, buf, 0, 128000);
    in.close();
  }

  public void testOutputStream() throws Exception {
    FSDataOutputStream out = fs.create(new Path("/haha"));
    if (!(out instanceof Syncable)) {
      throw new RuntimeException("FSDataOutputStream should be syncable");
    }
    if (!(out.getWrappedStream() instanceof Syncable)) {
      throw new RuntimeException("BufferedOutputStream should be syncable");
    }
    out.hflush();
    out.hsync();
  }

  public void testInputStream() throws Exception {
    FSDataInputStream in = fs.open(new Path("/hello"));
    if (!(in instanceof ByteBufferReadable)) {
      throw new RuntimeException("Inputstream should be bytebufferreadable");
    }
    if (!(in.getWrappedStream() instanceof ByteBufferReadable)) {
      throw new RuntimeException("Inputstream should not be bytebufferreadable");
    }

    FSDataOutputStream out = fs.create(new Path("/hello"), true);
    for (int i = 0; i < 1000000; i++) {
      out.writeBytes("hello\n");
    }
    out.close();

    in = fs.open(new Path("/hello"));
    ByteBuffer buf = ByteBuffer.allocateDirect(6 * 1000000);
    buf.put((byte) in.read());
    while (buf.hasRemaining()) {
      int readCount = in.read(buf);
      if (readCount == -1) {
        // this is probably a bug in the ParquetReader. We shouldn't have called
        // readFully with a buffer
        // that has more remaining than the amount of data in the stream.
        throw new IOException("Reached the end of stream. Still have: " + buf.remaining() + " bytes left");
      }
    }

    Path directReadFile = new Path("/direct_file");
    FSDataOutputStream ou = fs.create(directReadFile);
    ou.write("hello world".getBytes());
    ou.close();
    FSDataInputStream dto = fs.open(directReadFile);
    ByteBuffer directBuf = ByteBuffer.allocateDirect(11);
    directBuf.put("hello ".getBytes());
    dto.seek(6);
    dto.read(directBuf);
    byte[] rest = new byte[11];
    directBuf.flip();
    directBuf.get(rest, 0, rest.length);
    assertEquals("hello world", new String(rest));

    /*
     * FSDataOutputStream out = fs.create(new Path("/bigfile"), true); byte[] arr =
     * new byte[1<<20]; for (int i=0; i<1024; i++) { out.write(arr); } out.close();
     *
     * long start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile"));
     * ByteBuffer buf = ByteBuffer.allocateDirect(1<<20); long total=0; while (true)
     * { int n = in.read(buf); total += n; if (n < buf.capacity()) { break; } } long
     * used = System.currentTimeMillis() - start;
     * System.out.printf("ByteBuffer read %d throughput %f MB/s\n", total,
     * total/1024.0/1024.0/used*1000);
     *
     * start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile"));
     * total=0; while (true) { int n = in.read(buf); total += n; if (n <
     * buf.capacity()) { break; } } used = System.currentTimeMillis() - start;
     * System.out.printf("ByteBuffer read %d throughput %f MB/s\n", total,
     * total/1024.0/1024.0/used*1000);
     *
     * start = System.currentTimeMillis(); in = fs.open(new Path("/bigfile"));
     * total=0; while (true) { int n = in.read(arr); total += n; if (n <
     * buf.capacity()) { break; } } used = System.currentTimeMillis() - start;
     * System.out.printf("Array read %d throughput %f MB/s\n", total,
     * total/1024.0/1024.0/used*1000);
     */
  }

  public void testInputStreamSkipNBytes() throws Exception {
    Path f = new Path("/test-skipnbytes");
    try (FSDataOutputStream out = fs.create(f)) {
      out.writeBytes("hello juicefs");
    }
    Class<JuiceFileSystemImpl.FileInputStream> inputStreamClass = JuiceFileSystemImpl.FileInputStream.class;
    Method skipNBytes = inputStreamClass.getMethod("skipNBytes", long.class);
    try (FSDataInputStream in = fs.open(f)) {
      skipNBytes.invoke(in.getWrappedStream(), 6);
      String s = IOUtils.toString(in);
      assertEquals("juicefs", s);
    }
  }

  public void testReadStats() throws IOException {
    FileSystem.Statistics statistics = FileSystem.getStatistics(fs.getScheme(),
            ((FilterFileSystem) fs).getRawFileSystem().getClass());
    statistics.reset();
    Path path = new Path("/hello");
    FSDataOutputStream out = fs.create(path, true);
    for (int i = 0; i < 1 << 20; i++) {
      out.writeBytes("hello\n");
    }
    out.close();
    FSDataInputStream in = fs.open(path);

    int readSize = 512 << 10;

    ByteBuffer buf = ByteBuffer.allocateDirect(readSize);
    while (buf.hasRemaining()) {
      in.read(buf);
    }
    assertEquals(readSize, statistics.getBytesRead());

    in.seek(0);
    buf = ByteBuffer.allocate(readSize);
    while (buf.hasRemaining()) {
      in.read(buf);
    }
    assertEquals(readSize * 2, statistics.getBytesRead());

    in.read(0, new byte[3000], 0, 3000);
    assertEquals(readSize * 2 + 3000, statistics.getBytesRead());

    in.read(3000, new byte[6000], 0, 3000);
    assertEquals(readSize * 2 + 3000 + 3000, statistics.getBytesRead());

    in.read(new byte[3000], 0, 3000);
    assertEquals(readSize * 2 + 3000 + 3000 + 3000, statistics.getBytesRead());

    in.close();
  }

  public void testChecksum() throws IOException {
    Path f = new Path("/empty");
    FSDataOutputStream out = fs.create(f, true);
    out.close();
    FileChecksum sum = fs.getFileChecksum(f);
    assertEquals(new MD5MD5CRC32GzipFileChecksum(0, 0, new MD5Hash("70bc8f4b72a86921468bf8e8441dce51")), sum);

    f = new Path("/small");
    out = fs.create(f, true);
    out.writeBytes("world\n");
    out.close();
    sum = fs.getFileChecksum(f);
    assertEquals(new MD5MD5CRC32CastagnoliFileChecksum(512, 0, new MD5Hash("a74dcf6d5ba98e50ae0182c9d5d886fe")),
            sum);
    sum = fs.getFileChecksum(f, 5);
    assertEquals(new MD5MD5CRC32CastagnoliFileChecksum(512, 0, new MD5Hash("05a157db1cc7549c82ec6f31f63fdb46")),
            sum);

    f = new Path("/medium");
    out = fs.create(f, true);
    byte[] bytes = new byte[(128 << 20) - 1];
    out.write(bytes);
    out.close();
    sum = fs.getFileChecksum(f);
    assertEquals(
            new MD5MD5CRC32CastagnoliFileChecksum(512, 0, new MD5Hash("1cf326bae8274fd824ec69ece3e4082f")),
            sum);

    f = new Path("/big");
    out = fs.create(f, true);
    byte[] zeros = new byte[1024 * 1000];
    for (int i = 0; i < 150; i++) {
      out.write(zeros);
    }
    out.close();
    sum = fs.getFileChecksum(f);
    assertEquals(
            new MD5MD5CRC32CastagnoliFileChecksum(512, 262144, new MD5Hash("7d04ac8132ad64988f7ba4d819cbde62")),
            sum);
  }

  public void testXattr() throws IOException {
    Path p = new Path("/test-xattr");
    fs.delete(p, true);
    fs.create(p);
    assertEquals(null, fs.getXAttr(p, "x1"));
    fs.setXAttr(p, "x1", new byte[1]);
    fs.setXAttr(p, "x2", new byte[2]);
    List<String> names = fs.listXAttrs(p);
    assertEquals(2, names.size());
    Map<String, byte[]> values = fs.getXAttrs(p);
    assertEquals(2, values.size());
    assertEquals(1, values.get("x1").length);
    assertEquals(2, values.get("x2").length);
    fs.removeXAttr(p, "x2");
    names = fs.listXAttrs(p);
    assertEquals(1, names.size());
    assertEquals("x1", names.get(0));

    // stress
    for (int i = 0; i < 100; i++) {
      fs.setXAttr(p, "test" + i, new byte[4096]);
    }
    values = fs.getXAttrs(p);
    assertEquals(101, values.size());
    // xattr should be remove together with file
    fs.delete(p);
    fs.create(p);
    names = fs.listXAttrs(p);
    assertEquals(0, names.size());
  }

  public void testAppend() throws Exception {
    Path f = new Path("/tmp/testappend");
    fs.delete(f);
    FSDataOutputStream out = fs.create(f);
    out.write("hello".getBytes());
    out.close();
    FSDataOutputStream append = fs.append(f);
    assertEquals(5, append.getPos());
  }

  public void testFlinkHadoopRecoverableWriter() throws Exception {
    new HadoopRecoverableWriter(fs);
  }

  public void testConcat() throws Exception {
    Path trg = new Path("/tmp/concat");
    Path src1 = new Path("/tmp/concat1");
    Path src2 = new Path("/tmp/concat2");
    FSDataOutputStream ou = fs.create(trg);
    ou.write("hello".getBytes());
    ou.close();
    FSDataOutputStream sou1 = fs.create(src1);
    sou1.write("hello".getBytes());
    sou1.close();
    FSDataOutputStream sou2 = fs.create(src2);
    sou2.write("hello".getBytes());
    sou2.close();
    fs.concat(trg, new Path[]{src1, src2});
    FSDataInputStream in = fs.open(trg);
    assertEquals("hellohellohello", IOUtils.toString(in));
    in.close();
    // src should be deleted after concat
    assertFalse(fs.exists(src1));
    assertFalse(fs.exists(src2));

    Path emptyFile = new Path("/tmp/concat_empty_file");
    Path src = new Path("/tmp/concat_empty_file_src");
    FSDataOutputStream srcOu = fs.create(src);
    srcOu.write("hello".getBytes());
    srcOu.close();
    fs.create(emptyFile).close();
    fs.concat(emptyFile, new Path[]{src});
    in = fs.open(emptyFile);
    assertEquals("hello", IOUtils.toString(in));
    in.close();
  }

  public void testList() throws Exception {
    Path p = new Path("/listsort");
    String[] org = new String[]{
            "/listsort/p4",
            "/listsort/p2",
            "/listsort/p1",
            "/listsort/p3"
    };
    fs.mkdirs(p);
    for (String path : org) {
      fs.mkdirs(new Path(path));
    }
    FileStatus[] fss = fs.listStatus(p);
    String[] res = new String[fss.length];
    for (int i = 0; i < fss.length; i++) {
      res[i] = fss[i].getPath().toUri().getPath();
    }
    Arrays.sort(org);
    assertArrayEquals(org, res);
  }

  private void writeFile(FileSystem fs, Path p, String content) throws IOException {
    FSDataOutputStream ou = fs.create(p);
    ou.write(content.getBytes());
    ou.close();
  }

  public FileSystem createNewFs(Configuration conf, String user, String[] group) throws IOException, InterruptedException {
    if (user != null && group != null) {
      UserGroupInformation root = UserGroupInformation.createUserForTesting(user, group);
      return root.doAs((PrivilegedExceptionAction<FileSystem>) () -> FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf));
    }
    return FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf);
  }

  public void testUsersAndGroups() throws Exception {
    Path users1 = new Path("/tmp/users1");
    Path groups1 = new Path("/tmp/groups1");
    Path users2 = new Path("/tmp/users2");
    Path groups2 = new Path("/tmp/groups2");

    writeFile(fs, users1, "user1:2001\n");
    writeFile(fs, groups1, "group1:3001:user1\n");
    writeFile(fs, users2, "user2:2001\n");
    writeFile(fs, groups2, "group2:3001:user2\n");
    fs.close();

    Configuration conf = new Configuration(cfg);
    conf.set("juicefs.users", users1.toUri().getPath());
    conf.set("juicefs.groups", groups1.toUri().getPath());
    conf.set("juicefs.superuser", UserGroupInformation.getCurrentUser().getShortUserName());

    FileSystem newFs = createNewFs(conf, null, null);
    Path p = new Path("/test_user_group_file");
    newFs.create(p).close();
    newFs.setOwner(p, "user1", "group1");
    newFs.close();

    conf.set("juicefs.users", users2.toUri().getPath());
    conf.set("juicefs.groups", groups2.toUri().getPath());
    newFs = createNewFs(conf, null, null);
    FileStatus fileStatus = newFs.getFileStatus(p);
    assertEquals("user2", fileStatus.getOwner());
    assertEquals("group2", fileStatus.getGroup());
    newFs.close();
  }

  public void testGroupPerm() throws Exception {
    Path testPath = new Path("/test_group_perm");

    Configuration conf = new Configuration(cfg);
    conf.set("juicefs.supergroup", "hadoop");
    conf.set("juicefs.superuser", "hadoop");
    FileSystem uer1Fs = createNewFs(conf, "user1", new String[]{"hadoop"});
    uer1Fs.delete(testPath, true);
    uer1Fs.mkdirs(testPath);
    uer1Fs.setPermission(testPath, FsPermission.createImmutable((short) 0775));
    uer1Fs.close();

    FileSystem uer2Fs = createNewFs(conf, "user2", new String[]{"hadoop"});
    Path f = new Path(testPath, "test_file");
    uer2Fs.create(f).close();
    FileStatus fileStatus = uer2Fs.getFileStatus(f);
    assertEquals("user2", fileStatus.getOwner());
    uer2Fs.close();
  }

  public void testUmask() throws Exception {
    Configuration conf = new Configuration(cfg);
    conf.set("juicefs.umask", "077");
    UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
    FileSystem newFs = createNewFs(conf, currentUser.getShortUserName(), currentUser.getGroupNames());
    newFs.delete(new Path("/test_umask"), true);
    newFs.mkdirs(new Path("/test_umask/dir"));
    newFs.create(new Path("/test_umask/dir/f")).close();
    assertEquals(FsPermission.createImmutable((short) 0700), newFs.getFileStatus(new Path("/test_umask")).getPermission());
    assertEquals(FsPermission.createImmutable((short) 0700), newFs.getFileStatus(new Path("/test_umask/dir")).getPermission());
    assertEquals(FsPermission.createImmutable((short) 0600), newFs.getFileStatus(new Path("/test_umask/dir/f")).getPermission());
    newFs.close();

    conf.set("juicefs.umask", "000");
    newFs = createNewFs(conf, currentUser.getShortUserName(), currentUser.getGroupNames());
    newFs.delete(new Path("/test_umask"), true);
    newFs.mkdirs(new Path("/test_umask/dir"));
    newFs.create(new Path("/test_umask/dir/f")).close();
    assertEquals(FsPermission.createImmutable((short) 0777), newFs.getFileStatus(new Path("/test_umask")).getPermission());
    assertEquals(FsPermission.createImmutable((short) 0777), newFs.getFileStatus(new Path("/test_umask/dir")).getPermission());
    assertEquals(FsPermission.createImmutable((short) 0666), newFs.getFileStatus(new Path("/test_umask/dir/f")).getPermission());

    conf.set("juicefs.umask", "022");
    conf.set("fs.permissions.umask-mode", "077");
    Path p = new Path("/test_umask/u_parent/f");
    newFs = createNewFs(conf, currentUser.getShortUserName(), currentUser.getGroupNames());
    newFs.delete(p.getParent());
    FSDataOutputStream out = newFs.create(p, true);
    out.close();
    assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(p.getParent()).getPermission());
    assertEquals(FsPermission.createImmutable((short) 0644), fs.getFileStatus(p).getPermission());

    newFs.close();
  }

  public void testGuidMapping() throws Exception {
    Configuration newConf = new Configuration(cfg);

    FSDataOutputStream ou = fs.create(new Path("/etc/users"));
    ou.write("foo:10000\n".getBytes());
    ou.close();
    newConf.set("juicefs.users", "/etc/users");

    FileSystem fooFs = createNewFs(newConf, "foo", new String[]{"nogrp"});
    Path f = new Path("/test_foo");
    fooFs.create(f).close();
    assertEquals("foo", fooFs.getFileStatus(f).getOwner());
    fooFs.close();

    ou = fs.create(new Path("/etc/users"));
    ou.write("foo:10001\n".getBytes());
    ou.close();
    fs.close();

    FileSystem newFS = FileSystem.newInstance(newConf);
    assertEquals("10000", newFS.getFileStatus(f).getOwner());
    newFS.delete(f, false);
    newFS.close();
  }

  public void testGuidMappingFromString() throws Exception {
    fs.close();
    Configuration newConf = new Configuration(cfg);

    newConf.set("juicefs.users", "bar:10000;foo:20000;baz:30000");
    newConf.set("juicefs.groups", "user:10000:foo,bar;admin:2000:baz");
    newConf.set("juicefs.superuser", UserGroupInformation.getCurrentUser().getShortUserName());

    FileSystem fooFs = createNewFs(newConf, "foo", new String[]{"nogrp"});
    Path f = new Path("/test_foo");
    fooFs.create(f).close();
    fooFs.setOwner(f, "foo", "user");
    assertEquals("foo", fooFs.getFileStatus(f).getOwner());
    assertEquals("user", fooFs.getFileStatus(f).getGroup());
    fooFs.close();

    newConf.set("juicefs.users", "foo:20001");
    newConf.set("juicefs.groups", "user:1001:foo,bar;admin:2001:baz");
    FileSystem newFS = FileSystem.newInstance(newConf);
    assertEquals("20000", newFS.getFileStatus(f).getOwner());
    assertEquals("10000", newFS.getFileStatus(f).getGroup());

    newFS.delete(f, false);
    newFS.close();
  }

  public void testTrash() throws Exception {
    Trash trash = new Trash(fs, cfg);
    Path trashFile = new Path("/tmp/trashfile");
    trash.expungeImmediately();
    fs.create(trashFile).close();
    Trash.moveToAppropriateTrash(fs, trashFile, cfg);
    trash.checkpoint();
    fs.create(trashFile).close();
    Trash.moveToAppropriateTrash(fs, trashFile, cfg);
    assertEquals(2, fs.listStatus(fs.getTrashRoot(trashFile)).length);
    trash.expungeImmediately();
    assertEquals(0, fs.listStatus(fs.getTrashRoot(trashFile)).length);
  }

  public void testBlockSize() throws Exception {
    Configuration newConf = new Configuration(cfg);
    newConf.set("dfs.blocksize", "256m");
    FileSystem newFs = FileSystem.newInstance(newConf);
    assertEquals(256 << 20, newFs.getDefaultBlockSize(new Path("/")));
  }

  public void testReadSpeed() throws Exception {
    int read = (128 << 10) ;
    Path speedFile = new Path("/tmp/speedFile");
    fs.delete(speedFile, false);
    FSDataOutputStream ou = fs.create(speedFile);
    int fileSize = 128 << 20;
    ou.write(new byte[fileSize]);
    ou.close();
    FSDataInputStream open = fs.open(speedFile);
    AtomicLong counter = new AtomicLong(0L);
    AtomicBoolean finished = new AtomicBoolean(false);
    TimerTask timerTask = new TimerTask() {
      @Override
      public void run() {
        System.out.printf("read method calls: %d\n", counter.get());
        finished.set(true);
      }
    };
    Timer timer = new Timer();
    timer.schedule(timerTask, 1000);

    ByteBuffer readArray = ByteBuffer.allocateDirect(read);
    while (!finished.get()) {
      open.seek(0);
      readArray.position(0);
      readArray.limit(read);
      open.read(readArray);
      counter.getAndIncrement();
    }
  }

  private void createFileWithContents(FileSystem fs, Path f, byte[] contents) throws IOException {
    try (FSDataOutputStream out = fs.create(f)) {
      if (contents != null) {
        out.write(contents);
      }
    }
  }

  public void testIOClosed() throws Exception {
    Path f = new Path("/tmp/closedFile");
    FSDataOutputStream ou = fs.create(f);
    ou.close();
    try {
      ou.write(new byte[1]);
      fail("should not work when write to a closed stream");
    } catch (IOException ignored) {
    }
    FSDataInputStream in = fs.open(f);
    in.close();
    try {
      in.read(new byte[1]);
      fail("should not work when read a closed stream");
    } catch (IOException ignored) {
    }

    ou = fs.create(f);
    ou.close();
    ou.close();
  }

  public void testRead() throws Exception {
    Path f = new Path("/tmp/posFile");
    int fileLen = 1 << 20;
    byte[] contents = new byte[fileLen];
    Random random = new Random();
    random.nextBytes(contents);
    createFileWithContents(fs, f, contents);
    FSDataInputStream in = fs.open(f);

    byte[] readBytes = new byte[fileLen];
    int got = in.read(readBytes);
    assertFalse(in.markSupported());
    assertEquals(fileLen, got);
    assertEquals(fileLen, in.getPos());
    assertArrayEquals(Arrays.copyOfRange(contents, 0, fileLen), readBytes);
    in.close();

    in = fs.open(f);
    int b = 0;
    int count = 0;
    while ((b = in.read()) != -1) {
      assertEquals(contents[count]&0xFF, b);
      count++;
    }
    assertEquals(fileLen, count);
    in.close();

    int readSize = 100;
    in = fs.open(f);
    got = in.read(new byte[readSize]);
    assertEquals(readSize, got);
    assertEquals(readSize, in.getPos());
    assertEquals(fileLen - readSize, in.available());
    in.close();

    in = fs.open(f);
    readBytes = new byte[128<<10];
    int off = 100;
    int len = 100;
    int read = in.read(readBytes, off, len);
    assertEquals(len, read);
    assertArrayEquals(Arrays.copyOfRange(contents, 0, len), Arrays.copyOfRange(readBytes, off, off + len));
    in.close();

    try {
      in = fs.open(f);
      in.read(readBytes, off, readBytes.length - off + 1);
      fail("IndexOutOfBoundsException");
    } catch (IndexOutOfBoundsException ignored) {
    } finally {
      in.close();
    }

    in = fs.open(f);
    in.seek(fileLen - 100);
    long skip = in.skip(100);
    assertEquals(100, skip);

    in.seek(fileLen - 100);
    skip = in.skip(fileLen - 100 + 1);
    assertEquals(100, skip);
    in.close();
  }

  public void testInnerSymlink() throws Exception {
    //echo "hello juicefs" > inner_sym_link
    FileStatus status = fs.getFileStatus(new Path("/inner_sym_link"));
    assertEquals("inner_sym_link", status.getPath().getName());
    assertEquals(14, status.getLen());
  }

  public void testUserWithMultiGroups() throws Exception {
    Path users = new Path("/etc/users");
    Path groups = new Path("/etc/groups_multi");

    writeFile(fs, users, "tom:2001\n");
    writeFile(fs, groups, "groupa:3001:tom\ngroupb:3002:tom");
    fs.close();

    Configuration conf = new Configuration(cfg);
    conf.set("juicefs.users", users.toUri().getPath());
    conf.set("juicefs.groups", groups.toUri().getPath());
    conf.set("juicefs.debug", "true");

    FileSystem superFs = createNewFs(conf, "hdfs", new String[]{"hadoop"});
    Path testDir = new Path("/test_multi_group/d1");
    superFs.mkdirs(testDir);
    superFs.setOwner(testDir.getParent(), "hdfs", "groupb");
    superFs.setOwner(testDir, "hdfs", "groupb");
    superFs.setPermission(testDir.getParent(), FsPermission.createImmutable((short) 0770));
    superFs.setPermission(testDir, FsPermission.createImmutable((short) 0770));

    FileSystem tomFs = createNewFs(conf, "tom", new String[]{"randgroup"});
    tomFs.listStatus(testDir);

    superFs.delete(testDir.getParent(), true);
    tomFs.close();
    superFs.close();
  }

  public void testConcurrentCreate() throws Exception {
    int threads = 100;
    ExecutorService pool = Executors.newFixedThreadPool(threads);
    for (int i = 0; i < threads; i++) {
      pool.submit(() -> {
        JuiceFileSystem jfs = new JuiceFileSystem();
        try {
          jfs.initialize(URI.create("jfs://dev/"), cfg);
          jfs.listStatus(new Path("/"));
          jfs.close();
        } catch (IOException e) {
          fail("concurrent create failed");
          System.exit(1);
        }
      });
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.MINUTES);
  }

  private boolean tryAccess(Path path, String user, String[] group, FsAction action) throws Exception {
    UserGroupInformation testUser = UserGroupInformation.createUserForTesting(user, group);
    FileSystem fs = testUser.doAs((PrivilegedExceptionAction<FileSystem>) () -> {
      Configuration conf = new Configuration();
      conf.set("juicefs.grouping", "");
      return FileSystem.get(conf);
    });

    boolean canAccess;
    try {
      fs.access(path, action);
      canAccess = true;
    } catch (AccessControlException e) {
      canAccess = false;
    }
    return canAccess;
  }
  static AclEntry aclEntry(AclEntryScope scope, AclEntryType type, FsAction permission) {
    return new AclEntry.Builder().setScope(scope).setType(type).setPermission(permission).build();
  }

  static AclEntry aclEntry(AclEntryScope scope, AclEntryType type, String name, FsAction permission) {
    return new AclEntry.Builder().setScope(scope).setType(type).setName(name).setPermission(permission).build();
  }

  public void testAcl() throws Exception {
    List<AclEntry> acls = Lists.newArrayList(
        aclEntry(DEFAULT, USER, "foo", ALL)
    );
    Path p = new Path("/testacldir");
    fs.delete(p, true);
    fs.mkdirs(p);
    fs.setAcl(p, acls);
    Path childFile = new Path(p, "file");
    fs.create(childFile).close();
    assertTrue(tryAccess(childFile, "foo", new String[]{"nogrp"}, WRITE));
    assertFalse(tryAccess(childFile, "wrong", new String[]{"nogrp"}, WRITE));
    assertEquals(fs.getFileStatus(childFile).getPermission().getGroupAction(), READ_WRITE);

    Path childDir = new Path(p, "dir");
    fs.mkdirs(childDir);
    assertEquals(fs.getFileStatus(childDir).getPermission().getGroupAction(), ALL);
  }

  public void testAclException() throws Exception {
    List<AclEntry> acls = Lists.newArrayList(
        aclEntry(ACCESS, USER, "foo", ALL)
    );
    Path p = new Path("/test_acl_exception");
    fs.delete(p, true);
    fs.mkdirs(p);
    try {
      fs.setAcl(p, acls);
      fail("Invalid ACL: the user, group and other entries are required.");
    } catch (AclTransformation.AclException ignored) {
    }
  }

  public void testDefaultAclExistingDirFile() throws Exception {
    Path parent = new Path("/testDefaultAclExistingDirFile");
    fs.delete(parent, true);
    fs.mkdirs(parent);
    // the old acls
    List<AclEntry> acls1 = Lists.newArrayList(aclEntry(DEFAULT, USER, "foo", ALL));
    // the new acls
    List<AclEntry> acls2 = Lists.newArrayList(aclEntry(DEFAULT, USER, "foo", READ_EXECUTE));
    // set parent to old acl
    fs.setAcl(parent, acls1);

    Path childDir = new Path(parent, "childDir");
    fs.mkdirs(childDir);
    // the sub directory should also have the old acl
    AclEntry[] childDirExpectedAcl = new AclEntry[] { aclEntry(ACCESS, USER, "foo", ALL),
        aclEntry(ACCESS, GROUP, READ_EXECUTE), aclEntry(DEFAULT, USER, ALL),
        aclEntry(DEFAULT, USER, "foo", ALL), aclEntry(DEFAULT, GROUP, READ_EXECUTE),
        aclEntry(DEFAULT, MASK, ALL), aclEntry(DEFAULT, OTHER, READ_EXECUTE) };
    AclStatus childDirAcl = fs.getAclStatus(childDir);
    assertArrayEquals(childDirExpectedAcl, childDirAcl.getEntries().toArray());

    Path childFile = new Path(childDir, "childFile");
    // the sub file should also have the old acl
    fs.create(childFile).close();
    AclEntry[] childFileExpectedAcl = new AclEntry[] { aclEntry(ACCESS, USER, "foo", ALL),
        aclEntry(ACCESS, GROUP, READ_EXECUTE) };
    AclStatus childFileAcl = fs.getAclStatus(childFile);
    assertArrayEquals(childFileExpectedAcl, childFileAcl.getEntries().toArray());

    // now change parent to new acls
    fs.setAcl(parent, acls2);

    // sub directory and sub file should still have the old acls
    childDirAcl = fs.getAclStatus(childDir);
    assertArrayEquals(childDirExpectedAcl, childDirAcl.getEntries().toArray());
    childFileAcl = fs.getAclStatus(childFile);
    assertArrayEquals(childFileExpectedAcl, childFileAcl.getEntries().toArray());

    // now remove the parent acls
    fs.removeAcl(parent);

    // sub directory and sub file should still have the old acls
    childDirAcl = fs.getAclStatus(childDir);
    assertArrayEquals(childDirExpectedAcl, childDirAcl.getEntries().toArray());
    childFileAcl = fs.getAclStatus(childFile);
    assertArrayEquals(childFileExpectedAcl, childFileAcl.getEntries().toArray());

    // check changing the access mode of the file
    // mask out the access of group other for testing
    fs.setPermission(childFile, new FsPermission((short) 0640));
    boolean canAccess = tryAccess(childFile, "other", new String[] { "other" }, READ);
    assertFalse(canAccess);
    fs.delete(parent, true);
  }

  public void testAccessAclNotInherited() throws IOException {
    Path parent = new Path("/testAccessAclNotInherited");
    fs.delete(parent, true);
    fs.mkdirs(parent);
    // parent have both access acl and default acl
    List<AclEntry> acls = Lists.newArrayList(aclEntry(DEFAULT, USER, "foo", READ_EXECUTE),
        aclEntry(ACCESS, USER, ALL), aclEntry(ACCESS, GROUP, READ), aclEntry(ACCESS, OTHER, READ),
        aclEntry(ACCESS, USER, "bar", ALL));
    fs.setAcl(parent, acls);
    AclEntry[] expectedAcl = new AclEntry[] { aclEntry(ACCESS, USER, "bar", ALL), aclEntry(ACCESS, GROUP, READ),
        aclEntry(DEFAULT, USER, ALL), aclEntry(DEFAULT, USER, "foo", READ_EXECUTE),
        aclEntry(DEFAULT, GROUP, READ), aclEntry(DEFAULT, MASK, READ_EXECUTE), aclEntry(DEFAULT, OTHER, READ) };
    AclStatus dirAcl = fs.getAclStatus(parent);
    assertArrayEquals(expectedAcl, dirAcl.getEntries().toArray());

    Path childDir = new Path(parent, "childDir");
    fs.mkdirs(childDir);
    // subdirectory should only have the default acl inherited
    AclEntry[] childDirExpectedAcl = new AclEntry[] { aclEntry(ACCESS, USER, "foo", READ_EXECUTE),
        aclEntry(ACCESS, GROUP, READ), aclEntry(DEFAULT, USER, ALL),
        aclEntry(DEFAULT, USER, "foo", READ_EXECUTE), aclEntry(DEFAULT, GROUP, READ),
        aclEntry(DEFAULT, MASK, READ_EXECUTE), aclEntry(DEFAULT, OTHER, READ) };
    AclStatus childDirAcl = fs.getAclStatus(childDir);
    assertArrayEquals(childDirExpectedAcl, childDirAcl.getEntries().toArray());

    Path childFile = new Path(parent, "childFile");
    fs.create(childFile).close();
    // sub file should only have the default acl inherited
    AclEntry[] childFileExpectedAcl = new AclEntry[] { aclEntry(ACCESS, USER, "foo", READ_EXECUTE),
        aclEntry(ACCESS, GROUP, READ) };
    AclStatus childFileAcl = fs.getAclStatus(childFile);
    assertArrayEquals(childFileExpectedAcl, childFileAcl.getEntries().toArray());

    fs.delete(parent, true);
  }

  public void testFileStatusWithAcl() throws Exception {
    List<AclEntry> acls = Lists.newArrayList(
        aclEntry(ACCESS, USER, ALL),
        aclEntry(ACCESS, USER, "foo", ALL),
        aclEntry(ACCESS, OTHER, ALL),
        aclEntry(ACCESS, GROUP, ALL)
    );
    Path p = new Path("/test_acl_status");
    fs.delete(p, true);
    fs.mkdirs(p);
    FileStatus pStatus = fs.getFileStatus(p);
    assertFalse(pStatus.hasAcl());

    Path f = new Path(p, "f");
    fs.create(f).close();
    fs.setAcl(f, acls);
    FileStatus[] fileStatuses = fs.listStatus(p);
    assertTrue(fileStatuses[0].getPermission().getAclBit());
    assertTrue(fileStatuses[0].hasAcl());
  }

  public void testRenameAccessControlException() throws Exception {
    Path d1 = new Path("/renameAccessControlExceptionDir1");
    Path d2 = new Path("/renameAccessControlExceptionDir2");
    Path p = new Path(d1, "file");
    FileSystem user1Fs = createNewFs(cfg, "user1", new String[]{"group1"});

    user1Fs.mkdirs(d1);
    user1Fs.mkdirs(d2);
    user1Fs.create(p).close();
    user1Fs.setPermission(d1, new FsPermission((short) 0000));
    user1Fs.setPermission(d2, new FsPermission((short) 0777));
    try {
      user1Fs.rename(p, d2);
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("renameAccessControlExceptionDir1"));
    }

    user1Fs.setPermission(d1, new FsPermission((short) 0777));
    user1Fs.setPermission(d2, new FsPermission((short) 000));
    try {
      user1Fs.rename(p, d2);
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("renameAccessControlExceptionDir2"));
    }

    // clean
    user1Fs.setPermission(d1, new FsPermission((short) 0777));
    user1Fs.setPermission(d2, new FsPermission((short) 0777));
    user1Fs.delete(d1, true);
    user1Fs.delete(d2, true);
  }

  public void testSubdir() throws IOException, InterruptedException {
    Configuration newConf = new Configuration(cfg);
    newConf.set("fs.defaultFS", "jfs://test/");
    newConf.set("juicefs.name", "test");
    newConf.set("juicefs.test.meta", newConf.get("juicefs.dev.meta"));

    // Test creating a new filesystem with a valid subdir
    Path subdir = new Path("/test_subdir");
    fs.delete(subdir, true);
    fs.mkdirs(subdir);
    fs.setPermission(subdir, new FsPermission((short) 0777));
    newConf.set("juicefs.subdir", "/test_subdir");
    FileSystem newFS = FileSystem.newInstance(newConf);

    // Test file operations within the subdir
    assertTrue(newFS.mkdirs(new Path("/test_subdir/dir")));
    newFS.create(new Path("/test_subdir/dir/f")).close();
    assertTrue(newFS.exists(new Path("/test_subdir/dir/f")));

    // Test file operations not within the subdir
    Path nonexistent = new Path("/nonexistent");
    try {
      newFS.exists(nonexistent);
      fail("exists should not work because the path is not under the subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }
    try {
      newFS.mkdirs(nonexistent);
      fail("mkdirs should not work because the path is not under the subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }
    try {
      newFS.create(nonexistent);
      fail("create should not work because the path is not under the subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }

    // Test creating a path with the same prefix but not under the subdir
    Path wrongPathWithSamePrefix = new Path("/test_subdir_wrong");
    fs.mkdirs(wrongPathWithSamePrefix);
    try {
      newFS.listStatus(wrongPathWithSamePrefix);
      fail("listStatus should not work because the path is not under the subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }
    newFS.close();
  }

  public void testMultipleSubdirs() throws IOException, InterruptedException {
    Configuration newConf = new Configuration(cfg);
    newConf.set("fs.defaultFS", "jfs://test/");
    newConf.set("juicefs.name", "test");
    newConf.set("juicefs.test.meta", newConf.get("juicefs.dev.meta"));

    // Create multiple subdirs
    Path subdir1 = new Path("/subdir1");
    Path subdir2 = new Path("/subdir2");
    Path subdir3 = new Path("/subdir3");
    
    fs.delete(subdir1, true);
    fs.delete(subdir2, true);
    fs.delete(subdir3, true);
    
    fs.mkdirs(subdir1);
    fs.mkdirs(subdir2);
    fs.mkdirs(subdir3);
    fs.setPermission(subdir1, new FsPermission((short) 0777));
    fs.setPermission(subdir2, new FsPermission((short) 0777));
    fs.setPermission(subdir3, new FsPermission((short) 0777));
    
    // Set multiple subdirs separated by comma
    newConf.set("juicefs.subdir", "/subdir1,/subdir2,/subdir3");
    FileSystem newFS = FileSystem.newInstance(newConf);

    // Test file operations within subdir1
    assertTrue(newFS.mkdirs(new Path("/subdir1/dir1")));
    newFS.create(new Path("/subdir1/dir1/f1")).close();
    assertTrue(newFS.exists(new Path("/subdir1/dir1/f1")));

    // Test file operations within subdir2
    assertTrue(newFS.mkdirs(new Path("/subdir2/dir2")));
    newFS.create(new Path("/subdir2/dir2/f2")).close();
    assertTrue(newFS.exists(new Path("/subdir2/dir2/f2")));

    // Test file operations within subdir3
    assertTrue(newFS.mkdirs(new Path("/subdir3/dir3")));
    newFS.create(new Path("/subdir3/dir3/f3")).close();
    assertTrue(newFS.exists(new Path("/subdir3/dir3/f3")));

    // Test file operations not within any subdir
    Path nonexistent = new Path("/nonexistent");
    try {
      newFS.exists(nonexistent);
      fail("exists should not work because the path is not under any subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }
    try {
      newFS.mkdirs(nonexistent);
      fail("mkdirs should not work because the path is not under any subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }
    try {
      newFS.create(nonexistent);
      fail("create should not work because the path is not under any subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }

    // Test creating a path with the same prefix but not under any subdir
    Path wrongPathWithSamePrefix = new Path("/subdir1_wrong");
    fs.mkdirs(wrongPathWithSamePrefix);
    try {
      newFS.listStatus(wrongPathWithSamePrefix);
      fail("listStatus should not work because the path is not under any subdir");
    } catch (AccessControlException e) {
      assertTrue(e.getMessage().contains("Permission denied"));
    }

    // Test that paths in different subdirs are accessible
    assertTrue(newFS.exists(new Path("/subdir1/dir1/f1")));
    assertTrue(newFS.exists(new Path("/subdir2/dir2/f2")));
    assertTrue(newFS.exists(new Path("/subdir3/dir3/f3")));

    // Cleanup
    newFS.close();
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/acl/TestAclCLI.java
================================================
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.acl;

import org.apache.hadoop.cli.CLITestHelperDFS;
import org.apache.hadoop.cli.util.CLICommand;
import org.apache.hadoop.cli.util.CommandExecutor.Result;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class TestAclCLI extends CLITestHelperDFS {
  private String vol = null;
  private String username = null;

  protected void initConf() {
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
    conf.setBoolean(
        DFSConfigKeys.DFS_NAMENODE_POSIX_ACL_INHERITANCE_ENABLED_KEY, false);
  }

  @Before
  @Override
  public void setUp() throws Exception {
    super.setUp();
    initConf();
    vol = "jfs://dev/";
    username = System.getProperty("user.name");
  }

  @After
  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  @Override
  protected String getTestFile() {
    return "testAclCLI.xml";
  }

  @Override
  protected String expandCommand(final String cmd) {
    String expCmd = cmd;
    expCmd = expCmd.replaceAll("NAMENODE", vol);
    expCmd = expCmd.replaceAll("USERNAME", username);
    expCmd = expCmd.replaceAll("#LF#",
        System.getProperty("line.separator"));
    expCmd = super.expandCommand(expCmd);
    return expCmd;
  }

  @Override
  protected Result execute(CLICommand cmd) throws Exception {
    return cmd.getExecutor(vol, conf).executeCommand(cmd.getCmd());
  }

  @Test
  @Override
  public void testAll() {
    super.testAll();
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/JuiceFSContract.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractBondedFSContract;

public class JuiceFSContract extends AbstractBondedFSContract {

  public JuiceFSContract(Configuration conf) {
    super(conf);
    addConfResource("contract/juicefs.xml");
  }

  @Override
  public String getScheme() {
    return "jfs";
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestAppend.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractContractAppendTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;


public class TestAppend extends AbstractContractAppendTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }

  @Override
  public void teardown() throws Exception {
    getFileSystem().delete(new Path(path("test"), "target"));
    super.teardown();
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestConcat.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractConcatTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestConcat extends AbstractContractConcatTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
      return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestCreate.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestCreate extends AbstractContractCreateTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestDelete.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestDelete extends AbstractContractDeleteTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestGetFileStatus.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;


public class TestGetFileStatus extends AbstractContractGetFileStatusTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }

  @Override
  public void setup() throws Exception {
    super.setup();
    getFileSystem().delete(new Path("jfs:///test"));
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestJuiceFileSystemContract.java
================================================
/*
 * JuiceFS, Copyright 2021 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import io.juicefs.JuiceFileSystemTest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assume.assumeNotNull;

public class TestJuiceFileSystemContract extends FileSystemContractBaseTest {
  @Before
  public void setUp() throws Exception {
    Configuration cfg = new Configuration();
    cfg.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    fs = FileSystem.get(cfg);
    assumeNotNull(fs);
  }

  public FileSystem createNewFs(Configuration conf) throws IOException {
    return FileSystem.newInstance(FileSystem.getDefaultUri(conf), conf);
  }

  @Test
  public void testMkdirsWithUmask() throws Exception {
    Configuration conf = new Configuration(fs.getConf());
    conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
    FileSystem newFs = createNewFs(conf);
    try {
      final Path dir = path("newDir");
      assertTrue(newFs.mkdirs(dir, new FsPermission((short) 0777)));
      FileStatus status = newFs.getFileStatus(dir);
      assertTrue(status.isDirectory());
      assertEquals((short) 0715, status.getPermission().toShort());
    } finally {
      newFs.close();
    }
  }
}

================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestMkdir.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestMkdir extends AbstractContractMkdirTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestOpen.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestOpen extends AbstractContractOpenTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestRename.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractRenameTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;


public class TestRename extends AbstractContractRenameTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestSeek.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestSeek extends AbstractContractSeekTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }

  @Override
  public void teardown() throws Exception {
    getFileSystem().delete(path("bigseekfile.txt"));
    super.teardown();
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/contract/TestSetTimes.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.contract;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSetTimesTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

public class TestSetTimes extends AbstractContractSetTimesTest {
  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new JuiceFSContract(conf);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/kerberos/KerberosTest.java
================================================
/*
 * JuiceFS, Copyright 2025 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.juicefs.kerberos;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
import org.junit.Test;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

public class KerberosTest {
  private static final String clientPrincipal = "client/localhost";
  private static final String clientKeytab = "/tmp/client.keytab";
  private static final String tomPrincipal = "tom/localhost";
  private static final String tomKeytab = "/tmp/tom.keytab";

  private static final String jerryPrincipal = "jerry/localhost";
  private static final String jerryKeytab = "/tmp/jerry.keytab";
  private static final String serverPrincipal = "server/localhost";


  @Test
  public void testWithoutKrb() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("juicefs.memory-size", "99"); // to new another jfs
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    try (FileSystem fs = FileSystem.newInstance(cfg)) {
      fail("should not success without kerberos login");
    } catch (IOException ignored) {
    }
    UserGroupInformation.reset();
  }

  @Test
  public void test() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "100"); // to new another jfs
    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(clientPrincipal, clientKeytab);
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    FileSystem fs = FileSystem.newInstance(cfg);
    fs.listStatus(new Path("/"));
    UserGroupInformation.reset();
    fs.close();
  }

  @Test
  public void testToken() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "101"); // to new another jfs
    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(clientPrincipal, clientKeytab);
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    FileSystem fs = FileSystem.newInstance(cfg);
    long start = System.currentTimeMillis();
    Token<?> t = fs.getDelegationToken(UserGroupInformation.getCurrentUser().getShortUserName());
    long end = System.currentTimeMillis();
    System.out.println("get token time: " + (end - start) + " ms");

    // token renewer
    JuiceFSTokenRenewer renewer = new JuiceFSTokenRenewer();
    start = System.currentTimeMillis();
    System.out.println(renewer.renew(t, cfg));
    AbstractDelegationTokenIdentifier identifier = (AbstractDelegationTokenIdentifier) t.decodeIdentifier();
    System.out.println("token id: " + identifier.getMasterKeyId());
    end = System.currentTimeMillis();
    System.out.println("renew token time: " + (end - start) + " ms");
    start = System.currentTimeMillis();
    renewer.cancel(t, cfg);
    end = System.currentTimeMillis();
    System.out.println("cancel token time: " + (end - start) + " ms");
    UserGroupInformation.reset();
    fs.close();
  }

  @Test
  public void testProxyUser() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(clientPrincipal, clientKeytab);
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    UserGroupInformation realUser = UserGroupInformation.getCurrentUser();
    UserGroupInformation foo = UserGroupInformation.createProxyUser("foo", realUser);
    foo.doAs(new PrivilegedExceptionAction<Object>() {
      @Override
      public Object run() throws Exception {
        cfg.set("juicefs.memory-size", "102"); // to new another jfs
        FileSystem fs = FileSystem.newInstance(cfg);
        fs.close();
        return null;
      }
    });

    UserGroupInformation bar = UserGroupInformation.createProxyUser("bar", realUser);
    bar.doAs(new PrivilegedExceptionAction<Object>() {
      @Override
      public Object run() throws Exception {
        try {
          cfg.set("juicefs.memory-size", "103"); // to new another jfs
          FileSystem fs = FileSystem.newInstance(cfg);
          fail("user bar should not proxyed");
        } catch (Exception ignored){
        }
        return null;
      }
    });
  }

  @Test
  public void testSuperUser() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "104"); // to new another jfs
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));

    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(clientPrincipal, clientKeytab);
    FileSystem fs = FileSystem.newInstance(cfg);
    Path dir = new Path("/testsuperuser");
    fs.delete(dir);
    fs.mkdirs(dir);
    fs.setOwner(dir, "foo", "foo"); // only superuser has permission
  }

  @Test
  public void testMapRule() throws Exception {
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "105"); // to new another jfs
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    cfg.set("hadoop.security.auth_to_local", "RULE:[2:$1/$2@$0](jerry/.*@EXAMPLE\\.COM)s/.*/jerry_map/\nDEFAULT");

    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(jerryPrincipal, jerryKeytab);
    FileSystem fs = FileSystem.newInstance(cfg);
    Path dir = new Path("/testAuthToLocal");
    fs.delete(dir);
    fs.mkdirs(dir);
    FileStatus[] statuses = fs.listStatus(new Path("/"));
    assertEquals("jerry_map", fs.getFileStatus(dir).getOwner());
    fs.close();
  }

  @Test
  public void testMapRuleWithProxyUser() throws Exception {
    // test for proxy user
    UserGroupInformation.reset();
    Configuration cfg = new Configuration();
    cfg.set("hadoop.security.authentication", "kerberos");
    cfg.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "106"); // to new another jfs
    cfg.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    // map tom to client
    cfg.set("hadoop.security.auth_to_local", "RULE:[2:$1/$2@$0](tom/.*@EXAMPLE\\.COM)s/.*/client/\nDEFAULT");
    UserGroupInformation.setConfiguration(cfg);
    UserGroupInformation.loginUserFromKeytab(tomPrincipal, tomKeytab);
    UserGroupInformation foo = UserGroupInformation.createProxyUser("foo", UserGroupInformation.getCurrentUser());
    foo.doAs((PrivilegedExceptionAction<Object>) () -> {
      FileSystem fs = FileSystem.newInstance(cfg);
      Path dir = new Path("/testAuthToLocalWithProxyUser");
      fs.delete(dir);
      fs.mkdirs(dir);
      FileStatus[] statuses = fs.listStatus(new Path("/"));
      for (FileStatus status : statuses) {
        System.out.println(status.getPath().toString() + " " + status.getOwner() + " " + status.getGroup());
      }
      assertEquals("foo", fs.getFileStatus(dir).getOwner());
      fs.close();
      return null;
    });

    // test for proxy user
    UserGroupInformation.reset();
    Configuration cfg2 = new Configuration();
    cfg2.set("hadoop.security.authentication", "kerberos");
    cfg2.set("juicefs.server-principal", serverPrincipal);
    cfg.set("juicefs.memory-size", "107"); // to new another jfs
    cfg2.addResource(KerberosTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    // map tom to client
    cfg2.set("hadoop.security.auth_to_local", "RULE:[2:$1/$2@$0](tom/.*@EXAMPLE\\.COM)s/.*/client/\nDEFAULT");
    UserGroupInformation.setConfiguration(cfg2);
    UserGroupInformation.loginUserFromKeytab(tomPrincipal, tomKeytab);
    UserGroupInformation bar = UserGroupInformation.createProxyUser("bar", UserGroupInformation.getCurrentUser());
    bar.doAs((PrivilegedExceptionAction<Object>) () -> {
      try {
        FileSystem fs = FileSystem.newInstance(cfg2);
        fs.close();
        fail("user client should not proxy bar");
      } catch (Exception ignored) {
      }
      return null;
    });
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/permission/RangerAdminClientImpl.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.permission;

import org.apache.hadoop.conf.Configuration;
import org.apache.ranger.admin.client.AbstractRangerAdminClient;
import org.apache.ranger.plugin.util.ServicePolicies;
import org.apache.ranger.plugin.util.ServiceTags;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.util.List;

public class RangerAdminClientImpl extends AbstractRangerAdminClient {

  private static final Logger LOG = LoggerFactory.getLogger(RangerAdminClientImpl.class);

  private final static String cacheFilename = "hdfs-policies.json";
  private final static String tagFilename = "hdfs-policies-tag.json";
  public void init(String serviceName, String appId, String configPropertyPrefix, Configuration config) {
    super.init(serviceName, appId, configPropertyPrefix, config);
  }

  public ServicePolicies getServicePoliciesIfUpdated(long lastKnownVersion, long lastActivationTimeInMillis) throws Exception {

    String basedir = System.getProperty("basedir");
    if (basedir == null) {
      basedir = new File(".").getCanonicalPath();
    }
    final String relativePath  = "/src/test/resources/";
    java.nio.file.Path cachePath = FileSystems.getDefault().getPath(basedir, relativePath + cacheFilename);
    byte[] cacheBytes = Files.readAllBytes(cachePath);
    return gson.fromJson(new String(cacheBytes), ServicePolicies.class);
  }

  public ServiceTags getServiceTagsIfUpdated(long lastKnownVersion, long lastActivationTimeInMillis) throws Exception {
    String basedir = System.getProperty("basedir");
    if (basedir == null) {
      basedir = new File(".").getCanonicalPath();
    }
    final String relativePath = "/src/test/resources/";
    java.nio.file.Path cachePath = FileSystems.getDefault().getPath(basedir, relativePath + tagFilename);
    byte[] cacheBytes = Files.readAllBytes(cachePath);
    return gson.fromJson(new String(cacheBytes), ServiceTags.class);
  }

  public List<String> getTagTypes(String tagTypePattern) throws Exception {
    return null;
  }


}


================================================
FILE: sdk/java/src/test/java/io/juicefs/permission/RangerPermissionCheckerTest.java
================================================
/*
 * JuiceFS, Copyright 2024 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package io.juicefs.permission;

import io.juicefs.JuiceFileSystemTest;
import junit.framework.TestCase;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.Assert;

import java.io.ByteArrayOutputStream;
import java.security.PrivilegedExceptionAction;

public class RangerPermissionCheckerTest extends TestCase {

  private FileSystem fs;
  private Configuration cfg;

  public void setUp() throws Exception {
    cfg = new Configuration();
    cfg.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    // set superuser
    cfg.set("juicefs.superuser", UserGroupInformation.getCurrentUser().getShortUserName());
    fs = FileSystem.newInstance(cfg);
    cfg.setQuietMode(false);
  }

  public void tearDown() throws Exception {
    fs.close();
  }

  public void testRangerCheckerInitFailed() throws Exception {
    Configuration cfg1 = new Configuration();
    cfg1.addResource(JuiceFileSystemTest.class.getClassLoader().getResourceAsStream("core-site.xml"));
    cfg1.set("juicefs.superuser", UserGroupInformation.getCurrentUser().getShortUserName());
    cfg1.setQuietMode(false);

    FileSystem fs1 = FileSystem.newInstance(cfg1);

    final Path file = new Path("/tmp/tmpdir/data-file2");
    FSDataOutputStream out = fs1.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    fs1.setPermission(file, new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE));

    // Now try to read the file as unknown user "bob" - ranger should allow this user, but now should not be allowed
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg1);
        try {
          fs.open(file);
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }

        fs.close();
        return null;
      }
    });

    fs1.delete(file);
    fs1.close();
  }

  public void testRead() throws Exception {
    HDFSReadTest("/tmp/tmpdir/data-file2");
  }

  public void testWrite() throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir2/data-file3");
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    fs.setPermission(file, new FsPermission(FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.NONE));

    // Now try to write to the file as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Write to the file
        fs.append(file);
        fs.close();
        return null;
      }
    });

    // Now try to write to the file as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[]{"IT"});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Write to the file
        fs.append(file);
        fs.close();
        return null;
      }
    });

    // Now try to read the file as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Write to the file
        try {
          fs.append(file);
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          // expected
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }
        fs.close();
        return null;
      }
    });

    fs.delete(file);
  }

  public void testExecute() throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir3/data-file2");
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    fs.setPermission(file, new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE));

    Path parentDir = new Path("/tmp/tmpdir3");

    fs.setPermission(parentDir, new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.NONE));


    // Try to read the directory as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
        Assert.assertTrue(iter.hasNext());

        fs.close();
        return null;
      }
    });
    // Try to read the directory as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[]{"IT"});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
        Assert.assertTrue(iter.hasNext());
        fs.close();
        return null;
      }
    });

    // Now try to read the directory as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        try {
          RemoteIterator<LocatedFileStatus> iter = fs.listFiles(file.getParent(), false);
          Assert.assertTrue(iter.hasNext());
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }

        fs.close();
        return null;
      }
    });

    fs.delete(file);
    fs.delete(parentDir);
  }

  public void testSetPermission() throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir123/data-file3");
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    // Now try to read the file as unknown user "eve" - this will not find in ranger, and fallback check by origin Mask which should fail
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Write to the file
        try {
          fs.setPermission(file, new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE));
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          // expected
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }
        fs.close();
        return null;
      }
    });

    fs.delete(file);
  }

  public void testSetOwner() throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir123/data-file3");
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    // Now try to read the file as unknown user "eve" - this will not find in ranger, and fallback check by origin Mask which should fail
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Write to the file
        try {
          fs.setOwner(file, "eve", "eve");
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          // expected
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }
        fs.close();
        return null;
      }
    });

    fs.delete(file);
  }

  public void testReadTestUsingTagPolicy() throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path("/tmp/tmpdir6/data-file2");
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    fs.setPermission(file, new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE));

    // Now try to read the file as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Read the file
        FSDataInputStream in = fs.open(file);
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        IOUtils.copy(in, output);
        String content = new String(output.toByteArray());
        Assert.assertTrue(content.startsWith("data0"));
        fs.close();
        return null;
      }
    });

    // Now try to read the file as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[]{"IT"});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Read the file
        FSDataInputStream in = fs.open(file);
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        IOUtils.copy(in, output);
        String content = new String(output.toByteArray());
        Assert.assertTrue(content.startsWith("data0"));

        fs.close();
        return null;
      }
    });

    // Now try to read the file as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Read the file
        try {
          fs.open(file);
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          // expected
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }
        fs.close();
        return null;
      }
    });

    // Now try to read the file as known user "dave" - this should not be allowed, as he doesn't have the correct permissions
    ugi = UserGroupInformation.createUserForTesting("dave", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);

        // Read the file
        try {
          fs.open(file);
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          // expected
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }

        fs.close();
        return null;
      }
    });

    fs.delete(file);
  }

  public void testHDFSContentSummary() throws Exception {
    HDFSGetContentSummary("/tmp/get-content-summary");
    fs.delete(new Path("/tmp/get-content-summary"), true);
  }

  void HDFSReadTest(String fileName) throws Exception {

    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    final Path file = new Path(fileName);
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();

    fs.setPermission(file, new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE));

    // Now try to read the file as "bob" - this should be allowed (by the policy - user)
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        // Read the file
        FSDataInputStream in = fs.open(file);
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        IOUtils.copy(in, output);
        String content = new String(output.toByteArray());
        Assert.assertTrue(content.startsWith("data0"));

        fs.close();
        return null;
      }
    });

    // Now try to read the file as "alice" - this should be allowed (by the policy - group)
    ugi = UserGroupInformation.createUserForTesting("alice", new String[]{"IT"});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        FSDataInputStream in = fs.open(file);
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        IOUtils.copy(in, output);
        String content = new String(output.toByteArray());
        Assert.assertTrue(content.startsWith("data0"));
        fs.close();
        return null;
      }
    });

    // Now try to read the file as unknown user "eve" - this should not be allowed
    ugi = UserGroupInformation.createUserForTesting("eve", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {
      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        try {
          fs.open(file);
          Assert.fail("Failure expected on an incorrect permission");
        } catch (AccessControlException ex) {
          Assert.assertTrue(AccessControlException.class.getName().equals(ex.getClass().getName()));
        }

        fs.close();
        return null;
      }
    });

    fs.delete(file);
  }

  void HDFSGetContentSummary(final String dirName) throws Exception {

    String subdirName = dirName + "/tmpdir";

    createFile(subdirName, 1);
    createFile(subdirName, 2);

    fs.setPermission(new Path(dirName), new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE));

    UserGroupInformation ugi = UserGroupInformation.createUserForTesting("bob", new String[]{});
    ugi.doAs(new PrivilegedExceptionAction<Void>() {

      public Void run() throws Exception {
        FileSystem fs = FileSystem.get(cfg);
        try {
          // GetContentSummary on the directory dirName
          ContentSummary contentSummary = fs.getContentSummary(new Path(dirName));

          long directoryCount = contentSummary.getDirectoryCount();
          Assert.assertTrue("Found unexpected number of directories; expected-count=3, actual-count=" + directoryCount, directoryCount == 3);
        } catch (Exception e) {
          Assert.fail("Failed to getContentSummary, exception=" + e);
        }
        fs.close();
        return null;
      }
    });

    deleteFile(subdirName, 1);
    deleteFile(subdirName, 2);
  }

  void createFile(String baseDir, Integer index) throws Exception {
    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    String dirName = baseDir + (index != null ? String.valueOf(index) : "");
    String fileName = dirName + "/dummy-data";
    final Path file = new Path(fileName);
    FSDataOutputStream out = fs.create(file);
    for (int i = 0; i < 1024; ++i) {
      out.write(("data" + i + "\n").getBytes("UTF-8"));
      out.flush();
    }
    out.close();
  }

  void deleteFile(String baseDir, Integer index) throws Exception {
    // Write a file - the AccessControlEnforcer won't be invoked as we are the "superuser"
    String dirName = baseDir + (index != null ? String.valueOf(index) : "");
    String fileName = dirName + "/dummy-data";
    final Path file = new Path(fileName);
    fs.delete(file);
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/utils/BgTaskUtilTest.java
================================================
package io.juicefs.utils;

import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URI;
import java.util.concurrent.*;

public class BgTaskUtilTest extends TestCase {
  private static final Logger LOG = LoggerFactory.getLogger(BgTaskUtilTest.class);

  public void testBgTask() throws Exception {
    BgTaskUtil.reset();

    String[] volNames = new String[]{"fs1", "fs2", "fs3"};
    String[] taskNames = new String[]{"task1", "task2", "task3"};
    int threads = 20;
    ExecutorService pool = Executors.newFixedThreadPool(threads);

    int instances = 100;
    CountDownLatch latch = new CountDownLatch(instances);

    for (int i = 0; i < instances; i++) {
      int handle = i + 1;
      pool.submit(() -> {
        String volName = volNames[ThreadLocalRandom.current().nextInt(100) % volNames.length];
        try {
          BgTaskUtil.register(volName, handle);
          BgTaskUtil.startTrashEmptier(volName, () -> {
            LOG.info("tid {} running trash empiter for {}", Thread.currentThread().getId(), volName);
            while (true) {
              try {
                Thread.sleep(100);
              } catch (InterruptedException e) {
                break;
              }
            }
          }, 0, TimeUnit.MINUTES);
          // put many tasks
          for (int j = 0; j < 10; j++) {
            String taskName = taskNames[ThreadLocalRandom.current().nextInt(100) % taskNames.length];
            BgTaskUtil.putTask(volName,
                taskName,
                () -> {
                  LOG.info("running {}|{}", volName, taskName);
                  try {
                    Thread.sleep(ThreadLocalRandom.current().nextInt(2000));
                  } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                  }
                },
                0, 1, TimeUnit.MINUTES
            );
          }
        } catch (Exception e) {
          LOG.error("unexpected", e);
        } finally {
          BgTaskUtil.unregister(volName, handle, () -> {
            LOG.info("clean {}", volName);
          });
          latch.countDown();
        }
      });
    }
    latch.await();
    assertEquals(0, BgTaskUtil.getBgThreadForName().size());
    assertEquals(0, BgTaskUtil.getRunningInstance().size());
  }
}


================================================
FILE: sdk/java/src/test/java/io/juicefs/utils/HashTest.java
================================================
/*
 * JuiceFS, Copyright 2020 Juicedata, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.juicefs.utils;

import com.google.common.collect.Lists;
import junit.framework.TestCase;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;

public class HashTest extends TestCase {
  private static List<String> PATHS = new ArrayList<String>() {
    {
      String prefix = "jfs:///tmp/file";
      for (int i = 0; i < 1_000; i++) {
        add(prefix + i);
      }
    }
  };

  public void testConsitentHashCompat() {
    ConsistentHash<String> hash = new ConsistentHash<>(100, Lists.newArrayList());
    hash.addNode("192.168.1.1");
    hash.addNode("192.168.2.2");
    hash.addNode("192.168.3.3");
    hash.addNode("192.168.4.4");
    assertEquals("192.168.3.3", hash.get("123-0"));
    assertEquals("192.168.4.4", hash.get("456-2"));
    assertEquals("192.168.2.2", hash.get("789-3"));
  }

  public void testConsitentHash() {
    ConsistentHash<String> hash = new ConsistentHash<>(100, getNodes());
    Map<String, String> before = new HashMap<>();
    Map<String, String> after = new HashMap<>();

    for (String path : PATHS) {
      before.put(path, hash.get(path));
    }

    hash.remove("Node4");
    for (String path : PATHS) {
      after.put(path, hash.get(path));
    }
    System.out.println("====== stdev");
    System.out.println("before:\t" + stdev(before));
    System.out.println("after:\t" + stdev(after));

    System.out.println("====== (max - min)/avg");
    Map<String, Long> collect = after.values().stream()
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    Long max = Collections.max(collect.values());
    Long min = Collections.min(collect.values());
    long sum = collect.values().stream().mapToLong(i -> i).sum();
    System.out.println((double) (max - min) / ((double) sum / getNodes().size()));

    int count = 0; // total count of path that was moved
    for (Map.Entry<String, String> entry : before.entrySet()) {
      String path = entry.getKey();
      String host = entry.getValue();
      if (!host.equals(after.get(path)))
        count++;
    }
    double moveRatio = (double) count / before.size();
    System.out.println("move ratio:\t" + moveRatio);

    assertTrue(moveRatio < (double) 2 / getNodes().size());
  }

  private static double stdev(Map<String, String> after) {
    Map<String, Long> collect = after.values().stream()
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    SummaryStatistics statistics = new SummaryStatistics();
    for (Long value : collect.values()) {
      statistics.addValue(value);
    }
    double sum = statistics.getSum();
    statistics.clear();
    for (Long value : collect.values()) {
      statistics.addValue((double) value / sum);
    }

    return statistics.getStandardDeviation();
  }

  private List<String> getNodes() {
    List<String> nodes = Lists.newArrayList();
    for (int i = 0; i < 100; i++) {
      nodes.add("Node" + i);
    }
    return nodes;
  }
}


================================================
FILE: sdk/java/src/test/resources/hdfs-policies-tag.json
================================================
{
  "op": "add_or_update",
  "serviceName": "cl1_hadoop",
  "tagVersion": 2,
  "tagDefinitions": {},
  "tags": {
    "2": {
      "type": "TmpdirTag",
      "owner": 0,
      "attributes": {},
      "id": 2,
      "isEnabled": true,
      "version": 1
    }
  },
  "serviceResources": [
    {
      "resourceElements": {
        "path": {
          "values": [
            "/tmp/tmpdir6"
          ],
          "isExcludes": false,
          "isRecursive": true
        }
      },
      "id": 2,
      "isEnabled": true,
      "version": 2
    }
  ],
  "resourceToTagIds": {
    "2": [
      2
    ]
  }
}

================================================
FILE: sdk/java/src/test/resources/hdfs-policies.json
================================================
{
  "serviceName": "cl1_hadoop",
  "serviceId": 6,
  "policyVersion": 7,
  "policyUpdateTime": "20170220-12:36:01.000-+0000",
  "policies": [
    {
      "service": "cl1_hadoop",
      "name": "/tmp/tmpdir",
      "policyType": 0,
      "policyPriority": 0,
      "description": "",
      "isAuditEnabled": false,
      "resources": {
        "path": {
          "values": [
            "/tmp/tmpdir/"
          ],
          "isExcludes": false,
          "isRecursive": true
        }
      },
      "policyItems": [
        {
          "accesses": [
            {
              "type": "read",
              "isAllowed": true
            }
          ],
          "users": [],
          "groups": [
            "IT"
          ],
          "roles": [],
          "conditions": [],
          "delegateAdmin": false
        },
        {
          "accesses": [
            {
              "type": "read",
              "isAllowed": true
            }
          ],
          "users": [
            "bob"
          ],
          "groups": [],
          "roles": [],
          "conditions": [],
          "delegateAdmin": false
        }
      ],
      "denyPolicyItems": [],
      "allowExceptions": [],
      "denyExceptions": [],
      "dataMaskPolicyItems": [],
      "rowFilterPolicyItems": [],
      "serviceType": "hdfs",
      "id": 14,
      "isEnabled": true,
      "version": 4
    },
    {
      "service": "cl1_hadoop",
      "name": "/tmp/tmpdir2",
      "policyType": 0,
      "description": "",
      "isAuditEnabled": true,
      "resources": {
        "path": {
          "values": [
            "/tmp/tmpdir2"
          ],
          "isExcludes": false,
          "isRecursive": true
        }
      },
      "policyItems": [
        {
          "accesses": [
            {
              "type": "write",
              "isAllowed": true
            }
          ],
          "users": [],
          "groups": [
            "IT"
          ],
          "conditions": [],
          "delegateAdmin": false
        },
        {
          "accesses": [
            {
              "type": "write",
              "isAllowed": true
            }
          ],
          "users": [
            "bob"
          ],
          "groups": [],
          "conditions": [],
          "delegateAdmin": false
        }
      ],
      "denyPolicyItems": [],
      "allowExceptions": [],
      "denyExceptions": [],
      "dataMaskPolicyItems": [],
      "rowFilterPolicyItems": [],
      "id": 15,
      "isEnabled": true,
      "version": 1
    },
    {
      "service": "cl1_hadoop",
      "name": "/tmp/tmpdir3",
      "policyType": 0,
      "description": "",
      "isAuditEnabled": true,
      "resources": {
        "path": {
          "values": [
            "/tmp/tmpdir3"
          ],
          "isExcludes": false,
          "isRecursive": true
        }
      },
      "policyItems": [
        {
          "accesses": [
            {
              "type": "read",
              "isAllowed": true
            },
            {
              "type": "execute",
              "isAllowed": true
            }
          ],
          "users": [],
          "groups": [
            "IT"
          ],
          "conditions": [],
          "delegateAdmin": false
        },
        {
          "accesses": [
            {
              "type": "read",
              "isAllowed": true
            },
            {
              "type": "execute",
              "isAllowed": true
            }
          ],
          "users": [
            "bob"
          ],
          "groups": [],
          "conditions": [],
          "delegateAdmin": false
        }
      ],
      "denyPolicyItems": [],
      "allowExceptions": [],
      "denyExceptions": [],
      "dataMaskPolicyItems": [],
      "rowFilterPolicyItems": [],
      "id": 16,
      "isEnabled": true,
      "version": 1
    },
    {
      "service": "cl1_hadoop",
      "name": "/tmp/get-content-summary",
      "policyType": 0,
      "description": "",
      "isAuditEnabled": true,
      "resources": {
        "path": {"values": ["/tmp/get-content-summary", "/tmp/get-content-summary/tmpdir1", "/tmp/get-content-summary/tmpdir2"], "isExcludes": false, "isRecursive": false}
      },
      "policyItems": [
        {
          "accesses": [{"type": "read","isAllowed": true}, {"type": "execute","isAllowed": true}],
          "users": ["bob"],
          "groups": ["IT"],
          "conditions": [],
          "delegateAdmin": false
        }
      ],
      "denyPolicyItems": [],
      "allowExceptions": [],
      "denyExceptions": [],
      "dataMaskPolicyItems": [],
      "rowFilterPolicyItems": [],
      "id": 40,
      "isEnabled": true,
      "version": 1
    }
  ],
  "serviceDef": {
    "name": "hdfs",
    "implClass": "org.apache.ranger.services.hdfs.RangerServiceHdfs",
    "label": "HDFS Repository",
    "description": "HDFS Repository",
    "options": {},
    "configs": [
      {
        "itemId": 1,
        "name": "username",
        "type": "string",
        "subType": "",
        "mandatory": true,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Username"
      },
      {
        "itemId": 2,
        "name": "password",
        "type": "password",
        "subType": "",
        "mandatory": true,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Password"
      },
      {
        "itemId": 3,
        "name": "fs.default.name",
        "type": "string",
        "subType": "",
        "mandatory": true,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Namenode URL"
      },
      {
        "itemId": 4,
        "name": "hadoop.security.authorization",
        "type": "bool",
        "subType": "YesTrue:NoFalse",
        "mandatory": true,
        "defaultValue": "false",
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Authorization Enabled"
      },
      {
        "itemId": 5,
        "name": "hadoop.security.authentication",
        "type": "enum",
        "subType": "authnType",
        "mandatory": true,
        "defaultValue": "simple",
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Authentication Type"
      },
      {
        "itemId": 6,
        "name": "hadoop.security.auth_to_local",
        "type": "string",
        "subType": "",
        "mandatory": false,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": ""
      },
      {
        "itemId": 7,
        "name": "dfs.datanode.kerberos.principal",
        "type": "string",
        "subType": "",
        "mandatory": false,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": ""
      },
      {
        "itemId": 8,
        "name": "dfs.namenode.kerberos.principal",
        "type": "string",
        "subType": "",
        "mandatory": false,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": ""
      },
      {
        "itemId": 9,
        "name": "dfs.secondary.namenode.kerberos.principal",
        "type": "string",
        "subType": "",
        "mandatory": false,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": ""
      },
      {
        "itemId": 10,
        "name": "hadoop.rpc.protection",
        "type": "enum",
        "subType": "rpcProtection",
        "mandatory": false,
        "defaultValue": "authentication",
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "RPC Protection Type"
      },
      {
        "itemId": 11,
        "name": "commonNameForCertificate",
        "type": "string",
        "subType": "",
        "mandatory": false,
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Common Name for Certificate"
      }
    ],
    "resources": [
      {
        "itemId": 1,
        "name": "path",
        "type": "path",
        "level": 10,
        "mandatory": true,
        "lookupSupported": true,
        "recursiveSupported": true,
        "excludesSupported": false,
        "matcher": "org.apache.ranger.plugin.resourcematcher.RangerPathResourceMatcher",
        "matcherOptions": {
          "wildCard": "true",
          "ignoreCase": "false"
        },
        "validationRegEx": "",
        "validationMessage": "",
        "uiHint": "",
        "label": "Resource Path",
        "description": "HDFS file or directory path"
      }
    ],
    "accessTypes": [
      {
        "itemId": 1,
        "name": "read",
        "label": "Read",
        "impliedGrants": []
      },
      {
        "itemId": 2,
        "name": "write",
        "label": "Write",
        "impliedGrants": []
      },
      {
        "itemId": 3,
        "name": "execute",
        "label": "Execute",
        "impliedGrants": []
      }
    ],
    "policyConditions": [],
    "contextEnrichers": [],
    "enums": [
      {
        "itemId": 1,
        "name": "authnType",
        "elements": [
          {
            "itemId": 1,
            "name": "simple",
            "label": "Simple"
          },
          {
            "itemId": 2,
            "name": "kerberos",
            "label": "Kerberos"
          }
        ],
        "defaultIndex": 0
      },
      {
        "itemId": 2,
        "name": "rpcProtection",
        "elements": [
          {
            "itemId": 1,
            "name": "authentication",
            "label": "Authentication"
          },
          {
            "itemId": 2,
            "name": "integrity",
            "label": "Integrity"
          },
          {
            "itemId": 3,
            "name": "privacy",
            "label": "Privacy"
          }
        ],
        "defaultIndex": 0
      }
    ],
    "dataMaskDef": {
      "maskTypes": [],
      "accessTypes": [],
      "resources": []
    },
    "rowFilterDef": {
      "accessTypes": [],
      "resources": []
    },
    "id": 1,
    "guid": "0d047247-bafe-4cf8-8e9b-d5d377284b2d",
    "isEnabled": true,
    "createTime": "20170217-11:41:31.000-+0000",
    "updateTime": "20170217-11:41:31.000-+0000",
    "version": 1
  },
  "auditMode": "audit-default",
  "tagPolicies": {
    "serviceName": "KafkaTagService",
    "serviceId": 5,
    "policyVersion": 5,
    "policyUpdateTime": "20170220-12:35:51.000-+0000",
    "policies": [
      {
        "service": "KafkaTagService",
        "name": "EXPIRES_ON",
        "policyType": 0,
        "description": "Policy for data with EXPIRES_ON tag",
        "isAuditEnabled": true,
        "resources": {
          "tag": {
            "values": [
              "EXPIRES_ON"
            ],
            "isExcludes": false,
            "isRecursive": false
          }
        },
        "policyItems": [],
        "denyPolicyItems": [
          {
            "accesses": [
              {
                "type": "hdfs:read",
                "isAllowed": true
              },
              {
                "type": "hdfs:write",
                "isAllowed": true
              },
              {
                "type": "hdfs:execute",
                "isAllowed": true
              },
              {
                "type": "hbase:read",
                "isAllowed": true
              },
              {
                "type": "hbase:write",
                "isAllowed": true
              },
              {
                "type": "hbase:create",
                "isAllowed": true
              },
              {
                "type": "hbase:admin",
                "isAllowed": true
              },
              {
                "type": "hive:select",
                "isAllowed": true
              },
              {
                "type": "hive:update",
                "isAllowed": true
              },
              {
                "type": "hive:create",
                "isAllowed": true
              },
              {
                "type": "hive:drop",
                "isAllowed": true
              },
              {
                "type": "hive:alter",
                "isAllowed": true
              },
              {
                "type": "hive:index",
                "isAllowed": true
              },
              {
                "type": "hive:lock",
                "isAllowed": true
              },
              {
                "type": "hive:all",
                "isAllowed": true
              },
              {
                "type": "yarn:submit-app",
                "isAllowed": true
              },
              {
                "type": "yarn:admin-queue",
                "isAllowed": true
              },
              {
                "type": "knox:allow",
                "isAllowed": true
              },
              {
                "type": "storm:submitTopology",
                "isAllowed": true
              },
              {
                "type": "storm:fileUpload",
                "isAllowed": true
              },
              {
                "type": "storm:fileDownload",
                "isAllowed": true
              },
              {
                "type": "storm:killTopology",
                "isAllowed": true
              },
              {
                "type": "storm:rebalance",
                "isAllowed": true
              },
              {
                "type": "storm:activate",
                "isAllowed": true
              },
              {
                "type": "storm:deactivate",
                "isAllowed": true
              },
              {
                "type": "storm:getTopologyConf",
                "isAllowed": true
              },
              {
                "type": "storm:getTopology",
                "isAllowed": true
              },
              {
                "type": "storm:getUserTopology",
                "isAllowed": true
              },
              {
                "type": "storm:getTopologyInfo",
                "isAllowed": true
              },
              {
                "type": "storm:uploadNewCredentials",
                "isAllowed": true
              },
              {
                "type": "kms:create",
                "isAllowed": true
              },
              {
                "type": "kms:delete",
                "isAllowed": true
              },
              {
                "type": "kms:rollover",
                "isAllowed": true
              },
              {
                "type": "kms:setkeymaterial",
                "isAllowed": true
              },
              {
                "type": "kms:get",
                "isAllowed": true
              },
              {
                "type": "kms:getkeys",
                "isAllowed": true
              },
              {
                "type": "kms:getmetadata",
                "isAllowed": true
              },
              {
                "type": "kms:generateeek",
                "isAllowed": true
              },
              {
                "type": "kms:decrypteek",
                "isAllowed": true
              },
              {
                "type": "solr:query",
                "isAllowed": true
              },
              {
                "type": "solr:update",
                "isAllowed": true
              },
              {
                "type": "solr:others",
                "isAllowed": true
              },
              {
                "type": "solr:solr_admin",
                "isAllowed": true
              },
              {
                "type": "kafka:publish",
                "isAllowed": true
              },
              {
                "type": "kafka:consume",
                "isAllowed": true
              },
              {
                "type": "kafka:configure",
                "isAllowed": true
              },
              {
                "type": "kafka:describe",
                "isAllowed": true
              },
              {
                "type": "kafka:create",
                "isAllowed": true
              },
              {
                "type": "kafka:delete",
                "isAllowed": true
              },
              {
                "type": "kafka:kafka_admin",
                "isAllowed": true
              },
              {
                "type": "atlas:read",
                "isAllowed": true
              },
              {
                "type": "atlas:create",
                "isAllowed": true
              },
              {
                "type": "atlas:update",
                "isAllowed": true
              },
              {
                "type": "atlas:delete",
                "isAllowed": true
              },
              {
                "type": "atlas:all",
                "isAllowed": true
              }
            ],
            "users": [],
            "groups": [
              "public"
            ],
            "conditions": [
              {
                "type": "accessed-after-expiry",
                "values": [
                  "yes"
                ]
              }
            ],
            "delegateAdmin": false
          }
        ],
        "allowExceptions": [],
        "denyExceptions": [],
        "dataMaskPolicyItems": [],
        "rowFilterPolicyItems": [],
        "id": 10,
        "isEnabled": true,
        "version": 1
      },
      {
        "service": "KafkaTagService",
        "name": "AtlasKafkaTagPolicy",
        "policyType": 0,
        "description": "",
        "isAuditEnabled": true,
        "resources": {
          "tag": {
            "values": [
              "KafkaTag"
            ],
            "isExcludes": false,
            "isRecursive": false
          }
        },
        "policyItems": [
          {
            "accesses": [
              {
                "type": "kafka:consume",
                "isAllowed": true
              },
              {
                "type": "kafka:describe",
                "isAllowed": true
              }
            ],
            "users": [
              "CN\u003dClient,O\u003dApache,L\u003dDublin,ST\u003dLeinster,C\u003dIE"
            ],
            "groups": [],
            "conditions": [],
            "delegateAdmin": false
          }
        ],
        "denyPolicyItems": [],
        "allowExceptions": [],
        "denyExceptions": [],
        "dataMaskPolicyItems": [],
        "rowFilterPolicyItems": [],
        "id": 11,
        "isEnabled": true,
        "version": 2
      },
      {
        "service": "KafkaTagService",
        "name": "TmpdirTagPolicy",
        "policyType": 0,
        "description": "",
        "isAuditEnabled": true,
        "resources": {
          "tag": {
            "values": [
              "TmpdirTag"
            ],
            "isExcludes": false,
            "isRecursive": false
          }
        },
        "policyItems": [
          {
            "accesses": [
              {
                "type": "hdfs:read",
                "isAllowed": true
              }
            ],
            "users": [],
            "groups": [
              "IT"
            ],
            "conditions": [],
            "delegateAdmin": false
          },
          {
            "accesses": [
              {
                "type": "hdfs:read",
                "isAllowed": true
              }
            ],
            "users": [
              "bob"
            ],
            "groups": [],
            "conditions": [],
            "delegateAdmin": false
          }
        ],
        "denyPolicyItems": [],
        "allowExceptions": [],
        "denyExceptions": [],
        "dataMaskPolicyItems": [],
        "rowFilterPolicyItems": [],
        "id": 17,
        "isEnabled": true,
        "version": 1
      }
    ],
    "serviceDef": {
      "name": "tag",
      "implClass": "org.apache.ranger.services.tag.RangerServiceTag",
      "label": "TAG",
      "description": "TAG Service Definition",
      "options": {
        "ui.pages": "tag-based-policies"
      },
      "configs": [],
      "resources": [
        {
          "itemId": 1,
          "name": "tag",
          "type": "string",
          "level": 1,
          "mandatory": true,
          "lookupSupported": true,
          "recursiveSupported": false,
          "excludesSupported": false,
          "matcher": "org.apache.ranger.plugin.resourcematcher.RangerDefaultResourceMatcher",
          "matcherOptions": {
            "wildCard": "false",
            "ignoreCase": "false"
          },
          "validationRegEx": "",
          "validationMessage": "",
          "uiHint": "{ \"singleValue\":true }",
          "label": "TAG",
          "description": "TAG"
        }
      ],
      "accessTypes": [
        {
          "itemId": 1002,
          "name": "hdfs:read",
          "label": "Read",
          "impliedGrants": []
        },
        {
          "itemId": 1003,
          "name": "hdfs:write",
          "label": "Write",
          "impliedGrants": []
        },
        {
          "itemId": 1004,
          "name": "hdfs:execute",
          "label": "Execute",
          "impliedGrants": []
        },
        {
          "itemId": 2003,
          "name": "hbase:read",
          "label": "Read",
          "impliedGrants": []
        },
        {
          "itemId": 2004,
          "name": "hbase:write",
          "label": "Write",
          "impliedGrants": []
        },
        {
          "itemId": 2005,
          "name": "hbase:create",
          "label": "Create",
          "impliedGrants": []
        },
        {
          "itemId": 2006,
          "name": "hbase:admin",
          "label": "Admin",
          "impliedGrants": [
            "hbase:read",
            "hbase:write",
            "hbase:create"
          ]
        },
        {
          "itemId": 3004,
          "name": "hive:select",
          "label": "select",
          "impliedGrants": []
        },
        {
          "itemId": 3005,
          "name": "hive:update",
          "label": "update",
          "impliedGrants": []
        },
        {
          "itemId": 3006,
          "name": "hive:create",
          "label": "Create",
          "impliedGrants": []
        },
        {
          "itemId": 3007,
          "name": "hive:drop",
          "label": "Drop",
          "impliedGrants": []
        },
        {
          "itemId": 3008,
          "name": "hive:alter",
          "label": "Alter",
          "impliedGrants": []
        },
        {
          "itemId": 3009,
          "name": "hive:index",
          "label": "Index",
          "impliedGrants": []
        },
        {
          "itemId": 3010,
          "name": "hive:lock",
          "label": "Lock",
          "impliedGrants": []
        },
        {
          "itemId": 3011,
          "name": "hive:all",
          "label": "All",
          "impliedGrants": [
            "hive:select",
            "hive:update",
            "hive:create",
            "hive:drop",
            "hive:alter",
            "hive:index",
            "hive:lock"
          ]
        },
        {
          "itemId": 4005,
          "name": "yarn:submit-app",
          "label": "submit-app",
          "impliedGrants": []
        },
        {
          "itemId": 4006,
          "name": "yarn:admin-queue",
          "label": "admin-queue",
          "impliedGrants": [
            "yarn:submit-app"
          ]
        },
        {
          "itemId": 5006,
          "name": "knox:allow",
          "label": "Allow",
          "impliedGrants": []
        },
        {
          "itemId": 6007,
          "name": "storm:submitTopology",
          "label": "Submit Topology",
          "impliedGrants": [
            "storm:fileUpload",
            "storm:fileDownload"
          ]
        },
        {
          "itemId": 6008,
          "name": "storm:fileUpload",
          "label": "File Upload",
          "impliedGrants": []
        },
        {
          "itemId": 6011,
          "name": "storm:fileDownload",
          "label": "File Download",
          "impliedGrants": []
        },
        {
          "itemId": 6012,
          "name": "storm:killTopology",
          "label": "Kill Topology",
          "impliedGrants": []
        },
        {
          "itemId": 6013,
          "name": "storm:rebalance",
          "label": "Rebalance",
          "impliedGrants": []
        },
        {
          "itemId": 6014,
          "name": "storm:activate",
          "label": "Activate",
          "impliedGrants": []
        },
        {
          "itemId": 6015,
          "name": "storm:deactivate",
          "label": "Deactivate",
          "impliedGrants": []
        },
        {
          "itemId": 6016,
          "name": "storm:getTopologyConf",
          "label": "Get Topology Conf",
          "impliedGrants": []
        },
        {
          "itemId": 6017,
          "name": "storm:getTopology",
          "label": "Get Topology",
          "impliedGrants": []
        },
        {
          "itemId": 6018,
          "name": "storm:getUserTopology",
          "label": "Get User Topology",
          "impliedGrants": []
        },
        {
          "itemId": 6019,
          "name": "storm:getTopologyInfo",
          "label": "Get Topology Info",
          "impliedGrants": []
        },
        {
          "itemId": 6020,
          "name": "storm:uploadNewCredentials",
          "label": "Upload New Credential",
          "impliedGrants": []
        },
        {
          "itemId": 7008,
          "name": "kms:create",
          "label": "Create",
          "impliedGrants": []
        },
        {
          "itemId": 7009,
          "name": "kms:delete",
          "label": "Delete",
          "impliedGrants": []
        },
        {
          "itemId": 7010,
          "name": "kms:rollover",
          "label": "Rollover",
          "impliedGrants": []
        },
        {
          "itemId": 7011,
          "name": "kms:setkeymaterial",
          "label": "Set Key Material",
          "impliedGrants": []
        },
        {
          "itemId": 7012,
          "name": "kms:get",
          "label": "Get",
          "impliedGrants": []
        },
        {
          "itemId": 7013,
          "name": "kms:getkeys",
          "label": "Get Keys",
          "impliedGrants": []
        },
        {
          "itemId": 7014,
          "name": "kms:getmetadata",
          "label": "Get Metadata",
          "impliedGrants": []
        },
        {
          "itemId": 7015,
          "name": "kms:generateeek",
          "label": "Generate EEK",
          "impliedGrants": []
        },
        {
          "itemId": 7016,
          "name": "kms:decrypteek",
          "label": "Decrypt EEK",
          "impliedGrants": []
        },
        {
          "itemId": 8108,
          "name": "solr:query",
          "label": "Query",
          "impliedGrants": []
        },
        {
          "itemId": 8208,
          "name": "solr:update",
          "label": "Update",
          "impliedGrants": []
        },
        {
          "itemId": 8308,
          "name": "solr:others",
          "label": "Others",
          "impliedGrants": []
        },
        {
          "itemId": 8908,
          "name": "solr:solr_admin",
          "label": "Solr Admin",
          "impliedGrants": [
            "solr:query",
            "solr:update",
            "solr:others"
          ]
        },
        {
          "itemId": 9010,
          "name": "kafka:publish",
          "label": "Publish",
          "impliedGrants": [
            "kafka:describe"
          ]
        },
        {
          "itemId": 9011,
          "name": "kafka:consume",
          "label": "Consume",
          "impliedGrants": [
            "kafka:describe"
          ]
        },
        {
          "itemId": 9014,
          "name": "kafka:configure",
          "label": "Configure",
          "impliedGrants": [
            "kafka:describe"
          ]
        },
        {
          "itemId": 9015,
          "name": "kafka:describe",
          "label": "Describe",
          "impliedGrants": []
        },
        {
          "itemId": 9017,
          "name": "kafka:create",
          "label": "Create",
          "impliedGrants": []
        },
        {
          "itemId": 9018,
          "name": "kafka:delete",
          "label": "Delete",
          "impliedGrants": []
        },
        {
          "itemId": 9016,
          "name": "kafka:kafka_admin",
          "label": "Kafka Admin",
          "impliedGrants": [
            "kafka:publish",
            "kafka:consume",
            "kafka:configure",
            "kafka:describe",
            "kafka:create",
            "kafka:delete"
          ]
        },
        {
          "itemId": 11012,
          "name": "atlas:read",
          "label": "read",
          "impliedGrants": []
        },
        {
          "itemId": 11013,
          "name": "atlas:create",
          "label": "create",
          "impliedGrants": []
        },
        {
          "itemId": 11014,
          "name": "atlas:update",
          "label": "update",
          "impliedGrants": []
        },
        {
          "itemId": 11015,
          "name": "atlas:delete",
          "label": "delete",
          "impliedGrants": []
        },
        {
          "itemId": 11016,
          "name": "atlas:all",
          "label": "All",
          "impliedGrants": [
            "atlas:read",
            "atlas:create",
            "atlas:update",
            "atlas:delete"
          ]
        }
      ],
      "policyConditions": [
        {
          "itemId": 1,
          "name": "accessed-after-expiry",
          "evaluator": "org.apache.ranger.plugin.conditionevaluator.RangerScriptTemplateConditionEvaluator",
          "evaluatorOptions": {
            "scriptTemplate": "ctx.isAccessedAfter(\u0027expiry_date\u0027);"
          },
          "uiHint": "{ \"singleValue\":true }",
          "label": "Accessed after expiry_date (yes/no)?",
          "description": "Accessed after expiry_date? (yes/no)"
        }
      ],
      "contextEnrichers": [
        {
          "itemId": 1,
          "name": "TagEnricher",
          "enricher": "org.apache.ranger.plugin.contextenricher.RangerTagEnricher",
          "enricherOptions": {
            "tagRetrieverClassName": "org.apache.ranger.plugin.contextenricher.RangerAdminTagRetriever",
            "tagRefresherPollingInterval": "60000"
          }
        }
      ],
      "enums": [],
      "dataMaskDef": {
        "maskTypes": [],
        "accessTypes": [],
        "resources": []
      },
      "rowFilterDef": {
        "accessTypes": [],
        "resources": []
      },
      "id": 100,
      "guid": "0d047248-baff-4cf9-8e9e-d5d377284b2e",
      "isEnabled": true,
      "createTime": "20170217-11:41:33.000-+0000",
      "updateTime": "20170217-11:41:35.000-+0000",
      "version": 11
    },
    "auditMode": "audit-default"
  }
}

================================================
FILE: sdk/java/src/test/resources/kerberos.cfg
================================================
# kerberos keytab
dev.keytab={BASE64 KEYTAB}
# delegation token
dev.token.life=604800
dev.token.renew=86400

# superuser and supergroup
dev.superuser=client
dev.supergroup=supergroup

# Mapping from Kerberos principals to OS user accounts
# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SecureMode.html#Mapping_from_Kerberos_principals_to_OS_user_accounts
dev.mechanism=hadoop
dev.rule=RULE:[2:$1/$2@$0](root/.*@example.com)s/.*/hdfs/
dev.rule=RULE:[2:$1/$2@$0](jerry/.*@EXAMPLE\.COM)s/.*/jerry_map/
dev.rule=RULE:[2:$1/$2@$0](tom/.*@EXAMPLE\.COM)s/.*/client/
dev.rule=DEFAULT

# proxy user settings
# https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SecureMode.html#Proxy_user
# users: user1,user2 or *
dev.proxy.client.users=foo
# groups: group1,group2 or *
dev.proxy.client.groups=foogrp
# hosts: host1,host2 or 192.168.1.1,192.168.1.2 or 192.168.1.1/32 or *
dev.proxy.client.hosts=*


================================================
FILE: sdk/java/src/test/resources/log4j.properties
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppenderk
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

================================================
FILE: sdk/java/src/test/resources/testAclCLI.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="testConf.xsl"?>

<!--
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
-->

<configuration>
  <!-- Normal mode is test. To run just the commands and dump the output
       to the log, set it to nocompare -->
  <mode>test</mode>

  <!--  Comparator types:
           ExactComparator
           SubstringComparator
           RegexpComparator
           TokenComparator
           -->
  <tests>
    <!-- Tests for setfacl and getfacl-->
    <test>
      <description>getfacl: basic permissions</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -getfacl /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /file1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>getfacl: basic permissions for directory</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Add an ACL</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r-- /file1</command>
        <command>-fs NAMENODE -getfacl /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /file1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:bob:r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Add multiple ACLs at once</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r--,group:users:r-x /file1</command>
        <command>-fs NAMENODE -getfacl /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /file1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:bob:r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group:users:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Remove an ACL</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r--,user:charlie:r-x /file1</command>
        <command>-fs NAMENODE -setfacl -x user:bob /file1</command>
        <command>-fs NAMENODE -getfacl /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /file1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:charlie:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!bob)*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Add default ACL</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r--,group:users:r-x /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:bob:r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group:users:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:user:charlie:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:group:admin:rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:mask::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:other::r-x</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Add minimal default ACL</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user::rwx /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!default\:mask)*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : try adding default ACL to file</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>setfacl: Invalid ACL: only directories may have a default ACL</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Remove one default ACL</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r--,group:users:r-x /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -setfacl -x default:user:charlie /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:bob:r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group:users:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:group:admin:rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:mask::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>default:other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!default:user:charlie).*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Remove all default ACL</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:bob:r--,group:users:r-x /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -setfacl -k /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:bob:r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group:users:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!default).*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Remove all but base ACLs for a directory</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -setfacl -b /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!charlie).*</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!default).*</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!admin).*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : Remove all but base ACLs for a file</description>
      <test-commands>
        <command>-fs NAMENODE -touchz /file1</command>
        <command>-fs NAMENODE -setfacl -m user:charlie:r-x,group:admin:rwx /file1</command>
        <command>-fs NAMENODE -setfacl -b /file1</command>
        <command>-fs NAMENODE -getfacl /file1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm /file1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /file1</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r--</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!charlie).*</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!admin).*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : check inherit default ACL to file</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -touchz /dir1/file</command>
        <command>-fs NAMENODE -getfacl /dir1/file</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># file: /dir1/file</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>user:charlie:r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>group:admin:rwx</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>mask::rw-</expected-output>
        </comparator>
        <comparator>
          <type>SubstringComparator</type>
          <expected-output>other::r--</expected-output>
        </comparator>
        <comparator>
          <type>RegexpAcrossOutputComparator</type>
          <expected-output>.*(?!default).*</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl : check inherit default ACL to dir</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m default:user:charlie:r-x,default:group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -mkdir /dir1/dir2</command>
        <command>-fs NAMENODE -getfacl /dir1/dir2</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output># file: /dir1/dir2</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output># owner: USERNAME</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output># group: supergroup</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>user:charlie:r-x</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>RegexpComparator</type>
          <expected-output>^group:admin:rwx\b.*</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>mask::rwx</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:user::rwx</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:user:charlie:r-x</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:group::r-x</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:group:admin:rwx</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:mask::rwx</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>default:other::r-x</expected-output>
        </comparator>
        <comparator>
          <type>ExactLineComparator</type>
          <expected-output>other::r-x</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>getfacl -R : recursive</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:charlie:r-x,group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -mkdir /dir1/dir2</command>
        <command>-fs NAMENODE -setfacl -m user:user1:r-x,group:users:rwx /dir1/dir2</command>
        <command>-fs NAMENODE -getfacl -R /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>ExactComparator</type>
          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:r-x#LF#group::r-x#LF#group:admin:rwx#LF#mask::rwx#LF#other::r-x#LF##LF## file: /dir1/dir2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:user1:r-x#LF#group::r-x#LF#group:users:rwx#LF#mask::rwx#LF#other::r-x#LF##LF#</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl -R : recursive</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -mkdir /dir1/dir2</command>
        <command>-fs NAMENODE -setfacl -R -m user:charlie:r-x,group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -getfacl -R /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>ExactComparator</type>
          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:r-x#LF#group::r-x#LF#group:admin:rwx#LF#mask::rwx#LF#other::r-x#LF##LF## file: /dir1/dir2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:r-x#LF#group::r-x#LF#group:admin:rwx#LF#mask::rwx#LF#other::r-x#LF##LF#</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl --set : Set full set of ACLs</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:charlie:r-x,group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -setfacl --set user::rw-,group::r--,other::r--,user:user1:r-x,group:users:rw- /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>ExactComparator</type>
          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rw-#LF#user:user1:r-x#LF#group::r--#LF#group:users:rw-#LF#mask::rwx#LF#other::r--#LF##LF#</expected-output>
        </comparator>
      </comparators>
    </test>
    <test>
      <description>setfacl -x mask : remove mask entry along with other ACL entries</description>
      <test-commands>
        <command>-fs NAMENODE -mkdir /dir1</command>
        <command>-fs NAMENODE -setfacl -m user:charlie:r-x,group:admin:rwx /dir1</command>
        <command>-fs NAMENODE -setfacl -x mask::,user:charlie,group:admin /dir1</command>
        <command>-fs NAMENODE -getfacl /dir1</command>
      </test-commands>
      <cleanup-commands>
        <command>-fs NAMENODE -rm -R /dir1</command>
      </cleanup-commands>
      <comparators>
        <comparator>
          <type>ExactComparator</type>
          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#group::r-x#LF#other::r-x#LF##LF#</expected-output>
        </comparator>
      </comparators>
    </test>
<!--    <test>-->
<!--      <description>getfacl: only default ACL</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir /dir1</command>-->
<!--        <command>-fs NAMENODE -setfacl -m default:user:charlie:rwx /dir1</command>-->
<!--        <command>-fs NAMENODE -getfacl /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># file: /dir1</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># owner: USERNAME</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># group: supergroup</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>user::rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>group::r-x</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>other::r-x</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:user::rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:user:charlie:rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:group::r-x</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:mask::rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:other::r-x</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>getfacl: effective permissions</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir /dir1</command>-->
<!--        <command>-fs NAMENODE -setfacl -m user:charlie:rwx,group::-wx,group:sales:rwx,mask::r-x,default:user:charlie:rwx,default:group::r-x,default:group:sales:rwx,default:mask::rw- /dir1</command>-->
<!--        <command>-fs NAMENODE -getfacl /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># file: /dir1</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># owner: USERNAME</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output># group: supergroup</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>user::rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^user:charlie:rwx\t#effective:r-x$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^group::-wx\t#effective:&#45;&#45;x$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^group:sales:rwx\t#effective:r-x$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>mask::r-x</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>other::r-x</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:user::rwx</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^default:user:charlie:rwx\t#effective:rw-$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^default:group::r-x\t#effective:r&#45;&#45;$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^default:group:sales:rwx\t#effective:rw-$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:mask::rw-</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>SubstringComparator</type>-->
<!--          <expected-output>default:other::r-x</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>ls: display extended acl marker</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1/dir2</command>-->
<!--        <command>-fs NAMENODE -setfacl -m user:charlie:rwx,group::-wx,group:sales:rwx,mask::r-x,default:user:charlie:rwx,default:group::r-x,default:group:sales:rwx,default:mask::rw- /dir1/dir2</command>-->
<!--        <command>-fs NAMENODE -ls /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^drwxr-xr-x\+( )*-( )*USERNAME( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir1/dir2</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>setfacl: recursive modify entries with mix of files and directories</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/file1</command>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1/dir2</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/dir2/file2</command>-->
<!--        <command>-fs NAMENODE -setfacl -R -m user:charlie:rwx,default:user:charlie:r-x /dir1</command>-->
<!--        <command>-fs NAMENODE -getfacl -R /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>ExactComparator</type>-->
<!--          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2/file2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rw-#LF#user:charlie:rwx#LF#group::r&#45;&#45;#LF#mask::rwx#LF#other::r&#45;&#45;#LF##LF## file: /dir1/file1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rw-#LF#user:charlie:rwx#LF#group::r&#45;&#45;#LF#mask::rwx#LF#other::r&#45;&#45;#LF##LF#</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>setfacl: recursive remove entries with mix of files and directories</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/file1</command>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1/dir2</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/dir2/file2</command>-->
<!--        <command>-fs NAMENODE -setfacl -R -m user:bob:rwx,user:charlie:rwx,default:user:bob:rwx,default:user:charlie:r-x /dir1</command>-->
<!--        <command>-fs NAMENODE -setfacl -R -x user:bob,default:user:bob /dir1</command>-->
<!--        <command>-fs NAMENODE -getfacl -R /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>ExactComparator</type>-->
<!--          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2/file2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rw-#LF#user:charlie:rwx#LF#group::r&#45;&#45;#LF#mask::rwx#LF#other::r&#45;&#45;#LF##LF## file: /dir1/file1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rw-#LF#user:charlie:rwx#LF#group::r&#45;&#45;#LF#mask::rwx#LF#other::r&#45;&#45;#LF##LF#</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>setfacl: recursive set with mix of files and directories</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/file1</command>-->
<!--        <command>-fs NAMENODE -mkdir -p /dir1/dir2</command>-->
<!--        <command>-fs NAMENODE -touchz /dir1/dir2/file2</command>-->
<!--        <command>-fs NAMENODE -setfacl -R &#45;&#45;set user::rwx,user:charlie:rwx,group::r-x,other::r-x,default:user:charlie:r-x /dir1</command>-->
<!--        <command>-fs NAMENODE -getfacl -R /dir1</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>ExactComparator</type>-->
<!--          <expected-output># file: /dir1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF#default:user::rwx#LF#default:user:charlie:r-x#LF#default:group::r-x#LF#default:mask::r-x#LF#default:other::r-x#LF##LF## file: /dir1/dir2/file2#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF##LF## file: /dir1/file1#LF## owner: USERNAME#LF## group: supergroup#LF#user::rwx#LF#user:charlie:rwx#LF#group::r-x#LF#mask::rwx#LF#other::r-x#LF##LF#</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
<!--    <test>-->
<!--      <description>copyFromLocal: copying file into a directory with a default ACL</description>-->
<!--      <test-commands>-->
<!--        <command>-fs NAMENODE -mkdir /dir1</command>-->
<!--        <command>-fs NAMENODE -setfacl -m default:user:charlie:rwx /dir1</command>-->
<!--        <command>-fs NAMENODE -copyFromLocal CLITEST_DATA/data1k /dir1/data1k</command>-->
<!--        <command>-fs NAMENODE -getfacl /dir1/data1k</command>-->
<!--      </test-commands>-->
<!--      <cleanup-commands>-->
<!--        <command>-fs NAMENODE -rm -R /dir1</command>-->
<!--      </cleanup-commands>-->
<!--      <comparators>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^# file: /dir1/data1k$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^# owner: USERNAME$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^# group: supergroup$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^user::rw-$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^user:charlie:rwx\t#effective:r&#45;&#45;$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^group::r-x\t#effective:r&#45;&#45;$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^mask::r&#45;&#45;$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpComparator</type>-->
<!--          <expected-output>^other::r&#45;&#45;$</expected-output>-->
<!--        </comparator>-->
<!--        <comparator>-->
<!--          <type>RegexpAcrossOutputComparator</type>-->
<!--          <expected-output>.*(?!default).*</expected-output>-->
<!--        </comparator>-->
<!--      </comparators>-->
<!--    </test>-->
  </tests>
</configuration>


================================================
FILE: sdk/java/src/test/test-spark.sh
================================================
#!/usr/bin/env bash

set -e
set -o pipefail

HADOOP_VERSION="2.7.7"
SPARK_VERSION="2.4.0"
EXAMPLES_JAR="spark-examples_2.11-2.4.0.jar"

SPARK_DIST="spark-${SPARK_VERSION}-bin-without-hadoop"
SPARK_HOME="/opt/${SPARK_DIST}"
HADOOP_DIST="hadoop-${HADOOP_VERSION}"
HADOOP_HOME="/opt/${HADOOP_DIST}"

curl -o "${HADOOP_HOME}.tar.gz" "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/${HADOOP_DIST}.tar.gz"
tar -xf "${HADOOP_HOME}.tar.gz" -C /opt

export _JAVA_OPTIONS="-Djava.library.path=$(pwd)/../mount/libjfs"
export HADOOP_CLASSPATH="$(pwd)/target/juicefs-hadoop-0.1-SNAPSHOT.jar"
"${HADOOP_HOME}/bin/hadoop" --config "$(pwd)/conf" jar "${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar" grep hello output 'dfs[a-z.]+'

curl -o "${SPARK_HOME}.tgz" "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_DIST}.tgz"
tar -xf "${SPARK_HOME}.tgz" -C /opt

echo "export SPARK_DIST_CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath)" > "${SPARK_HOME}/conf/spark-env.sh"
echo "export HADOOP_CONF_DIR=$(pwd)/conf" >> "${SPARK_HOME}/conf/spark-env.sh"
cp "${SPARK_HOME}/examples/jars/${EXAMPLES_JAR}" /jfs/

"${SPARK_HOME}/bin/spark-submit" --class org.apache.spark.examples.JavaWordCount --master "local" "jfs:///${EXAMPLES_JAR}" "jfs:///hello"


================================================
FILE: sdk/python/.gitignore
================================================
dist
build
*.egg-info 
*.h
*.so


================================================
FILE: sdk/python/Dockerfile.builder
================================================
FROM centos/python-38-centos7

USER 0

RUN curl -fsSL https://autoinstall.plesk.com/PSA_18.0.62/examiners/repository_check.sh | bash -s -- update >/dev/null && \
    yum install -y make gcc && \
    cd /tmp && \
    curl -L https://static.juicefs.com/misc/go1.21.13.linux-amd64.tar.gz -o go1.21.13.linux-amd64.tar.gz && \
    tar -C /usr/local -xzf go1.21.13.linux-amd64.tar.gz && \
    rm go1.21.13.linux-amd64.tar.gz && \
    ln -s /usr/local/go/bin/go /usr/bin/go && \
    python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade setuptools && \
    pip install wheel build 


================================================
FILE: sdk/python/Dockerfile.builder.arm
================================================
FROM golang:1.24

RUN apt update && \
    apt install -y --no-install-recommends \
        git \
        make \
        gcc \
        python3 \
        python3-pip \
        python3-setuptools \
        python3-wheel \
        python3-build \
        python3-venv \
        ca-certificates \
    && \
    apt clean && \
    rm -rf /var/lib/apt/lists/*


================================================
FILE: sdk/python/Makefile
================================================
LDFLAGS = -s -w

.PHONY: libjfs.so juicefs

# SET GOPROXY if WITH_PROXY is set
CN_GOPROXY ?= 0
ifeq ($(CN_GOPROXY), 1)
	GOPROXY = https://proxy.golang.com.cn,direct
endif

VERSION_FILE := ../../pkg/version/version.go

VERSION := $(shell awk '/major[[:space:]]*:[[:space:]]*/ {gsub(/[^0-9]/, "", $$2); major=$$2} \
    /minor[[:space:]]*:[[:space:]]*/ {gsub(/[^0-9]/, "", $$2); minor=$$2} \
    /patch[[:space:]]*:[[:space:]]*/ {gsub(/[^0-9]/, "", $$2); patch=$$2} \
    END {print major "." minor "." patch}' $(VERSION_FILE))

REVISION := $(shell git rev-parse --short HEAD 2>/dev/null)
REVISIONDATE := $(shell git log -1 --pretty=format:'%cd' --date short 2>/dev/null)
BUILD_DATE := $(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
BUILD_DATE_SHORT := $(shell date -u +'%Y%m%d%H%M')

PKG := github.com/juicedata/juicefs/pkg/version
ifneq ($(strip $(REVISION)),) # Use git clone
	LDFLAGS += -X $(PKG).revision=$(REVISION) \
		   -X $(PKG).revisionDate=$(REVISIONDATE)
endif

# libjfs is located in the sdk/java/libjfs
libjfs.so:
	go build -buildmode c-shared -ldflags="$(LDFLAGS)" -o juicefs/juicefs/libjfs.so ../java/libjfs

builder: Dockerfile.builder
	docker build -t sdkbuilder -f Dockerfile.builder .

arm-builder: Dockerfile.builder.arm
	docker build -t sdkbuilder -f Dockerfile.builder.arm .

juicefs:
	sudo rm -rf juicefs.egg-info
	echo "Building juicefs version $(VERSION).$(BUILD_DATE_SHORT)"
	sed -i 's/^VERSION = .*/VERSION = "$(VERSION).$(BUILD_DATE_SHORT)"/' juicefs/setup.py
	sed -i 's/^BUILD_INFO = .*/BUILD_INFO = "$(BUILD_DATE) $(REVISION)"/' juicefs/setup.py
	docker run --rm -i -v ${PWD}/../../:/opt/jfs -w /opt/jfs/sdk/python -e GOPROXY=${GOPROXY} sdkbuilder sh -c 'make libjfs.so && cd juicefs && python3 -m build -w'

clean:
    $(clean)


================================================
FILE: sdk/python/examples/ffrecord/dataloader.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from typing import List, Iterator, Callable
from multiprocessing import Pool
from dataset import FFRecordDataset
import os
import torch
import time

class FFRecordDataLoader(torch.utils.data.DataLoader):
    def __init__(
                self,
                dataset: FFRecordDataset,
                batch_size=1,
                shuffle: bool = False,
                sampler=None,
                batch_sampler=None,
                num_workers: int = 0,
                collate_fn=None,
                pin_memory: bool = False,
                drop_last: bool = False,
                timeout: float = 0,
                worker_init_fn=None,
                generator=None,
                *,
                prefetch_factor: int = 2,
                persistent_workers: bool = False,
                skippable: bool = True):

        # use fork to create subprocesses
        if num_workers == 0:
            multiprocessing_context = None
            dataset.initialize()
        else:
            multiprocessing_context = 'fork'
        self.skippable = skippable

        super(FFRecordDataLoader,
              self).__init__(dataset=dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             sampler=sampler,
                             batch_sampler=batch_sampler,
                             num_workers=num_workers,
                             collate_fn=collate_fn,
                             pin_memory=pin_memory,
                             drop_last=drop_last,
                             timeout=timeout,
                             worker_init_fn=worker_init_fn,
                             multiprocessing_context=multiprocessing_context,
                             generator=generator,
                             prefetch_factor=prefetch_factor,
                             persistent_workers=persistent_workers)

if __name__ == "__main__":
    fnames = ["/demo.ffr"]

    dataset = FFRecordDataset(fnames, check_data=True)

    def worker_init_fn(worker_id):
        worker_info = torch.utils.data.get_worker_info()
        print(f"Worker initialized pid: {os.getpid()}, work_info: {worker_info}")
        dataset = worker_info.dataset
        dataset.initialize(worker_id=worker_id)

    def collate_fn(batch):
        return batch

    begin_time = time.time()

    dataloader = FFRecordDataLoader(dataset, batch_size=1, shuffle=True, num_workers=10, worker_init_fn=worker_init_fn, prefetch_factor=None, collate_fn=collate_fn)

    i=0
    for batch in dataloader:
        #  print(i, ": ", batch[0]["index"], "----", time.time()-begin_time)
        i+=1
        if i>1000:
            break
    end_time = time.time()
    print(f"takes: {end_time-begin_time}")


================================================
FILE: sdk/python/examples/ffrecord/dataset.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from typing import List, Union
from filereader import FileReader
# from filereader_dio import FileReader
import torch
import os

class FFRecordDataset(torch.utils.data.Dataset):
    def __init__(self, fnames: Union[str, List[str]], check_data: bool = True):
        if isinstance(fnames, str):
            fnames = [fnames]
        self.reader = FileReader(fnames, check_data=check_data)
        self.n = self.reader.n
        self.reader.close_fd()

    def initialize(self, worker_id=0, num_workers=1):
        self.reader.open_fd()
        self.n = self.reader.n

    def __len__(self) -> int:
        return self.n

    def __getitem__(self, index: Union[int, List[int]]) -> Union[np.array, List[np.array]]:
        if isinstance(index, int):
            return self.reader.read_one(index)
        elif isinstance(index, list):
            return self.reader.read_batch(index)
        else:
            raise TypeError(f"Index must be int or list, got {type(index)}")

    def close(self):
        self.reader.close_fd()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()


if __name__ == "__main__":
    fnames = ["/demo.ffr"]

    with FFRecordDataset(fnames, check_data=True) as dataset:
        sample = dataset[0]
        print("Sample 0:", sample)

        batch = dataset[[1, 2, 3]]
        print(batch)
        print("Dataset length:", len(dataset))


================================================
FILE: sdk/python/examples/ffrecord/filereader.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.append('.')
from sdk.python.juicefs.juicefs import juicefs
# import juicefs
import zlib
from typing import Union
import struct
import os
import struct
import zlib
from typing import List, Tuple, Optional
import io
import pickle
import numpy as np

MAX_SIZE = 512 * (1 << 20)  # 512 MB

def ffcrc32(code: int, data: Union[bytes, bytearray], length: int) -> int:
    start = 0
    while start < length:
        chunk_size = min(MAX_SIZE, length - start)
        code = zlib.crc32(data[start:start + chunk_size], code)
        start += chunk_size
    return code

class FileHeader:
    def __init__(self, jfscli: juicefs.Client, fname: str, check_data: bool = True):
        self.fname = fname
        self.fd = jfscli.open(fname, mode='rb')

        self.fd.seek(0)
        self.checksum_meta = self._read_uint32()
        self.n = self._read_uint64()

        self.checksums = [self._read_uint32() for _ in range(self.n)]
        self.fd.seek(4+8+4*self.n)
        self.offsets = [self._read_uint64() for _ in range(self.n + 1)]

        self.offsets[self.n] = jfscli.stat(fname).st_size

        if check_data:
            self.validate()
        self.fd.close()
        self.fd = jfscli.open(fname, mode='rb', buffering=0)
        self.aiofd = self.fd


    def _read_uint32(self) -> int:
        return struct.unpack('<I', self.fd.read(4))[0]

    def _read_uint64(self) -> int:
        return struct.unpack('<Q', self.fd.read(8))[0]

    def close_fd(self):
        if self.fd:
            self.fd.close()
            self.fd = None

    def validate(self):
        if self.checksum_meta == 0:
            print("Warning: you are using an old version ffrecord file, please update the file")
            return

        checksum = 0
        checksum = ffcrc32(checksum, struct.pack('<Q', self.n), 8)
        checksum = ffcrc32(checksum, struct.pack(f'<{len(self.checksums)}I', *self.checksums), 4 * len(self.checksums))
        checksum = ffcrc32(checksum, struct.pack(f'<{len(self.offsets)}Q', *self.offsets), 8 * len(self.offsets) - 8)
        assert checksum == self.checksum_meta, f"{self.fname}: checksum of metadata mismatched!"

    def access(self, index: int, use_aio: bool = False) -> Tuple[int, int, int, int]:
        fd = self.aiofd if use_aio else self.fd
        offset = self.offsets[index]
        length = self.offsets[index + 1] - self.offsets[index]
        checksum = self.checksums[index]
        return fd, offset, length, checksum


class FileReader:
    def __init__(self, fnames: List[str], check_data: bool = True):
        self.fnames = fnames
        self.check_data = check_data
        self.nfiles = len(fnames)
        self.n = 1000
        self.nsamples = [0]
        self.headers = []

    def close_fd(self):
        for header in self.headers:
            header.close_fd()
        self.headers = []
        self.n = 0
        self.nsamples = [0]
        return
    
    def open_fd(self):
        self.v = juicefs.Client("myjfs", "redis://localhost", cache_dir="/tmp/data", cache_size="0", debug=False)

        for fname in self.fnames:
            header = FileHeader(self.v, fname, self.check_data)
            self.headers.append(header)
            self.n += header.n
            self.nsamples.append(self.n)

    def validate(self):
        for header in self.headers:
            header.validate()

    def validate_sample(self, index: int, buf: bytes, checksum: int):
        if self.check_data:
            checksum2 = ffcrc32(0, buf, len(buf))
            assert checksum2 == checksum, f"Sample {index}: checksum mismatched!"

    def read(self, indices: List[int]):
        return self.read_batch(indices)

    def read_batch(self, indices: List[int]):
        assert not any(index >= self.n for index in indices), "Index out of range"
        results = []

        for index in indices:
            results.append(self.read_one(index))

        return results

    def read_one(self, index: int):
        assert index < self.n, "Index out of range"

        fid = 0
        while index >= self.nsamples[fid + 1]:
            fid += 1

        header = self.headers[fid]
        fd, offset, length, checksum = header.access(index - self.nsamples[fid], use_aio=False)

        fd.seek(offset)
        buf = fd.read(length)
        self.validate_sample(index, buf, checksum)
        res = pickle.loads(buf)
        return res
    
    def close(self):
        self.close_fd()

if __name__ == "__main__":
    fnames = ["/demo.ffr"]
    reader = FileReader(fnames, check_data=True)
    reader.open_fd()
    data = reader.read_one(0)
    print(data)
    data = pickle.loads(data)
    print(data["index"])
    print(data["txt"])


================================================
FILE: sdk/python/examples/ffrecord/filereader_dio.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import zlib
import os
import struct
from typing import List, Tuple, Union
import numpy as np

MAX_SIZE = 512 * (1 << 20)  # 512 MB
DIRECTIO_BLOCK_SIZE = 1 * (1 << 20)  # 1 MB

def ffcrc32(code: int, data: Union[bytes, bytearray], length: int) -> int:
    start = 0
    while start < length:
        chunk_size = min(MAX_SIZE, length - start)
        code = zlib.crc32(data[start:start + chunk_size], code)
        start += chunk_size
    return code

class FileHeader:
    def __init__(self, fname: str, check_data: bool = True):
        print(f"__init__ self: {hex(id(self))}")
        print(f"pid: {os.getpid()}")
        self.fname = fname
        self.fd = os.open(fname, os.O_RDONLY | os.O_DIRECT)
        self.aiofd = self.fd 

        self.file_obj = os.fdopen(self.fd, 'rb', buffering=0)

        self.checksum_meta = self._read_uint32()
        self.n = self._read_uint64()

        checksums_size = 4 * self.n
        offsets_size = 8 * (self.n + 1)
        combined_data = self.file_obj.read(checksums_size + offsets_size)
        self.checksums = list(struct.unpack(f'<{self.n}I', combined_data[:checksums_size]))
        self.offsets = list(struct.unpack(f'<{self.n + 1}Q', combined_data[checksums_size:checksums_size + offsets_size]))

        self.offsets[self.n] = os.path.getsize(fname)
        if check_data:
            self.validate()

        print("FileHeader initialized for:", fname, "fd:", self.fd)

    def _read_uint32(self) -> int:
        return struct.unpack('<I', self.file_obj.read(4))[0]

    def _read_uint64(self) -> int:
        return struct.unpack('<Q', self.file_obj.read(8))[0]

    def close_fd(self):
        print("close fd: ", self.fd)
        if self.fd != -1:
            os.close(self.fd)
            self.fd = -1
            self.file_obj = None
    
    def open_fd(self):
        if self.fd == -1:
            self.fd = os.open(self.fname, os.O_RDONLY | os.O_DIRECT)
            self.aiofd = self.fd
            print(f"header.open_fd: {self.fd} address: {hex(id(self))} pid: {os.getpid()}")

    def validate(self):
        if self.checksum_meta == 0:
            print("Warning: you are using an old version ffrecord file, please update the file")
            return

        checksum = 0
        checksum = ffcrc32(checksum, struct.pack('<Q', self.n), 8)
        checksum = ffcrc32(checksum, struct.pack(f'<{len(self.checksums)}I', *self.checksums), 4 * len(self.checksums))
        checksum = ffcrc32(checksum, struct.pack(f'<{len(self.offsets)}Q', *self.offsets), 8 * len(self.offsets) - 8)
        assert checksum == self.checksum_meta, f"{self.fname}: checksum of metadata mismatched!"

    def access(self, index: int, use_aio: bool = False) -> Tuple[int, int, int, int]:
        fd = self.aiofd if use_aio else self.fd
        offset = self.offsets[index]
        length = self.offsets[index + 1] - self.offsets[index]
        checksum = self.checksums[index]
        return fd, offset, length, checksum


class FileReader:
    def __init__(self, fnames: List[str], check_data: bool = True):
        self.fnames = fnames
        self.check_data = check_data
        self.nfiles = len(fnames)
        self.n = 0
        self.nsamples = [0]
        self.headers = []

        for fname in fnames:
            header = FileHeader(fname, check_data)
            self.headers.append(header)
            self.n += header.n
            self.nsamples.append(self.n)

    def close_fd(self):
        for header in self.headers:
            header.close_fd()
    
    def open_fd(self):
      print(f"open_fd address: {hex(id(self))} pid: {os.getpid()}")
      for header in self.headers:
          header.open_fd()

    def validate(self):
        for header in self.headers:
            header.validate()

    def validate_sample(self, index: int, buf: bytes, checksum: int):
        if self.check_data:
            checksum2 = ffcrc32(0, buf, len(buf))
            assert checksum2 == checksum, f"Sample {index}: checksum mismatched!"

    def read_batch(self, indices: List[int]) -> List[np.array]:
        assert not any(index >= self.n for index in indices), "Index out of range"
        results = []

        for index in indices:
            results.append(self.read_one(index))

        return results

    def read_one(self, index: int) -> np.array:
        assert index < self.n, "Index out of range"

        fid = 0
        while index >= self.nsamples[fid + 1]:
            fid += 1

        header = self.headers[fid]
        fd, offset, length, checksum = header.access(index - self.nsamples[fid], use_aio=False)

        buf = bytearray(length)
        start = 0
        while start < length:
            chunk_size = min(DIRECTIO_BLOCK_SIZE, length - start)
            read_bytes = os.pread(fd, chunk_size, offset + start)
            buf[start:start + chunk_size] = read_bytes
            start += chunk_size

        self.validate_sample(index, buf, checksum)
        array = np.frombuffer(buf, dtype=np.uint8)

        return array


if __name__ == "__main__":
    fnames = ["/demo.ffr"]
    reader = FileReader(fnames, check_data=True)
    data = reader.read_one(0)
    print(data)


================================================
FILE: sdk/python/examples/ffrecord/main.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2025 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from pathlib import Path
import loguru
import random
import pickle
from multiprocessing import Pool
import numpy as np
from PIL import Image
from faker import Faker
import io
import time
from tqdm import tqdm
from ffrecord import FileWriter
from ffrecord.torch import Dataset, DataLoader
from ffrecord import FileReader

logger = loguru.logger
fake = Faker()

def serialize(sample):
    return pickle.dumps(sample)

def deserialize(sample):
    return pickle.loads(sample)

def generate_random_image_np(
    width=256,
    height=256,
    format="JPEG",  # JPEG, PNG, WEBP
    quality=90,     # only for JPEG/WEBP
):
    image_np = np.random.randint(0, 255, (height, width, 3), dtype=np.uint8)
    img = Image.fromarray(image_np)
    
    img_bytes = io.BytesIO()
    img.save(img_bytes, format=format, quality=quality)
    return img_bytes.getvalue()

def generate_data_entry(
    idx,
    text=None,
    avg_width=1024,
    avg_height=1024,
    variance=50,
    possible_formats=["PNG"],
    # possible_formats=["JPEG", "PNG", "WEBP"],
):
    """
    - avg_width/avg_height ± variance
    """
    image_format = random.choice(possible_formats).lower()

    width = random.randint(avg_width - variance, avg_width + variance)
    height = random.randint(avg_height - variance, avg_height + variance)
    width, height = max(width, 32), max(height, 32) 
    
    img_bytes = generate_random_image_np(
        width=width,
        height=height,
        format=image_format.upper(),
    )
    
    if text is None:
        text = fake.sentence()
    
    return {
        "index": idx,
        "txt": text,
        image_format: img_bytes,
    }

def write_ffrecord():
  ffr_output = Path(ffrecord_file)
  if ffr_output.exists():
    logger.warning(f"Output {ffr_output} exists, removing")    
  logger.info(f"Generating {num_samples} samples")
  with Pool(num_proc) as pool:
      data_to_write = list(
          tqdm(
              pool.imap_unordered(generate_data_entry, range(num_samples), chunksize=10),
              total=num_samples,
              desc="Generating data"
              )
            )
  begin_time = time.time()
  writer = FileWriter(ffr_output, len(data_to_write))
  for i, data in enumerate(data_to_write):
      writer.write_one(serialize(data))
      # writer.write_one(data)
  writer.close()
  end_time = time.time()
  lmdb_size = ffr_output.stat().st_size
  logger.info(f"FFRecord size: {lmdb_size / 1024 ** 3:.2f} GB")
  logger.info(f"Time taken to write: {end_time - begin_time:.2f} seconds")

def read_ffrecord(batch_size: int):
    reader = FileReader([ffrecord_file], check_data=True)

    sample_indices = list(range(num_samples))
    random.Random(0).shuffle(sample_indices)
    sample_batches = [sample_indices[i: i + batch_size] for i in range(0, len(sample_indices), batch_size)]
    logger.info(f'Number of samples to read: {reader.n}, batch_size = {batch_size}, num_batches = {len(sample_batches)}')
    read_indices = set()
    begin_time = time.time()
    index_iter = sample_batches
    index_iter = tqdm(index_iter, desc="Reading data in batches", total=len(sample_batches))

    for indices in index_iter:
        all_data = reader.read(indices)
        for data in all_data:
            data = deserialize(data)
            read_indices.add(data["index"])
    end_time = time.time()
    reader.close()
    assert read_indices == set(range(num_samples))
    logger.info(f"Read {len(read_indices)} samples in {end_time - begin_time:.2f} s: {len(read_indices) / (end_time - begin_time):.2f} samples/s")


class MyDataset(Dataset):
    def __init__(self, fnames, check_data=True):
        self.reader = FileReader(fnames, check_data=check_data)

    def __len__(self):
        return self.reader.n

    def __getitem__(self, indices):
        data = self.reader.read(indices)
        samples = []

        for bytes_ in data:
            item = pickle.loads(bytes_)
            samples.append(item)

        return samples


ffrecord_file="/tmp/jfs/demo.ffr"
num_samples=1000
num_proc=4

if __name__ == "__main__":
    if len(sys.argv) > 1:
        if sys.argv[1] == "write":
            write_ffrecord()
        elif sys.argv[1] == "read":
            read_ffrecord(batch_size=1)
    else:
        begin_time = time.time()
        dataset = MyDataset([ffrecord_file], check_data=True)
        dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=10,prefetch_factor=None)

        i=0
        for batch in dataloader:
            i+=1
            if i>1000:
                break
        end_time = time.time()
        print(f"takes: {end_time-begin_time}")


================================================
FILE: sdk/python/examples/ffrecord/readme.md
================================================
```bash
# This is a ffrecord dataloader example.
# Prepare
# Install ffrecord here: https://github.com/HFAiLab/ffrecord
# Mount JuiceFS
juicefs mount redis://localhost /tmp/jfs -d

# Generate dataset
python3 sdk/python/examples/ffrecord/main.py write
# Simple read dataset
python3 sdk/python/examples/ffrecord/main.py read
# Read dataset with dataloader: (takes 39.55s)
python3 sdk/python/examples/ffrecord/main.py

# Prepare python-sdk
make -C sdk/python libjfs.so
# Read dataset with Juicefs-pythonsdk-dataloader: (takes 10.02s)
python3 sdk/python/examples/ffrecord/dataloader.py
```

================================================
FILE: sdk/python/examples/fsspec/main.py
================================================
import fsspec
import ray
import sys
sys.path.append('.')
import sdk.python.juicefs.juicefs.spec
# from sdk.python.juicefs.juicefs.spec import JuiceFS

fs = fsspec.filesystem('https')
ds = ray.data.read_csv(
    "https://gender-pay-gap.service.gov.uk/viewing/download-data/2021",
    filesystem=fs,
    partition_filter=None # Since the file doesn't end in .csv
)
ds.count()

print("----++++----++++----")

jfs = fsspec.filesystem("jfs", auto_mkdir=True, name="myjfs", meta="redis://localhost")
dsjfs = ray.data.read_csv('/ray_demo_data.csv', filesystem=jfs)
dsjfs.count()


================================================
FILE: sdk/python/examples/fsspec/readme.md
================================================
```bash
# This example demonstrates how to use the fsspec library to read a CSV file.
juicefs mount redis://localhost /tmp/jfs -d
# Download the data file
wget https://gender-pay-gap.service.gov.uk/viewing/download-data/2021 -O /tmp/jfs/ray_demo_data.csv

# run the example
python3 sdk/python/examples/fsspec/main.py
```

================================================
FILE: sdk/python/juicefs/juicefs/__init__.py
================================================
from .juicefs import Client


================================================
FILE: sdk/python/juicefs/juicefs/juicefs.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2024 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import codecs
import errno
import grp
import io
import json
import locale
import os
import pwd
import six
import struct
import threading
import time
from ctypes import *

# pkg/vfs/helpers.go
MODE_WRITE = 2
MODE_READ = 4

XATTR_CREATE = 1
XATTR_REPLACE = 2

def check_error(r, fn, args):
    if fn.__name__ == "jfs_init" and r == 0:
        name = args[0].decode()
        e = OSError(f'JuiceFS initialized failed for {name}')
        e.errno = 1
        raise e
    elif r < 0:
        formatted_args = []
        for arg in args[2:]:
            if isinstance(arg, (bytes, bytearray)) and len(arg) > 1024:
                formatted_args.append(f'bytes(len={len(arg)})')
            else:
                formatted_args.append(repr(arg))

        e = OSError(f'call {fn.__name__} failed: [Errno {-r}] {os.strerror(-r)}: {formatted_args}')
        e.errno = -r
        raise e
    return r

class FileInfo(Structure):
    _fields_ = [
        ('inode', c_uint64),
        ('mode', c_uint32),
        ('uid', c_uint32),
        ('gid', c_uint32),
        ('atime', c_uint32),
        ('mtime', c_uint32),
        ('ctime', c_uint32),
        ('nlink', c_uint32),
        ('length', c_uint64),
    ]

def _tid():
    return threading.current_thread().ident

def _bin(s):
    return six.ensure_binary(s)

def unpack(fmt, buf):
    if not fmt.startswith("!"):
        fmt = "!" + fmt
    return struct.unpack(fmt, buf[: struct.calcsize(fmt)])


class JuiceFSLib(object):
    def __init__(self):
        self.lib = cdll.LoadLibrary(os.path.join(os.path.dirname(__file__), "libjfs.so"))

    def __getattr__(self, n):
        fn = getattr(self.lib, n)
        if n == "jfs_init" or n == "jfs_lseek":
            fn.restype = c_int64
            fn.errcheck = check_error
        elif n.startswith("jfs"):
            fn.restype = c_int32
            fn.errcheck = check_error
        return fn

class Client(object):
    """A JuiceFS client."""
    def __init__(self, name, meta, *, bucket="", storage_class="", read_only=False, no_session=False, no_bgjob=True,
                 open_cache="0", backup_meta="3600", backup_skip_trash=False, heartbeat="12",
                 cache_dir="memory", cache_size="100M", free_space_ratio="0.1", cache_partial_only=False,
                 verify_cache_checksum="extend", cache_eviction="2-random", cache_scan_interval="3600", cache_expire="0",
                 writeback=False, buffer_size="300M", prefetch=1, max_readahead="0", upload_limit="0",
                 download_limit="0", max_uploads=20, max_deletes=10, skip_dir_nlink=20, skip_dir_mtime="100ms",
                 io_retries=10, get_timeout="5", put_timeout="60", fast_resolve=False, attr_cache="1s",
                 entry_cache="0s", dir_entry_cache="1s", debug=False, no_usage_report=False, access_log="",
                 push_gateway="", push_interval="10", push_auth="", push_labels="", push_graphite="", push_remote_write="", 
                 push_remote_write_auth=""):
        self.lib = JuiceFSLib()
        kwargs = {}
        kwargs["meta"] = meta
        kwargs["bucket"] = bucket
        kwargs["storageClass"] = storage_class
        kwargs["readOnly"] = read_only
        kwargs["noSession"] = no_session
        kwargs["noBGJob"] = no_bgjob
        kwargs["openCache"] = open_cache
        kwargs["backupMeta"] = backup_meta
        kwargs["backupSkipTrash"] = backup_skip_trash
        kwargs["heartbeat"] = heartbeat
        kwargs["cacheDir"] = cache_dir
        kwargs["cacheSize"] = cache_size
        kwargs["freeSpace"] = free_space_ratio
        kwargs["autoCreate"] = True
        kwargs["cacheFullBlock"] = not cache_partial_only
        kwargs["cacheChecksum"] = verify_cache_checksum
        kwargs["cacheEviction"] = cache_eviction
        kwargs["cacheScanInterval"] = cache_scan_interval
        kwargs["cacheExpire"] = cache_expire
        kwargs["writeback"] = writeback
        kwargs["memorySize"] = buffer_size
        kwargs["prefetch"] = prefetch
        kwargs["readahead"] = max_readahead
        kwargs["uploadLimit"] = upload_limit
        kwargs["downloadLimit"] = download_limit
        kwargs["maxUploads"] = max_uploads
        kwargs["maxDeletes"] = max_deletes
        kwargs["skipDirNlink"] = skip_dir_nlink
        kwargs["skipDirMtime"] = skip_dir_mtime
        kwargs["ioRetries"] = io_retries
        kwargs["getTimeout"] = get_timeout
        kwargs["putTimeout"] = put_timeout
        kwargs["fastResolve"] = fast_resolve
        kwargs["attrTimeout"] = attr_cache
        kwargs["entryTimeout"] = entry_cache
        kwargs["dirEntryTimeout"] = dir_entry_cache
        kwargs["debug"] = debug
        kwargs["noUsageReport"] = no_usage_report
        kwargs["accessLog"] = access_log
        kwargs["pushGateway"] = push_gateway
        kwargs["pushInterval"] = push_interval
        kwargs["pushAuth"] = push_auth
        kwargs["pushLabels"] = push_labels
        kwargs["pushGraphite"] = push_graphite
        kwargs["pushRemoteWrite"] = push_remote_write
        kwargs["pushRemoteWriteAuth"] = push_remote_write_auth
        kwargs["caller"] = 1

        jsonConf = json.dumps(kwargs, sort_keys=True)
        self.umask = os.umask(0)
        os.umask(self.umask)
        user = pwd.getpwuid(os.geteuid())
        groups = [grp.getgrgid(gid).gr_name for gid in os.getgrouplist(user.pw_name, user.pw_gid)]
        superuser = pwd.getpwuid(0)
        supergroups = [grp.getgrgid(gid).gr_name for gid in os.getgrouplist(superuser.pw_name, superuser.pw_gid)]
        self.h = self.lib.jfs_init(0, 0, name.encode(), jsonConf.encode(), user.pw_name.encode(), ','.join(groups).encode(), superuser.pw_name.encode(), ''.join(supergroups).encode())

    def __del__(self):
        self.lib.jfs_term(c_int64(_tid()), c_int64(self.h))

    def stat(self, path):
        """Get the status of a file or a directory."""
        fi = FileInfo()
        self.lib.jfs_stat(c_int64(_tid()), c_int64(self.h), _bin(path), byref(fi))
        return os.stat_result((fi.mode, fi.inode, 0, fi.nlink, fi.uid, fi.gid, fi.length, fi.atime, fi.mtime, fi.ctime))

    def exists(self, path):
        """Check if a file exists."""
        try:
            self.stat(path)
            return True
        except OSError as e:
            return False

    def open(self, path, mode='r', buffering=-1, encoding=None, errors=None):
        """Open a file, returns a filelike object."""
        if len(mode) != len(set(mode)):
            raise ValueError(f'invalid mode: {mode}')
        flag = 0
        cnt = 0
        for c in mode:
            if c in 'rwxa':
                cnt += 1
                if c == 'r':
                    flag |= MODE_READ
                else:
                    flag |= MODE_WRITE
            elif c == '+':
                flag |= MODE_READ | MODE_WRITE
            elif c not in 'tb':
                raise ValueError(f'invalid mode: {mode}')
        if cnt != 1:
            raise ValueError('must have exactly one of create/read/write/append mode')
        if 'b' in mode:
            if 't' in mode:
                raise ValueError("can't have text and binary mode at once")
            if encoding:
                raise ValueError("binary mode doesn't take an encoding argument")
            if errors:
                raise ValueError("binary mode doesn't take an errors argument")
        else:
            if not encoding:
                encoding = locale.getpreferredencoding(False).lower()
            if not errors:
                errors = 'strict'
            codecs.lookup(encoding)

        size = 0
        if 'x' in mode:
            fd = self.lib.jfs_create(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint16(0o666), c_uint16(self.umask))
        else:
            try:
                sz = c_uint64()
                fd = self.lib.jfs_open_posix(c_int64(_tid()), c_int64(self.h), _bin(path), byref(sz), c_int32(flag))
                if 'w' in mode:
                    self.lib.jfs_ftruncate(c_int64(_tid()), fd, c_uint64(0))
                else:
                    size = sz.value
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise e
                if 'r' in mode:
                    raise FileNotFoundError(e)
                fd = self.lib.jfs_create(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint16(0o666), c_uint16(self.umask))
        return File(self.lib, fd, path, mode, flag, size, buffering, encoding, errors)

    def truncate(self, path, size):
        """Truncate a file to a specified size."""
        self.lib.jfs_truncate(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint64(size))

    def remove(self, path):
        """Remove a file."""
        self.lib.jfs_delete(c_int64(_tid()), c_int64(self.h), _bin(path))

    def mkdir(self, path, mode=0o777):
        """Create a directory."""
        self.lib.jfs_mkdir(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint16(mode&0o777), c_uint16(self.umask))

    def makedirs(self, path, mode=0o777, exist_ok=False):
        """Create a directory and all its parent components if they do not exist."""
        self.lib.jfs_mkdirAll(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint16(mode&0o777), c_uint16(self.umask), c_bool(exist_ok))

    def rmdir(self, path):
        """Remove a directory. The directory must be empty."""
        self.lib.jfs_rmdir(c_int64(_tid()), c_int64(self.h), _bin(path))

    def rename(self, old, new):
        """Rename the file or directory old to new."""
        self.lib.jfs_rename0(c_int64(_tid()), c_int64(self.h), _bin(old), _bin(new), c_uint32(0))

    def listdir(self, path, detail=False):
        """Return a list containing the names of the entries in the directory given by path."""
        buf = c_void_p()
        size = c_int()
        # func jfs_listdir(pid int, h int64, cpath *C.char, offset int, buf uintptr, bufsize int) int {

        self.lib.jfs_listdir2(c_int64(_tid()), c_int64(self.h), _bin(path), bool(detail), byref(buf), byref(size))
        data = string_at(buf, size)
        infos = []
        pos = 0
        while pos < len(data):
            nlen, = unpack("H", data[pos:pos+2])
            pos += 2
            name = six.ensure_str(data[pos : pos + nlen], errors='replace')
            pos += nlen
            if detail:
                mode, inode, nlink, uid, gid, length, atime, mtime, ctime = \
                    unpack("IQIIIQIII", data[pos:pos+44])
                infos.append((name, os.stat_result((mode, inode, 0, nlink, uid, gid, length, atime, mtime, ctime))))
                pos += 44
            else:
                infos.append(name)
        self.lib.free(buf)
        return sorted(infos)

    def chmod(self, path, mode):
        """Change the mode of a file."""
        self.lib.jfs_chmod(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint16(mode))

    def chown(self, path, uid, gid):
        """Change the owner and group id of a file."""
        self.lib.jfs_chown(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint32(uid), c_uint32(gid))

    def link(self, src, dst):
        """Create a hard link to a file."""
        self.lib.jfs_link(c_int64(_tid()), c_int64(self.h), _bin(src), _bin(dst))

    def lstat(self, path):
        """Like stat(), but do not follow symbolic links."""
        info = FileInfo()
        self.lib.jfs_lstat(c_int64(_tid()), c_int64(self.h), _bin(path), byref(info))
        return os.stat_result((info.mode, info.inode, 0, info.nlink, info.uid, info.gid, info.length, info.atime, info.mtime, info.ctime))

    def readlink(self, path):
        """Return a string representing the path to which the symbolic link points."""
        buf = bytes(1<<16)
        n = self.lib.jfs_readlink(c_int64(_tid()), c_int64(self.h), _bin(path), buf, c_int32(len(buf)))
        return buf[:n].decode()

    def symlink(self, src, dst):
        """Create a symbolic link."""
        self.lib.jfs_symlink(c_int64(_tid()), c_int64(self.h), _bin(src), _bin(dst))

    def unlink(self, path):
        """Remove a file."""
        self.lib.jfs_unlink(c_int64(_tid()), c_int64(self.h), _bin(path))

    def rmr(self, path):
        """Remove a directory and all its contents recursively."""
        self.lib.jfs_rmr(c_int64(_tid()), c_int64(self.h), _bin(path))

    def utime(self, path, times=None):
        """Set the access and modified times of a file."""
        if not times:
            now = time.time()
            times = (now, now)
        self.lib.jfs_utime(c_int64(_tid()), c_int64(self.h), _bin(path), c_int64(int(times[1]*1000)), c_int64(int(times[0]*1000)))

    def walk(self, top, topdown=True, onerror=None, followlinks=False):
        raise NotImplementedError

    def getxattr(self, path, name):
        """Get an extended attribute on a file."""
        size = 64 << 10 # XattrSizeMax
        buf = bytes(size)
        size = self.lib.jfs_getXattr(c_int64(_tid()), c_int64(self.h), _bin(path), _bin(name), buf, c_int32(size))
        return buf[:size]

    def listxattr(self, path):
        """List extended attributes on a file."""
        buf = c_void_p()
        size = c_int()
        self.lib.jfs_listXattr2(c_int64(_tid()), c_int64(self.h), _bin(path), byref(buf), byref(size))
        data = string_at(buf, size).decode()
        self.lib.free(buf)
        if not data:
            return []
        return data.split('\0')[:-1]

    def setxattr(self, path, name, value, flags=0):
        """Set an extended attribute on a file."""
        value = _bin(value)
        self.lib.jfs_setXattr(c_int64(_tid()),  c_int64(self.h), _bin(path), _bin(name), value, c_int32(len(value)), c_int32(flags))

    def removexattr(self, path, name):
        """Remove an extended attribute from a file."""
        self.lib.jfs_removeXattr(c_int64(_tid()), c_int64(self.h), _bin(path), _bin(name))

    def clone(self, src, dst, preserve=False):
        """Clone a file or directory."""
        self.lib.jfs_clone(c_int64(_tid()), c_int64(self.h), _bin(src), _bin(dst), c_bool(preserve))

    def set_quota(self, path, capacity=0, inodes=0, create=False, strict=False):
        """Set the quota of a directory."""
        self._quota(0, path, capacity, inodes, create=create, strict=strict)

    def get_quota(self, path):
        """Get the quota of a directory."""
        return self._quota(1, path)

    def del_quota(self, path):
        """Delete the quota of a directory."""
        self._quota(2, path)

    def list_quota(self):
        """List the quota of all directories."""
        return self._quota(3)

    def check_quota(self, path, repair=False, strict=False):
        """Check the quota of a directory."""
        return self._quota(4, path, repair=repair, strict=strict)

    def _quota(self, cmd, path="", capacity=0, inodes=0, create=False, repair=False, strict=False):
        """Get the quota of a directory."""
        buf = c_void_p()
        n = self.lib.jfs_quota(c_int64(_tid()), c_int64(self.h), _bin(path), c_uint8(cmd), c_uint64(capacity), c_uint64(inodes), c_bool(strict), c_bool(repair), c_bool(create), byref(buf))
        data = string_at(buf, n)
        res = json.loads(str(data, encoding='utf-8'))
        self.lib.free(buf)
        return res

    def info(self, path, recursive=False, strict=False):
        """Get the information of a file or a directory."""
        buf = c_void_p()
        n = self.lib.jfs_info(c_int64(_tid()), c_int64(self.h), _bin(path), byref(buf), c_bool(recursive), c_bool(strict))
        data = string_at(buf, n)
        res = json.loads(str(data, encoding='utf-8'))

        self.lib.free(buf)
        return res

    def summary(self, path, depth=0, entries=1):
        """Get the summary of a directory."""
        buf = c_void_p()

        n = self.lib.jfs_gettreesummary(_tid(), self.h, _bin(path), c_uint8(depth), c_uint32(entries), byref(buf))
        data = string_at(buf, n)
        res = json.loads(str(data, encoding='utf-8'))

        def parseSummary(entry, removefields):
            for f in removefields:
                entry.pop(f, None)

            if entry["Dirs"] == 0:
                entry.pop("Children", None)
            elif entry.get("Children") is not None:
                for v in entry["Children"]:
                    parseSummary(v, removefields)

        parseSummary(res, ["Inode"])
        self.lib.free(buf)
        return res

    def warmup(self, paths, threads=10, evict=False, check=False, background=False, **kwargs):
        # numthreads=10, background=False, isEvict=False, isCheck=False,
        for k in kwargs:
            if k == 'numthreads':
                threads = kwargs[k]
            elif k == 'isEvict':
                evict = kwargs[k]
            elif k == 'isCheck':
                check = kwargs[k]
            else:
                raise TypeError(f"warmup() got an unexpected keyword argument '{k}'")

        """Warm up a file or a directory."""
        if type(paths) is not list:
            paths = [paths]

        buf = c_void_p()

        n = self.lib.jfs_warmup(c_int64(_tid()), c_int64(self.h), json.dumps(paths).encode(), c_int32(threads), c_bool(background), c_bool(evict), c_bool(check), byref(buf))
        res = json.loads(str(string_at(buf, n), encoding='utf-8'))
        self.lib.free(buf)
        return res

    def status(self, trash=False, session=0):
        """Get the status of the volume and client sessions."""
        buf = c_void_p()
        n = self.lib.jfs_status(c_int64(_tid()), c_int64(self.h), c_bool(trash), c_bool(session), byref(buf))
        res = json.loads(str(string_at(buf, n), encoding='utf-8'))
        self.lib.free(buf)
        return res


class _File(object):
    """A JuiceFS file."""
    def __init__(self, lib, fd, path, mode, flag, length):
        self.lib = lib
        self.fd = fd
        self.name = path
        self.flag = flag
        self.length = length
        self.closed = False
        self.append = 'a' in mode
        self.off = self.length if self.append else 0

    def __fspath__(self):
        return self.name

    def readable(self):
        return self.flag & MODE_READ != 0

    def writable(self):
        return self.flag & MODE_WRITE != 0

    def seekable(self):
        return True

    def fileno(self):
        return self.fd

    def isatty(self):
        return False

    def read(self, size=-1):
        """Read at most size bytes, returned as a byes."""
        self._check_closed()
        if self.flag & MODE_READ == 0:
            raise io.UnsupportedOperation('not readable')
        # read directly
        rs = []
        got = 0
        while size > 0 or size < 0:
            n = 4 << 20
            if size > 0 and size < n:
                n = size
            buf = bytes(n)
            n = self.lib.jfs_pread(c_int64(_tid()), c_int32(self.fd), buf, c_int32(n), c_int64(self.off+got))
            if n == 0:
                break
            if n < len(buf):
                buf = buf[:n]
            rs.append(buf)
            got += n
            if size > 0:
                size -= n
        if len(rs) == 1:
            buf = rs[0]
        else:
            buf = b''.join(rs)
        self.off += len(buf)
        return buf

    def readinto(self, buffer):
        data = self.read(len(buffer))
        if not data:
            return 0
        buffer[:len(data)] = data
        return len(data)

    def write(self, data):
        """Write the string data to the file."""
        self._check_closed()
        if isinstance(data, memoryview):
            data = data.tobytes()
        if not isinstance(data, six.binary_type):
            raise TypeError(f"a bytes-like object is required, not '{type(data).__name__}'")
        if not self.writable():
            raise io.UnsupportedOperation('not writable')

        if not data:
            return 0
        if self.append:
            self.off = self.length
        n = self.lib.jfs_pwrite(c_int64(_tid()), c_int32(self.fd), data, c_int32(len(data)), c_int64(self.off))
        self.off += n
        if self.off > self.length:
            self.length = self.off
        return n

    def seek(self, offset, whence=0):
        """Set the stream position to the given byte offset.
        offset is interpreted relative to the position indicated by whence.
        The default value for whence is SEEK_SET."""
        self._check_closed()
        if whence not in (os.SEEK_SET, os.SEEK_CUR, os.SEEK_END):
            raise ValueError(f'invalid whence ({whence}, should be {os.SEEK_SET}, {os.SEEK_CUR} or {os.SEEK_END})')
        if whence == os.SEEK_SET:
            self.off = offset
        elif whence == os.SEEK_CUR:
            self.off += offset
        else:
            self.off = self.length + offset
        return self.off

    def tell(self):
        """Return the current stream position."""
        self._check_closed()
        return self.off

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.
        Size defaults to the current file position, as returned by tell()."""
        self._check_closed()
        if not self.writable():
            raise io.UnsupportedOperation('File not open for writing')
        if size is None:
            size = self.tell()
        self.lib.jfs_ftruncate(c_int64(_tid()), c_int32(self.fd), c_uint64(size))
        self.length = size
        return size

    def flush(self):
        return

    def fsync(self):
        self.lib.jfs_fsync(c_int64(_tid()), c_int32(self.fd))

    def close(self):
        if self.closed:
            return
        self.lib.jfs_close(c_int64(_tid()), c_int32(self.fd))
        self.closed = True

    def __del__(self):
        self.close()

    def _check_closed(self):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def readline(self): # TODO: add parameter `size=-1`
        """Read until newline or EOF."""
        ls = self.readlines(1)
        if ls:
            return ls[0]
        return b''

    def xreadlines(self):
        return self

    def readlines(self, hint=-1):
        """Return a list of lines from the stream."""
        self._check_closed()
        if hint == -1:
            data = self.read(-1)
        else:
            rs = []
            while hint > 0:
                r = self.read(1)
                if not r:
                    break
                rs.append(r)
                if r[0] == b'\n':
                    hint -= 1
            data = b''.join(rs)
        return data.splitlines(True)

    def writelines(self, lines):
        """Write a list of lines to the file."""
        self._check_closed()
        self.write(b''.join(lines))
        self.flush()

class File(object):
    """A JuiceFS file."""
    def __init__(self, lib, fd, path, mode, flag, length, buffering, encoding=None, errors=None):
        self._file = _File(lib, fd, path, mode, flag, length)

        if buffering < 0:
            buffering = 128<<10

        if buffering == 0:
            self.raw_io = self._file
        elif self._file.readable():
            if self._file.writable():
                self.raw_io = io.BufferedRandom(self._file, buffer_size=buffering)
            else:
                self.raw_io = io.BufferedReader(self._file, buffer_size=buffering)
        else:
            self.raw_io = io.BufferedWriter(self._file, buffer_size=buffering)

        if encoding:
            self.io = io.TextIOWrapper(self.raw_io, encoding=encoding, errors=errors)
        else:
            self.io = self.raw_io

    def __getattr__(self, name):
        return getattr(self.io, name)

    def __fspath__(self):
        return self._file.name

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        lines = self.readlines(1)
        if lines:
            return lines[0]
        raise StopIteration

    def fileno(self):
        return self._file.fd

    def isatty(self):
        return False

    def fsync(self):
        """Force write file data to the backend storage."""
        self.io.flush()
        return self._file.fsync()

    def close(self):
        """Close the file. A closed file cannot be used for further I/O operations."""
        self.io.close()
        self._file.close()


def test():
    volume = os.getenv("JFS_VOLUME", "test")
    meta = os.getenv("JFS_META", "redis://localhost")
    v = Client(volume, meta, access_log="/tmp/jfs.log")
    with v.open("/.config", "r") as f:
        print(f.read())
    with v.open("/.stats", "r") as f:
        print(f.read())
    print(v.status())
    st = v.stat("/")
    print(st)
    if v.exists("/d"):
        v.rmr("/d")
    v.makedirs("/d")
    if v.exists("/d/file"):
        v.remove("/d/file")
    with v.open("/d/file", "w") as f:
        f.write("hello")
    with v.open("/d/file", "a+") as f:
        f.write("world")
    with v.open("/d/file") as f:
        data = f.read()
        assert data == "helloworld"
    with v.open("/d/file", "w") as f:
        f.write("hello")
    with v.open("/d/file", 'rb', 5) as f:
        data = f.readlines()
        assert data == [b"hello"]
    with v.open("/d/file", 'rb', 0) as f:
        data = f.readlines()
        assert data == [b"hello"]
    print(list(v.open("/d/file")))
    assert list(v.open("/d/file")) == ['hello']
    try:
        v.open("/d/d/file", "w")
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise e
    else:
        raise AssertionError
    v.chmod("/d/file", 0o777)
    # v.chown("/d/file", 0, 0)
    v.symlink("/d/file", "/d/link")
    assert v.readlink("/d/link") == "file"
    v.unlink("/d/link")
    v.link("/d/file", "/d/link")
    v.rename("/d/link", "/d/link2")
    names = sorted(v.listdir("/d"))
    assert names == ["file", "link2"]
    v.setxattr("/d/file", "user.key", b"value\0")
    xx = v.getxattr("/d/file", "user.key")
    assert xx == b"value\0"
    print(v.listxattr("/d/file"))
    assert v.listxattr("/d/file") == ["user.key"]
    v.removexattr("/d/file", "user.key")
    assert v.listxattr("/d/file") == []
    with v.open("/d/file", "a") as f:
        f.seek(0, 0)
        f.write("world")
        assert f.truncate(2) == 2
        assert f.seek(0, 2) == 2
    assert v.open("/d/file").read() == "he"
    k=1024
    start = time.time()
    size = 0
    with v.open("/bigfile", mode="wb") as f:
        for i in range(4000):
            f.write(b"!"*(k*k))
            size += k*k
    print("write time:", time.time()-start, size>>20)
    start = time.time()
    size = 0
    with v.open("/bigfile",mode='rb') as f:
        while True:
            t = f.read(4*k)
            if not t: break
            size += len(t)
    print("read time:", time.time()-start, size>>20)
    v.remove("/bigfile")
    v.rmr("/d")


if __name__ == '__main__':
    test()


================================================
FILE: sdk/python/juicefs/juicefs/spec.py
================================================
# encoding: utf-8
# JuiceFS, Copyright 2024 Juicedata, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import logging
import uuid
import os
from stat import S_ISDIR, S_ISLNK, S_ISREG

from fsspec.spec import AbstractFileSystem, AbstractBufferedFile

from .juicefs import Client

logger = logging.getLogger("fsspec.jfs")


class JuiceFS(AbstractFileSystem):
    """
    A JuiceFS file system.
    """
    protocol = "jfs", "juicefs"
    def __init__(self, name, auto_mkdir=False, **kwargs):
        if self._cached:
            return
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir
        self.temppath = kwargs.pop("temppath", "/tmp")
        self.fs = Client(name, **kwargs)

    @property
    def fsid(self):
        return "jfs_" + self.fs.name

    def makedirs(self, path, exist_ok=False, mode=511):
        if self.exists(path) and not exist_ok:
            raise FileExistsError(f"File exists: {path}")
        self.fs.makedirs(self._strip_protocol(path), mode, exist_ok=exist_ok)

    def mkdir(self, path, create_parents=True, mode=0o511):
        if self.exists(path):
            raise FileExistsError(f"File exists: {path}")
        if create_parents:
            self.fs.makedirs(self._strip_protocol(path), mode=mode)
        else:
            self.fs.mkdir(self._strip_protocol(path), mode)

    def rmdir(self, path):
        self.fs.rmdir(self._strip_protocol(path))

    def ls(self, path, detail=False, **kwargs):
        infos = self.fs.listdir(self._strip_protocol(path), detail)
        if not detail:
            return infos
        stats = []
        for name, st in infos:
            info = {
                "name": os.path.join(path, name),
                "size": st.st_size,
                "type": "directory" if S_ISDIR(st.st_mode) else "link" if S_ISLNK(st.st_mode) else "file",
                "mode": st.st_mode,
                "ino": st.st_ino,
                "nlink": st.st_nlink,
                "uid": st.st_uid,
                "gid": st.st_gid,
                "created": st.st_atime,
                "mtime": st.st_mtime,
            }
            if S_ISLNK(st.st_mode):
                info.update(**self.info(f"{path}/{name}"))
            stats.append(info)
        return stats

    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        if total:
            info = self.info(path)
            return info["size"]
        return super().du(path, total=total, maxdepth=maxdepth, withdirs=withdirs, **kwargs)

    def info(self, path):
        path = self._strip_protocol(path)
        try:
            st = self.fs.lstat(path)
        except OSError:
            raise FileNotFoundError(path)
        info = {
            "name": path,
        }
        if S_ISLNK(st.st_mode):
            info['destination'] = self.fs.readlink(path)
            st = self.fs.stat(path)
        info.update({
            "type": "directory" if S_ISDIR(st.st_mode) else "file" if S_ISREG(st.st_mode) else "other",
            "size": st.st_size,
            "uid": st.st_uid,
            "gid": st.st_gid,
            "created": st.st_atime,
            "mtime": st.st_mtime,
        })
        return info

    def lexists(self, path, **kwargs):
        try:
            self.fs.lstat(self._strip_protocol(path))
            return True
        except OSError:
            return False

    def cp_file(self, path1, path2, **kwargs):
        if self.isfile(path1):
            if self.auto_mkdir:
                self.makedirs(self._parent(path2), exist_ok=True)
            self.fs.clone(self._strip_protocol(path1), self._strip_protocol(path2))
        else:
            self.mkdirs(path2, exist_ok=True)

    def rm(self, path, recursive=False, maxdepth=None):
        if not isinstance(path, list):
            path = [path]
        for p in path:
            if recursive:
                self.fs.rmr(self._strip_protocol(p))
            else:
                self.fs.remove(self._strip_protocol(p))

    def _rm(self, path):
        self.fs.remove(self._strip_protocol(path))

    def mv(self, old, new, recursive=False, maxdepth=None, **kwargs):
        self.fs.rename(self._strip_protocol(old), self._strip_protocol(new))

    def link(self, src, dst, **kwargs):
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        self.fs.link(src, dst, **kwargs)

    def symlink(self, src, dst, **kwargs):
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        self.fs.symlink(src, dst, **kwargs)

    def islink(self, path) -> bool:
        try:
            self.fs.readlink(self._strip_protocol(path))
            return True
        except OSError:
            return False

    def _open(self, path, mode="rb", block_size=None, autocommit=True, **kwargs):
        path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        return JuiceFile(self, path, mode, block_size, autocommit, **kwargs)

    def touch(self, path, truncate=True, **kwargs):
        path = self._strip_protocol(path)
        if self.auto_mkdir:
            self.makedirs(self._parent(path), exist_ok=True)
        if truncate or not self.exists(path):
            with self.open(path, "wb", **kwargs):
                pass
        else:
            self.fs.utime(self._strip_protocol(path))

    @classmethod
    def _parent(cls, path):
        path = cls._strip_protocol(path)
        if os.sep == "/":
            # posix native
            return path.rsplit("/", 1)[0] or "/"
        else:
            # NT
            path_ = path.rsplit("/", 1)[0]
            if len(path_) <= 3:
                if path_[1:2] == ":":
                    # nt root (something like c:/)
                    return path_[0] + ":/"
            # More cases may be required here
            return path_

    def created(self, path):
        return datetime.datetime.fromtimestamp(
            self.info(path)["created"], tz=datetime.timezone.utc
        )

    def modified(self, path):
        return datetime.datetime.fromtimestamp(
            self.info(path)["mtime"], tz=datetime.timezone.utc
        )

    def _isfilestore(self):
        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
        # the original motivation. But we are a posix-like file system.
        # See https://github.com/dask/dask/issues/5526
        return True

    def chmod(self, path, mode):
        path = self._strip_protocol(path)
        return self.fs.chmod(path, mode)


class JuiceFile(AbstractBufferedFile):
    def __init__(self, fs, path, mode="rb", block_size=None, autocommit=True, cache_options=None, **kwargs):
        super().__init__(fs, path, mode, block_size, autocommit, cache_options=cache_options, **kwargs)
        if autocommit:
            self.temp = path
        self.f = None
        self._open()

    def _open(self):
        if self.f is None or self.f.closed:
            if self.autocommit or "w" not in self.mode:
                self.f = self.fs.fs.open(self.path, self.mode, buffering=self.blocksize)
            else:
                self.temp = "/".join([self.fs.temppath, str(uuid.uuid4())])
                self.f = open(self.temp, self.mode, buffering=self.blocksize)
            if "w" not in self.mode:
                self.size = self.f.seek(0, 2)
                self.f.seek(0)

    def _fetch_range(self, start, end):
        # probably only used by cached FS
        if "r" not in self.mode:
            raise ValueError
        self._open()
        self.f.seek(start)
        return self.f.read(end - start)

    def __setstate__(self, state):
        self.f = None
        loc = state.pop("loc", None)
        self.__dict__.update(state)
        if "r" in state["mode"]:
            self.f = None
            self._open()
            self.f.seek(loc)

    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop("f")
        if "r" in self.mode:
            d["loc"] = self.f.tell()
        else:
            if not self.f.closed:
                raise ValueError("Cannot serialise open write-mode local file")
        return d

    def commit(self):
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        self.fs.fs.rename(self.temp, self.path)

    def discard(self):
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        self.fs.fs.remove(self.temp)

    def tell(self):
        return self.f.tell()

    def seek(self, loc, whence=0):
        return self.f.seek(loc, whence)

    def write(self, data):
        return self.f.write(data)

    def read(self, length=-1):
        return self.f.read(length)

    def flush(self, force=True):
        return self.f.flush()

    def truncate(self, size=None):
        return self.f.truncate(size)

    def close(self):
        super().close()
        if getattr(self, "_unclosable", False):
            return
        self.f.close()

    def __getattr__(self, item):
        return getattr(self.f, item)

    def __del__(self):
        pass

from fsspec.registry import register_implementation
register_implementation("jfs", JuiceFS, True)
register_implementation("juicefs", JuiceFS, True)


================================================
FILE: sdk/python/juicefs/setup.py
================================================
from setuptools import setup, find_packages

# The following line will be replaced by the actual version number during the Make process
VERSION = "1.3.0"
BUILD_INFO = "BUILDDATE+COMMIT HASH"


setup(
    name='juicefs',
    version=VERSION,
    description=BUILD_INFO,
    package_data={'juicefs': ['*.so']},
    packages=find_packages(where="."),
    include_package_data=True,
    install_requires=['six'],
    entry_points={
        'fsspec.specs': [
            'jfs = juicefs.JuiceFS',
        ],
    },
)


================================================
FILE: sdk/python/juicefs/tests/__init__.py
================================================


================================================
FILE: sdk/python/juicefs/tests/test.py
================================================
import pytest

from fsspec import filesystem
import fsspec.tests.abstract as abstract

from juicefs.spec import JuiceFS
import os

class JuiceFSFixtures(abstract.AbstractFixtures):
    @pytest.fixture(scope="class")
    def fs(self):
        meta = os.getenv("JUICEFS_META", "redis://localhost")
        m = filesystem("jfs", auto_mkdir=True, name="test", meta=meta)
        return m

    @pytest.fixture
    def fs_path(self, tmpdir):
        return str(tmpdir)


class TestJuiceFSGet(abstract.AbstractGetTests, JuiceFSFixtures):
    pass


class TestJuiceFSPut(abstract.AbstractPutTests, JuiceFSFixtures):
    pass


class TestJuiceFSCopy(abstract.AbstractCopyTests, JuiceFSFixtures):
    pass